diff --git a/batchProcess.py b/batchProcess.py index 2632135..d5824fa 100644 --- a/batchProcess.py +++ b/batchProcess.py @@ -1,82 +1,96 @@ import csv -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from webdriver_manager.chrome import ChromeDriverManager -from percy import percy_snapshot +import re from time import sleep from concurrent.futures import ThreadPoolExecutor -from selenium.webdriver.chrome.options import Options from urllib.parse import urlparse -import re -CSV_FILE = './urls.csv' # Path to your CSV file -NUM_THREADS = 2 # Number of parallel threads +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from percy import percy_snapshot + +CSV_FILE = "./urls.csv" +NUM_THREADS = 2 -# Load URLs from CSV def load_urls(): - with open(CSV_FILE, newline='') as file: + with open(CSV_FILE, newline="") as file: reader = csv.reader(file) - return [row[0].strip() for row in reader if row and row[0].strip().startswith(("http://", "https://"))] + return [ + row[0].strip() + for row in reader + if row and row[0].strip().startswith(("http://", "https://")) + ] -# Helper to split list into n even chunks def split_list(lst, n): k, m = divmod(len(lst), n) - return [lst[i*k + min(i, m):(i+1)*k + min(i+1, m)] for i in range(n)] + return [ + lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] + for i in range(n) + ] -# Function for each thread to process its batch of URLs -def process_urls(urls): - if not urls: - print("No URLs provided to process.") - return - # Use webdriver-manager to automatically install Chromedriver +def get_snapshot_name(url): + parsed = urlparse(url) + hostname = parsed.netloc.replace("www.", "") + path = parsed.path.strip("/") + sanitized_path = re.sub( + r"[^a-zA-Z0-9_-]", + "_", + path + ) + if sanitized_path: + return f"{hostname}_{sanitized_path}" + return hostname + +def create_driver(): options = Options() - options.add_argument("--headless=new") # optional but recommended for Percy - options.add_argument("--no-sandbox") + options.add_argument("--headless=new") options.add_argument("--disable-dev-shm-usage") - - service = Service(ChromeDriverManager(driver_version="139.0.7258.155").install()) - driver = webdriver.Chrome(service=service, options=options) + options.add_argument("--no-sandbox") + options.add_argument("--disable-gpu") + driver = webdriver.Chrome(options=options) driver.set_window_size(1200, 800) + return driver + +def process_urls(urls): + if not urls: + return + driver = create_driver() try: for url in urls: - print(f"Loading URL: {url}") - driver.get(url) - sleep(2) - - parsed_url = urlparse(url) - hostname = parsed_url.netloc - if hostname.startswith("www."): - hostname = hostname[4:] - - # Sanitize path: remove leading slash and replace other slashes with underscores - path = parsed_url.path.lstrip('/') - sanitized_path = re.sub(r'[^a-zA-Z0-9_-]', '_', path) # Replace non-alphanum/underscore/dash chars - - # Construct snapshot name - if sanitized_path: - snapshot_name = f"Snapshot for {hostname}_{sanitized_path}" - else: - snapshot_name = f"Snapshot for {hostname}" - - print(f"Capturing Percy snapshot: {snapshot_name}") - percy_snapshot(driver, snapshot_name,widths=[768, 1200]) - + try: + print(f"Loading URL: {url}") + driver.get(url) + sleep(3) + snapshot_name = get_snapshot_name(url) + print(f"Taking Percy snapshot: {snapshot_name}") + percy_snapshot( + driver, + snapshot_name, + widths=[768, 1200] + ) + print(f"Snapshot completed: {snapshot_name}") + except Exception as e: + print(f"Failed for URL: {url}") + print(str(e)) finally: driver.quit() def main(): urls = load_urls() if not urls: - print("No URLs found in the CSV file.") + print("No URLs found in urls.csv") return - - url_batches = split_list(urls, NUM_THREADS) - + print(f"Found {len(urls)} URLs") + print(f"Using {NUM_THREADS} parallel threads") + batches = split_list(urls, NUM_THREADS) with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: - # Submit only non-empty batches - futures = [executor.submit(process_urls, batch) for batch in url_batches if batch] + futures = [ + executor.submit(process_urls, batch) + for batch in batches + if batch + ] for future in futures: future.result() + print("All snapshots completed") if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/package.json b/package.json index 48e1101..dd72fc0 100644 --- a/package.json +++ b/package.json @@ -1,15 +1,11 @@ { - "name": "python-csv-python", - "version": "1.0.0", - "main": "index.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "author": "", - "license": "ISC", - "description": "", - "devDependencies": { - "@percy/cli": "^1.30.1" - } - } - \ No newline at end of file +"name": "python-csv-percy", +"version": "1.0.0", +"private": true, +"scripts": { +"percy": "percy exec -- python3 batchProcess.py" +}, +"devDependencies": { +"@percy/cli": "^1.31.4" +} +} diff --git a/requirements.txt b/requirements.txt index 74df9c7..78c39a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,2 @@ -attrs==24.2.0 -certifi==2024.8.30 -charset-normalizer==3.4.0 -h11==0.14.0 -idna==3.10 -outcome==1.3.0.post0 -percy-selenium==2.1.1 -PySocks==1.7.1 -requests==2.32.3 selenium==4.25.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -trio==0.27.0 -trio-websocket==0.11.1 -typing_extensions==4.12.2 -urllib3==2.2.3 -websocket-client==1.8.0 -wsproto==1.2.0 -webdriver-manager==4.0.2 \ No newline at end of file +percy-selenium==2.1.1 diff --git a/urls.csv b/urls.csv index c9d31e6..bd44c19 100644 --- a/urls.csv +++ b/urls.csv @@ -1,2 +1,2 @@ URLs -https://www.browserstack.com/docs/app-percy/integrate-bstack-sdk/webdriverio +https://www.jeep.com/new-inventory/vehicle-details.gladiator.2026.html?vin=1C6RJTEG3TL189698&dealerCode=26915&ccode=IUJ202611JTJH98B&llp=2TA&radius=100&zipCode=10014