Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 67 additions & 53 deletions batchProcess.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,96 @@
import csv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from percy import percy_snapshot
import re
from time import sleep
from concurrent.futures import ThreadPoolExecutor
from selenium.webdriver.chrome.options import Options
from urllib.parse import urlparse
import re

CSV_FILE = './urls.csv' # Path to your CSV file
NUM_THREADS = 2 # Number of parallel threads
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from percy import percy_snapshot

# Path to the CSV file read by load_urls(); the first column of each row is
# treated as a candidate URL.
CSV_FILE = "./urls.csv"
# Number of worker threads used by main(); the URL list is also split into
# this many batches.
NUM_THREADS = 2

# Load URLs from CSV
def load_urls(path=None):
    """Return the URLs listed in the first column of a CSV file.

    Rows that are empty, or whose first cell (after trimming whitespace)
    does not start with ``http://`` or ``https://``, are skipped. This also
    drops a header row such as ``URLs``.

    Args:
        path: CSV file to read. Defaults to the module-level ``CSV_FILE``.

    Returns:
        A list of URL strings in file order.
    """
    if path is None:
        path = CSV_FILE
    # "utf-8-sig" strips a leading byte-order mark; without it a BOM would be
    # glued to the first cell and the scheme check below would silently drop
    # the first URL of a header-less file.
    with open(path, newline="", encoding="utf-8-sig") as file:
        urls = []
        for row in csv.reader(file):
            if not row:
                continue
            candidate = row[0].strip()
            if candidate.startswith(("http://", "https://")):
                urls.append(candidate)
        return urls

# Helper to split list into n even chunks
def split_list(lst, n):
    """Split ``lst`` into ``n`` contiguous chunks of near-equal size.

    The first ``len(lst) % n`` chunks hold one extra element, so chunk sizes
    differ by at most one. Chunks may be empty when ``n > len(lst)``.

    Args:
        lst: Sequence to split (anything supporting ``len`` and slicing).
        n: Number of chunks to produce; must be at least 1.

    Returns:
        A list of ``n`` slices of ``lst`` that concatenate back to ``lst``.

    Raises:
        ValueError: If ``n`` is less than 1. A negative ``n`` would otherwise
            return ``[]`` and silently discard every element.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")
    k, m = divmod(len(lst), n)
    return [
        lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
        for i in range(n)
    ]

# Function for each thread to process its batch of URLs
def process_urls(urls):
if not urls:
print("No URLs provided to process.")
return
# Use webdriver-manager to automatically install Chromedriver
def get_snapshot_name(url):
    """Build a snapshot name from a URL's host and path.

    The name is ``<host>_<path>`` where a leading ``www.`` is removed from
    the host and every path character other than ASCII letters, digits,
    ``_`` and ``-`` is replaced by ``_``. If the path is empty the host alone
    is returned. The query string and fragment are not part of the name, so
    URLs that differ only in those produce the same name.

    Args:
        url: Absolute URL string.

    Returns:
        The snapshot name as a string.
    """
    parsed = urlparse(url)
    hostname = parsed.netloc
    # Strip only a leading "www." label; a blanket replace() would also
    # mangle hosts such as "shop.www.example.com" or "awww.example.com".
    if hostname.startswith("www."):
        hostname = hostname[len("www."):]
    path = parsed.path.strip("/")
    sanitized_path = re.sub(r"[^a-zA-Z0-9_-]", "_", path)
    if sanitized_path:
        return f"{hostname}_{sanitized_path}"
    return hostname

def create_driver():
    """Create a headless Chrome WebDriver with a 1200x800 window.

    No explicit ``Service`` or driver path is passed to ``webdriver.Chrome``;
    locating the chromedriver binary is left to Selenium.
    NOTE(review): presumably this relies on Selenium's bundled driver
    management; confirm against the Selenium version installed.

    Returns:
        The configured ``webdriver.Chrome`` instance. The caller is
        responsible for calling ``quit()`` on it.
    """
    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-gpu")
    driver = webdriver.Chrome(options=options)
    driver.set_window_size(1200, 800)
    return driver

def process_urls(urls):
    """Load each URL in one browser session and take a Percy snapshot of it.

    A single driver from ``create_driver()`` is reused for the whole batch
    and is always quit at the end. A failure on one URL is printed and does
    not stop the remaining URLs from being processed.

    Args:
        urls: Iterable of URL strings. An empty batch returns immediately
            without starting a browser.
    """
    if not urls:
        return
    driver = create_driver()
    try:
        for url in urls:
            # Per-URL try so one bad page cannot abort the rest of the batch.
            try:
                print(f"Loading URL: {url}")
                driver.get(url)
                # Fixed pause after navigation before the snapshot is taken.
                sleep(3)
                snapshot_name = get_snapshot_name(url)
                print(f"Taking Percy snapshot: {snapshot_name}")
                percy_snapshot(
                    driver,
                    snapshot_name,
                    widths=[768, 1200],
                )
                print(f"Snapshot completed: {snapshot_name}")
            except Exception as e:
                print(f"Failed for URL: {url}")
                print(str(e))
    finally:
        driver.quit()

def main():
    """Read the URL list and snapshot it using a pool of worker threads.

    URLs from ``load_urls()`` are split into ``NUM_THREADS`` batches and each
    non-empty batch is handed to ``process_urls`` on its own thread. Prints
    a message and returns early when no URLs are found.
    """
    urls = load_urls()
    if not urls:
        print("No URLs found in urls.csv")
        return
    print(f"Found {len(urls)} URLs")
    print(f"Using {NUM_THREADS} parallel threads")
    batches = split_list(urls, NUM_THREADS)
    with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
        # Skip empty batches so no browser is started for nothing.
        futures = [
            executor.submit(process_urls, batch)
            for batch in batches
            if batch
        ]
        # result() re-raises any exception that escaped a worker.
        for future in futures:
            future.result()
    print("All snapshots completed")

# Run the batch once when executed as a script (not on import).
if __name__ == "__main__":
    main()
24 changes: 10 additions & 14 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
{
"name": "python-csv-python",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"description": "",
"devDependencies": {
"@percy/cli": "^1.30.1"
}
}

"name": "python-csv-percy",
"version": "1.0.0",
"private": true,
"scripts": {
"percy": "percy exec -- python3 batchProcess.py"
},
"devDependencies": {
"@percy/cli": "^1.31.4"
}
}
19 changes: 1 addition & 18 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,2 @@
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.4.0
h11==0.14.0
idna==3.10
outcome==1.3.0.post0
percy-selenium==2.1.1
PySocks==1.7.1
requests==2.32.3
selenium==4.25.0
sniffio==1.3.1
sortedcontainers==2.4.0
trio==0.27.0
trio-websocket==0.11.1
typing_extensions==4.12.2
urllib3==2.2.3
websocket-client==1.8.0
wsproto==1.2.0
webdriver-manager==4.0.2
percy-selenium==2.1.1
2 changes: 1 addition & 1 deletion urls.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
URLs
https://www.browserstack.com/docs/app-percy/integrate-bstack-sdk/webdriverio
https://www.jeep.com/new-inventory/vehicle-details.gladiator.2026.html?vin=1C6RJTEG3TL189698&dealerCode=26915&ccode=IUJ202611JTJH98B&llp=2TA&radius=100&zipCode=10014
Loading