From 53d6e13e1b27292baaddc7afd1c82b8d2022b2ac Mon Sep 17 00:00:00 2001 From: Anup Das <149822306+anupddas@users.noreply.github.com> Date: Sun, 31 May 2026 09:33:12 +0530 Subject: [PATCH] fix: add Windows cross-platform support to selenium_module.py Resolves #723 - find_chromedriver(): now checks PATH first and then falls back to platform_mgr.get_chromedriver_paths() so Windows paths (C:\Program Files\ChromeDriver\chromedriver.exe etc.) are searched alongside Linux/macOS paths. - create_driver(): gate --no-sandbox, --disable-dev-shm-usage, --no-zygote behind `if platform_mgr.is_linux` - these are Linux process-model flags that destabilise Chrome on Windows. - create_driver(): set options.binary_location via platform_mgr.find_chromium_executable() whenever it returns a path, so Selenium gets an explicit Chrome path instead of relying on PATH. - create_driver(): on Windows, create a temp --user-data-dir per create_driver() call (cleaned up via atexit) to avoid locked/managed/work-profile failures. - Error messages in create_driver() and import block are now platform-aware (Windows gives ChromeDriver download links, chromedriver.exe placement, `pip install --upgrade selenium` and setup.ps1 guidance; macOS gives brew guidance; Linux keeps the existing apt/setup.sh advice). - check_browsers_available(): use platform_mgr paths and binary names so Chrome detection works on all three platforms. --- Python/modules/selenium_module.py | 469 ++++++++++++++++++------------ 1 file changed, 281 insertions(+), 188 deletions(-) diff --git a/Python/modules/selenium_module.py b/Python/modules/selenium_module.py index 43fa555f..6dceecf7 100644 --- a/Python/modules/selenium_module.py +++ b/Python/modules/selenium_module.py @@ -1,9 +1,10 @@ #!/usr/bin/env python3 """ Chromium-based selenium module for EyeWitness -Simplified single-browser approach using Chrome/Chromium headless +Cross-platform support for Windows, Linux, and macOS. 
""" +import atexit import http.client import os import socket @@ -30,39 +31,63 @@ from selenium.common.exceptions import WebDriverException except ImportError: print('[*] Selenium not found.') - print('[*] Run pip list to verify installation') - print('[*] Try: sudo apt install python3-selenium') + print('[*] Run: pip list | grep selenium to verify installation') + # Platform-aware install hint + import platform as _platform + if _platform.system().lower() == 'windows': + print('[*] Try: pip install selenium') + print('[*] Or activate the virtual env: eyewitness-venv\\Scripts\\activate.bat') + else: + print('[*] Try: pip install selenium') + print('[*] Or: sudo apt install python3-selenium (Debian/Ubuntu)') sys.exit() from modules.helpers import do_delay from modules.platform_utils import platform_mgr from modules.security_headers import collect_http_headers -# Platform-specific environment configuration for headless operation +# ── Platform-specific environment setup ────────────────────────────────────── +# Only apply Linux/headless-server env vars when actually on Linux. +# Windows has a native display and does NOT need these, and setting +# CHROME_NO_SANDBOX on Windows can silently destabilise the driver. if platform_mgr.is_linux: - # Optimize for headless Linux servers - os.environ['DISPLAY'] = ':99' # Virtual display + if not os.environ.get('DISPLAY'): + os.environ['DISPLAY'] = ':99' # used by Xvfb / virtual display os.environ['CHROME_HEADLESS'] = '1' os.environ['CHROME_NO_SANDBOX'] = '1' +# ── Driver factory ──────────────────────────────────────────────────────────── + def create_driver(cli_parsed, user_agent=None): - """Creates a Chromium WebDriver optimized for headless operation - + """Creates a Chromium WebDriver optimised for headless operation. + + Works on Windows, Linux (including Docker/headless servers), and macOS. 
+ Args: - cli_parsed (ArgumentParser): Command Line Object - user_agent (String, optional): Optional user-agent string - + cli_parsed: Parsed CLI arguments object. + user_agent (str, optional): Override User-Agent string. + Returns: - ChromeDriver: Selenium Chrome Webdriver + WebDriver: A configured Selenium Chrome WebDriver instance. """ try: options = ChromeOptions() - - # Essential headless configuration - options.add_argument('--headless=new') # Use new headless mode - options.add_argument('--no-sandbox') - options.add_argument('--disable-dev-shm-usage') + + # ── Headless mode ───────────────────────────────────────────────────── + if not getattr(cli_parsed, 'show_selenium', False): + options.add_argument('--headless=new') # Chrome 112+ new headless + + # ── Platform-conditional stability flags ────────────────────────────── + # --no-sandbox, --disable-dev-shm-usage and --no-zygote are Linux + # process-model / container flags. Passing them on Windows causes + # ChromeDriver to complain and can crash the browser session. 
+ if platform_mgr.is_linux: + options.add_argument('--no-sandbox') + options.add_argument('--disable-dev-shm-usage') # Linux shm fix + options.add_argument('--no-zygote') + + # Flags that are safe on every platform options.add_argument('--disable-gpu') options.add_argument('--disable-web-security') options.add_argument('--allow-running-insecure-content') @@ -70,231 +95,269 @@ def create_driver(cli_parsed, user_agent=None): options.add_argument('--ignore-ssl-errors') options.add_argument('--ignore-certificate-errors-spki-list') options.add_argument('--disable-features=VizDisplayCompositor') - - # Memory and performance optimization + + # Memory / performance options.add_argument('--memory-pressure-off') options.add_argument('--max_old_space_size=4096') - options.add_argument('--no-zygote') options.add_argument('--disable-background-timer-throttling') options.add_argument('--disable-renderer-backgrounding') options.add_argument('--disable-backgrounding-occluded-windows') - - # Window size configuration - width = getattr(cli_parsed, 'width', 1920) + + # Window size + width = getattr(cli_parsed, 'width', 1920) height = getattr(cli_parsed, 'height', 1080) options.add_argument(f'--window-size={width},{height}') - - # User agent configuration + + # User-Agent if user_agent: options.add_argument(f'--user-agent={user_agent}') - elif hasattr(cli_parsed, 'user_agent') and cli_parsed.user_agent: + elif getattr(cli_parsed, 'user_agent', None): options.add_argument(f'--user-agent={cli_parsed.user_agent}') - - # Disable automation detection + + # Reduce automation fingerprint options.add_argument('--disable-blink-features=AutomationControlled') - options.add_experimental_option("excludeSwitches", ["enable-automation"]) + options.add_experimental_option('excludeSwitches', ['enable-automation']) options.add_experimental_option('useAutomationExtension', False) - - # Security and certificate handling options.accept_insecure_certs = True - - # Setup Chrome service + + # ── Chrome 
binary location ──────────────────────────────────────────── + # platform_utils.py already knows the right paths for every OS; + # we just need to pass them through to Selenium so it never relies + # on an ambiguous PATH lookup that fails silently on Windows. + chrome_binary = platform_mgr.find_chromium_executable() + if chrome_binary: + options.binary_location = chrome_binary + print(f'[*] Using Chrome binary: {chrome_binary}') + + # ── Windows: temporary user-data-dir ───────────────────────────────── + # On Windows the default Chrome profile is often locked by a running + # browser, or is a managed/work/synced profile that blocks automation. + # A fresh temp directory avoids all of those problems. + if platform_mgr.is_windows: + temp_profile = tempfile.mkdtemp(prefix='ew_chrome_') + options.add_argument(f'--user-data-dir={temp_profile}') + # Clean up the temp directory when Python exits + atexit.register(shutil.rmtree, temp_profile, True) + print(f'[*] Using temporary Chrome profile: {temp_profile}') + + # ── Proxy ───────────────────────────────────────────────────────────── + if getattr(cli_parsed, 'proxy_ip', None) and getattr(cli_parsed, 'proxy_port', None): + proxy_type = getattr(cli_parsed, 'proxy_type', 'http') + options.add_argument( + f'--proxy-server={proxy_type}://{cli_parsed.proxy_ip}:{cli_parsed.proxy_port}' + ) + + # ── Temp dir (cross-platform) ───────────────────────────────────────── + temp_dir = tempfile.gettempdir() + os.environ['TMPDIR'] = temp_dir + os.environ['TMP'] = temp_dir + os.environ['TEMP'] = temp_dir + + # ── ChromeDriver service ────────────────────────────────────────────── service_kwargs = {} - - # Find chromedriver automatically chromedriver_path = find_chromedriver() if chromedriver_path: service_kwargs['executable_path'] = chromedriver_path - - # Configure temp directory for better compatibility - temp_dir = tempfile.gettempdir() - os.environ['TMPDIR'] = temp_dir - os.environ['TMP'] = temp_dir - os.environ['TEMP'] = temp_dir - 
+ print(f'[*] Using ChromeDriver: {chromedriver_path}') + else: + # Selenium >=4.6 ships Selenium Manager which can auto-download + # chromedriver – no explicit path needed in that case. + print('[*] ChromeDriver not found in known locations; ' + 'relying on Selenium Manager auto-detection.') + service = ChromeService(**service_kwargs) - - # Create Chrome driver - driver = webdriver.Chrome(service=service, options=options) - - # Set timeouts and window size + driver = webdriver.Chrome(service=service, options=options) + + # Final setup driver.set_page_load_timeout(cli_parsed.timeout) driver.set_window_size(width, height) - - # Remove automation indicators - driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})") - - print(f'[+] Chrome driver initialized successfully (headless mode)') + driver.execute_script( + "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})" + ) + + print('[+] Chrome driver initialised successfully (headless mode)') return driver - + except Exception as e: - from modules.troubleshooting import get_error_guidance - print(f'[!] Chrome WebDriver initialization error: {e}') - print('[*] Troubleshooting tips:') - print(' - Ensure Chromium is installed: sudo apt install chromium-browser') - print(' - Install chromedriver: sudo apt install chromium-chromedriver') - print(' - Run the setup script: sudo ./setup/setup.sh') - - # Special handling for common Chrome errors - error_str = str(e).lower() + print(f'[!] Chrome WebDriver initialisation error: {e}') + _print_driver_troubleshooting(e) + sys.exit(1) + + +def _print_driver_troubleshooting(exc): + """Print platform-appropriate troubleshooting hints after a driver error.""" + error_str = str(exc).lower() + + if platform_mgr.is_windows: + print('\n[*] Windows troubleshooting:') + print(' 1. Verify Chrome is installed:') + print(' C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe') + print(' 2. 
Download the matching ChromeDriver from:') + print(' https://chromedriver.chromium.org/downloads') + print(' or: https://googlechromelabs.github.io/chrome-for-testing/') + print(' 3. Place chromedriver.exe in one of:') + print(' C:\\Program Files\\ChromeDriver\\chromedriver.exe') + print(' (or add its folder to your system PATH)') + print(' 4. Alternatively, upgrade Selenium for auto-management:') + print(' pip install --upgrade selenium') + print(' 5. Run setup script as Administrator:') + print(' .\\setup\\setup.ps1') + if 'chromedriver' in error_str or 'executable' in error_str: + print('\n[!] ChromeDriver not found or version mismatch') + print('[*] Check your Chrome version at chrome://version') + print('[*] Download the matching chromedriver.exe') + elif 'chrome' in error_str: + print('\n[!] Chrome browser not found at expected location') + print('[*] Install Google Chrome: https://www.google.com/chrome/') + elif platform_mgr.is_mac: + print('\n[*] macOS troubleshooting:') + print(' - Install Chrome: brew install --cask google-chrome') + print(' - Install chromedriver: brew install chromedriver') + print(' - Allow chromedriver in System Preferences > Security') + print(' - Or run the setup script: ./setup/setup.sh') + else: + # Linux / container + print('\n[*] Linux troubleshooting:') + print(' - Install Chromium: sudo apt install chromium-browser') + print(' - Install ChromeDriver: sudo apt install chromium-chromedriver') + print(' - Or run the setup script: sudo ./setup/setup.sh') if 'chromedriver' in error_str: print('\n[!] ChromeDriver not found or incompatible') print('[*] Quick fix: sudo apt install chromium-chromedriver') elif 'chrome' in error_str or 'chromium' in error_str: print('\n[!] 
Chrome/Chromium browser not found') print('[*] Quick fix: sudo apt install chromium-browser') - - sys.exit(1) +# ── ChromeDriver discovery ──────────────────────────────────────────────────── + def find_chromedriver(): - """Find chromedriver executable in various locations""" - # Common chromedriver locations - possible_paths = [ - '/usr/bin/chromedriver', - '/usr/local/bin/chromedriver', - '/snap/bin/chromium.chromedriver', - shutil.which('chromedriver'), - shutil.which('chromium-chromedriver'), - ] - - for path in possible_paths: + """Find the chromedriver executable, searching PATH then known OS locations. + + Delegates to platform_utils.PlatformManager for the OS-specific path list + so that Windows, Linux, and macOS paths are all handled correctly. + + Returns: + str | None: Absolute path to chromedriver, or None if not found. + """ + # 1. Check PATH first (works on all platforms; honours user customisation) + for name in ['chromedriver', 'chromedriver.exe', 'chromium-chromedriver']: + found = shutil.which(name) + if found: + return found + + # 2. Fall back to the platform-specific hardcoded locations + for path in platform_mgr.get_chromedriver_paths(): if path and Path(path).exists(): return path - + return None +# ── Host capture ────────────────────────────────────────────────────────────── + def capture_host(cli_parsed, http_object, driver, ua=None): - """Screenshots a single host using Chrome and returns updated HTTP Object - - Enhanced version that collects HTTP headers and performs security analysis - alongside Selenium screenshot capture. - + """Screenshot a single host and return the updated HTTP object. + + Collects HTTP headers via urllib first, then uses Selenium for the + screenshot. + Args: - cli_parsed (ArgumentParser): Command Line Object - http_object (HTTPObject): HTTP Object - driver (WebDriver): Selenium WebDriver - ua (str, optional): User agent string - + cli_parsed: Parsed CLI arguments. + http_object: HTTPTableObject for the target URL. 
+ driver: Active Selenium WebDriver. + ua (str, optional): Override User-Agent. + Returns: - tuple: (HTTPObject, WebDriver) Updated objects + tuple: (http_object, driver) """ - # Step 1: Collect HTTP headers via HTTP client (before Selenium) + # ── Step 1: Collect HTTP headers ───────────────────────────────────────── print(f'[*] Collecting headers for {http_object.remote_system}') - - # Set up proxy configuration if provided + proxy_config = None - if hasattr(cli_parsed, 'proxy_ip') and cli_parsed.proxy_ip: + if getattr(cli_parsed, 'proxy_ip', None): proxy_config = { - 'ip': cli_parsed.proxy_ip, - 'port': getattr(cli_parsed, 'proxy_port', 8080) + 'ip': cli_parsed.proxy_ip, + 'port': getattr(cli_parsed, 'proxy_port', 8080), } - - # Collect headers with HTTP client + headers, header_error = collect_http_headers( url=http_object.remote_system, timeout=getattr(cli_parsed, 'timeout', 7), user_agent=ua or getattr(cli_parsed, 'user_agent', None), - proxy=proxy_config + proxy=proxy_config, ) - - # Store headers in HTTPTableObject + if headers: - # Store raw headers in HTTPTableObject http_object.http_headers = headers - - # Create formatted headers display for the report - formatted_headers = {} - for key, value in headers.items(): - # Truncate long header values for display - display_value = value[:150] + "..." if len(value) > 150 else value - formatted_headers[key] = display_value - - http_object.headers = formatted_headers - + http_object.headers = { + k: (v[:150] + '...' if len(v) > 150 else v) + for k, v in headers.items() + } print(f'[+] Headers collected: {len(headers)} headers') else: - # Handle header collection failure - if header_error: - print(f'[!] Header collection failed for {http_object.remote_system}: {header_error}') - http_object.headers = {"Header Collection": f"Failed - {header_error}"} - else: - print(f'[!] 
No headers received from {http_object.remote_system}') - http_object.headers = {"Headers": "No headers received"} - - # Step 2: Continue with Selenium screenshot capture + msg = header_error or 'No headers received' + print(f'[!] Header collection failed for {http_object.remote_system}: {msg}') + http_object.headers = {'Header Collection': f'Failed – {msg}'} + + # ── Step 2: Selenium screenshot ─────────────────────────────────────────── try: print(f'[*] Taking screenshot of {http_object.remote_system}') driver.get(http_object.remote_system) - - # Handle page load timeout + try: - # Wait for page to load driver.implicitly_wait(3) except TimeoutException: - pass # Continue with screenshot anyway - - # Capture page content - http_object.source_code = driver.page_source.encode('utf-8') - http_object.page_title = driver.title + pass + http_object.source_code = driver.page_source.encode('utf-8') + http_object.page_title = driver.title - # Persist source_code to the source folder using the same filename strategy + # Persist page source try: - # Normalize bytes src_bytes = http_object.source_code if isinstance(src_bytes, str): src_bytes = src_bytes.encode('utf-8') - # Prefer an already-set source_path + if getattr(http_object, 'source_path', None): dest = Path(http_object.source_path) else: - # Build filename like set_paths() file_name = http_object.remote_system.replace('://', '.') - for char in [':', '/', '?', '=', '%', '+']: - file_name = file_name.replace(char, '.') + for ch in [':', '/', '?', '=', '%', '+']: + file_name = file_name.replace(ch, '.') dest = Path(cli_parsed.d) / 'source' / f'{file_name}.txt' + dest.parent.mkdir(parents=True, exist_ok=True) - with open(dest, 'wb') as sf: - sf.write(src_bytes) + dest.write_bytes(src_bytes) http_object.source_path = str(dest) - except Exception as e: - print(f'[!] 
Warning: failed to write page source for {http_object.remote_system}: {e}') - - - # Take screenshot - properly sanitize filename - def sanitize_filename(url): - import re - # Remove protocol and sanitize all unsafe characters - filename = re.sub(r'^https?://', '', url) - # Replace all non-alphanumeric characters (except hyphens and dots) with underscores - filename = re.sub(r'[^a-zA-Z0-9\-\.]', '_', filename) - # Limit length to prevent filesystem issues - return filename[:200] - - safe_filename = sanitize_filename(http_object.remote_system) - screenshot_path = Path(cli_parsed.d) / 'screens' / f'{safe_filename}.png' + except Exception as exc: + print(f'[!] Warning: failed to write page source for ' + f'{http_object.remote_system}: {exc}') + + # Screenshot filename (safe for all OS path rules) + import re + safe_name = re.sub(r'^https?://', '', http_object.remote_system) + safe_name = re.sub(r'[^a-zA-Z0-9\-\.]', '_', safe_name)[:200] + screenshot_path = Path(cli_parsed.d) / 'screens' / f'{safe_name}.png' driver.save_screenshot(str(screenshot_path)) http_object.screenshot_path = str(screenshot_path) - + print(f'[+] Captured screenshot: {http_object.remote_system}') - + except TimeoutException: print(f'[*] Timeout connecting to {http_object.remote_system}') driver.quit() driver = create_driver(cli_parsed, ua) http_object.error_state = 'Timeout' - - except Exception as e: - error_msg = str(e).lower() - - # Enhanced error handling with specific error types + + except Exception as exc: + error_msg = str(exc).lower() + if 'net::err_connection_reset' in error_msg: - print(f'[*] Connection reset by {http_object.remote_system} - target may be blocking requests') + print(f'[*] Connection reset by {http_object.remote_system}') http_object.error_state = 'Connection Reset' elif 'net::err_connection_refused' in error_msg: - print(f'[*] Connection refused by {http_object.remote_system} - service may be down') + print(f'[*] Connection refused by {http_object.remote_system}') 
http_object.error_state = 'Connection Refused' elif 'net::err_timed_out' in error_msg or 'timeout' in error_msg: print(f'[*] Timeout connecting to {http_object.remote_system}') @@ -306,62 +369,92 @@ def sanitize_filename(url): print(f'[*] SSL/Certificate error for {http_object.remote_system}') http_object.error_state = 'SSL Error' elif 'chrome not reachable' in error_msg or 'session deleted' in error_msg: - print(f'[*] Chrome driver crashed while accessing {http_object.remote_system} - restarting') + print(f'[*] Chrome driver crashed – restarting') http_object.error_state = 'Driver Crashed' - # Force driver restart try: driver.quit() - except: + except Exception: pass driver = create_driver(cli_parsed, ua) return http_object, driver else: - print(f'[*] Error capturing screenshot for {http_object.remote_system}: {e}') + print(f'[*] Error capturing {http_object.remote_system}: {exc}') http_object.error_state = 'Error' - - # Test if driver is still responsive + + # Check whether the driver is still alive try: driver.get('about:blank') - except: - print(f'[*] Chrome driver became unresponsive - restarting') + except Exception: + print('[*] Chrome driver became unresponsive – restarting') try: driver.quit() - except: + except Exception: pass driver = create_driver(cli_parsed, ua) - + return http_object, driver +# ── Browser availability helpers ────────────────────────────────────────────── + def check_browsers_available(): - """Check if Chrome/Chromium is available""" + """Check whether a usable Chrome/Chromium binary and ChromeDriver exist. 
+ + Returns: + dict: {browsers: list[str], chromedriver: bool, ready: bool} + """ browsers = [] - - # Check for Chrome/Chromium binaries - for browser in ['google-chrome', 'chromium-browser', 'chromium']: - if shutil.which(browser): - browsers.append(browser) - - # Check for chromedriver + + # Ask platform_mgr for the canonical browser path first + chromium_exe = platform_mgr.find_chromium_executable() + if chromium_exe: + browsers.append(chromium_exe) + + # Also sweep PATH for well-known binary names (platform-aware) + if platform_mgr.is_windows: + path_names = ['chrome', 'chrome.exe'] + elif platform_mgr.is_mac: + path_names = ['google-chrome', 'chromium'] + else: + path_names = ['google-chrome', 'chromium-browser', 'chromium'] + + for name in path_names: + found = shutil.which(name) + if found and found not in browsers: + browsers.append(found) + + # Sweep the hardcoded OS paths from platform_mgr + for path in platform_mgr.get_chromium_paths(): + if Path(path).exists() and path not in browsers: + browsers.append(path) + chromedriver_available = find_chromedriver() is not None - + return { - 'browsers': browsers, + 'browsers': browsers, 'chromedriver': chromedriver_available, - 'ready': len(browsers) > 0 and chromedriver_available + 'ready': bool(browsers) and chromedriver_available, } def get_browser_info(): - """Get information about the browser setup""" + """Print a human-readable summary of the browser setup.""" status = check_browsers_available() - - print(f"[*] Browser Status:") - print(f" Available browsers: {', '.join(status['browsers']) if status['browsers'] else 'None'}") + + print('[*] Browser status:') + if status['browsers']: + for b in status['browsers']: + print(f' Browser : {b}') + else: + print(' Browser : None found') print(f" ChromeDriver: {'Available' if status['chromedriver'] else 'Missing'}") print(f" Ready for screenshots: {'Yes' if status['ready'] else 'No'}") - + if not status['ready']: - print("[*] Run setup script to install: sudo 
./setup/setup.sh") - - return status \ No newline at end of file + if platform_mgr.is_windows: + print('[*] Install Chrome and ChromeDriver, then re-run the setup script:') + print(' .\\setup\\setup.ps1') + else: + print('[*] Run setup script to install: sudo ./setup/setup.sh') + + return status