From dc7f8493618373f013ca8d2955b665072efc54a7 Mon Sep 17 00:00:00 2001 From: liuwei Date: Tue, 13 Jan 2026 09:09:06 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=BE=A4=E6=80=BB=E7=BB=93?= =?UTF-8?q?=E9=97=AE=E9=A2=98=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/sehuatang/shehuatang_undetected.py | 70 +++++++++++++++++++----- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/utils/sehuatang/shehuatang_undetected.py b/utils/sehuatang/shehuatang_undetected.py index a234095..f75753b 100644 --- a/utils/sehuatang/shehuatang_undetected.py +++ b/utils/sehuatang/shehuatang_undetected.py @@ -4,11 +4,12 @@ import requests from io import BytesIO import undetected_chromedriver as uc -if os.name == 'nt': - try: - uc.Chrome.__del__ = lambda self: None - except Exception: - pass +# 注意:不要禁用析构函数,否则会导致Chrome进程泄漏 +# if os.name == 'nt': +# try: +# uc.Chrome.__del__ = lambda self: None +# except Exception: +# pass from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC @@ -55,16 +56,30 @@ def add_pdf_encryption(pdf_file, password="4000"): def fetch_and_create_pdf(url): driver = None + service = None try: options = uc.ChromeOptions() # 规避检测的关键配置 - options.headless = False + # 在Linux服务器上使用headless模式 + if os.name != 'nt': + options.headless = True + options.add_argument('--headless=new') # 使用新版headless模式 + else: + options.headless = False + options.add_argument('--no-sandbox') options.add_argument('--disable-gpu') options.add_argument('--disable-dev-shm-usage') + options.add_argument('--disable-extensions') + options.add_argument('--disable-background-networking') + # 确保进程能被正确清理 + options.add_argument('--disable-crash-reporter') + options.add_argument('--disable-in-process-stack-traces') + options.add_argument('--disable-logging') + options.add_argument('--disable-dev-shm-usage') - # 如果依然在 Headless 触发检测,建议第一次运行设为 False 手动通过 - driver = uc.Chrome(options=options) + # 创建driver实例 + driver = uc.Chrome(options=options, version_main=None) logger.info(f"正在访问: {url}") driver.get(url) @@ -161,15 +176,42 @@ def fetch_and_create_pdf(url): logger.exception(f"抓取异常: {e}") return "" finally: - # --- 解决 [WinError 6] 句柄无效的关键 --- + # --- 确保Chrome进程被完全关闭 --- if driver: try: logger.debug("正在安全关闭浏览器...") - driver.close() # 先关闭窗口 - driver.quit() # 再退出进程 - except Exception: - # 捕获因句柄失效导致的退出异常,避免污染控制台 - pass + # 先关闭所有标签页和窗口 + try: + driver.close() + except Exception as e: + logger.warning(f"关闭浏览器窗口时出错: {e}") + + # 强制退出所有Chrome进程 + driver.quit() + logger.debug("浏览器已完全关闭") + except Exception as e: + logger.error(f"关闭浏览器时出错: {e}") + + # 额外保险:强制清理残留的Chrome进程(仅Linux) + if os.name != 'nt': + try: + import psutil + current_user = os.getlogin() + for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'username']): + try: + if proc.info['name'] and 'chrome' in proc.info['name'].lower(): + if proc.info['username'] == current_user: + # 检查是否是本次启动的chrome进程(通过命令行参数判断) + cmdline = proc.info.get('cmdline', []) + if cmdline and any('--user-data-dir=/tmp/playwright' in str(cmd) for cmd in cmdline): + logger.info(f"强制终止残留Chrome进程: PID={proc.info['pid']}") + proc.kill() + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): + pass + except ImportError: + logger.debug("未安装psutil,跳过强制清理") + except Exception as e: + logger.warning(f"强制清理Chrome进程时出错: {e}") def pdf_file_path_undetected():