优化群总结问题。

This commit is contained in:
liuwei
2026-01-13 09:09:06 +08:00
parent 9ed201bf33
commit dc7f849361

View File

@@ -4,11 +4,12 @@ import requests
from io import BytesIO
import undetected_chromedriver as uc
if os.name == 'nt':
try:
uc.Chrome.__del__ = lambda self: None
except Exception:
pass
# NOTE: do not disable the destructor (uc.Chrome.__del__); otherwise Chrome processes will leak.
# if os.name == 'nt':
# try:
# uc.Chrome.__del__ = lambda self: None
# except Exception:
# pass
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
@@ -55,16 +56,30 @@ def add_pdf_encryption(pdf_file, password="4000"):
def fetch_and_create_pdf(url):
driver = None
service = None
try:
options = uc.ChromeOptions()
# 规避检测的关键配置
options.headless = False
# 在Linux服务器上使用headless模式
if os.name != 'nt':
options.headless = True
options.add_argument('--headless=new') # 使用新版headless模式
else:
options.headless = False
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-extensions')
options.add_argument('--disable-background-networking')
# 确保进程能被正确清理
options.add_argument('--disable-crash-reporter')
options.add_argument('--disable-in-process-stack-traces')
options.add_argument('--disable-logging')
options.add_argument('--disable-dev-shm-usage')
# 如果依然在 Headless 触发检测,建议第一次运行设为 False 手动通过
driver = uc.Chrome(options=options)
# 创建driver实例
driver = uc.Chrome(options=options, version_main=None)
logger.info(f"正在访问: {url}")
driver.get(url)
@@ -161,15 +176,42 @@ def fetch_and_create_pdf(url):
logger.exception(f"抓取异常: {e}")
return ""
finally:
# --- 解决 [WinError 6] 句柄无效的关键 ---
# --- 确保Chrome进程被完全关闭 ---
if driver:
try:
logger.debug("正在安全关闭浏览器...")
driver.close() # 先关闭窗口
driver.quit() # 再退出进程
except Exception:
# 捕获因句柄失效导致的退出异常,避免污染控制台
pass
# 先关闭所有标签页和窗口
try:
driver.close()
except Exception as e:
logger.warning(f"关闭浏览器窗口时出错: {e}")
# 强制退出所有Chrome进程
driver.quit()
logger.debug("浏览器已完全关闭")
except Exception as e:
logger.error(f"关闭浏览器时出错: {e}")
# Extra safeguard: force-kill any leftover Chrome processes (non-Windows platforms only).
if os.name != 'nt':
try:
import psutil
current_user = os.getlogin()
for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'username']):
try:
if proc.info['name'] and 'chrome' in proc.info['name'].lower():
if proc.info['username'] == current_user:
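# Decide from the command-line arguments whether this Chrome process was started by the current run.
# NOTE(review): the filter below matches '--user-data-dir=/tmp/playwright'; nothing in the visible code sets that path — confirm it matches the profile directory this driver actually uses.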
cmdline = proc.info.get('cmdline', [])
if cmdline and any('--user-data-dir=/tmp/playwright' in str(cmd) for cmd in cmdline):
logger.info(f"强制终止残留Chrome进程: PID={proc.info['pid']}")
proc.kill()
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
except ImportError:
logger.debug("未安装psutil跳过强制清理")
except Exception as e:
logger.warning(f"强制清理Chrome进程时出错: {e}")
def pdf_file_path_undetected():