优化群总结问题。
This commit is contained in:
@@ -4,11 +4,12 @@ import requests
|
||||
from io import BytesIO
|
||||
import undetected_chromedriver as uc
|
||||
|
||||
if os.name == 'nt':
|
||||
try:
|
||||
uc.Chrome.__del__ = lambda self: None
|
||||
except Exception:
|
||||
pass
|
||||
# 注意:不要禁用析构函数,否则会导致Chrome进程泄漏
|
||||
# if os.name == 'nt':
|
||||
# try:
|
||||
# uc.Chrome.__del__ = lambda self: None
|
||||
# except Exception:
|
||||
# pass
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
@@ -55,16 +56,30 @@ def add_pdf_encryption(pdf_file, password="4000"):
|
||||
|
||||
def fetch_and_create_pdf(url):
|
||||
driver = None
|
||||
service = None
|
||||
try:
|
||||
options = uc.ChromeOptions()
|
||||
# 规避检测的关键配置
|
||||
options.headless = False
|
||||
# Use headless mode on every non-Windows platform (e.g. Linux servers)
|
||||
if os.name != 'nt':
|
||||
options.headless = True
|
||||
options.add_argument('--headless=new') # 使用新版headless模式
|
||||
else:
|
||||
options.headless = False
|
||||
|
||||
options.add_argument('--no-sandbox')
|
||||
options.add_argument('--disable-gpu')
|
||||
options.add_argument('--disable-dev-shm-usage')
|
||||
options.add_argument('--disable-extensions')
|
||||
options.add_argument('--disable-background-networking')
|
||||
# 确保进程能被正确清理
|
||||
options.add_argument('--disable-crash-reporter')
|
||||
options.add_argument('--disable-in-process-stack-traces')
|
||||
options.add_argument('--disable-logging')
|
||||
options.add_argument('--disable-dev-shm-usage')
|
||||
|
||||
# 如果依然在 Headless 触发检测,建议第一次运行设为 False 手动通过
|
||||
driver = uc.Chrome(options=options)
|
||||
# 创建driver实例
|
||||
driver = uc.Chrome(options=options, version_main=None)
|
||||
|
||||
logger.info(f"正在访问: {url}")
|
||||
driver.get(url)
|
||||
@@ -161,15 +176,42 @@ def fetch_and_create_pdf(url):
|
||||
logger.exception(f"抓取异常: {e}")
|
||||
return ""
|
||||
finally:
|
||||
# --- 解决 [WinError 6] 句柄无效的关键 ---
|
||||
# --- 确保Chrome进程被完全关闭 ---
|
||||
if driver:
|
||||
try:
|
||||
logger.debug("正在安全关闭浏览器...")
|
||||
driver.close() # 先关闭窗口
|
||||
driver.quit() # 再退出进程
|
||||
except Exception:
|
||||
# 捕获因句柄失效导致的退出异常,避免污染控制台
|
||||
pass
|
||||
# 先关闭所有标签页和窗口
|
||||
try:
|
||||
driver.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"关闭浏览器窗口时出错: {e}")
|
||||
|
||||
# 强制退出所有Chrome进程
|
||||
driver.quit()
|
||||
logger.debug("浏览器已完全关闭")
|
||||
except Exception as e:
|
||||
logger.error(f"关闭浏览器时出错: {e}")
|
||||
|
||||
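# Extra safety net: force-kill leftover Chrome processes (runs on any non-Windows OS, not only Linux)
# NOTE(review): os.getlogin() needs a controlling terminal; under cron/systemd/containers it may raise OSError, which the generic handler below only logs as a warning, so this cleanup is skipped - confirm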
|
||||
if os.name != 'nt':
|
||||
try:
|
||||
import psutil
|
||||
current_user = os.getlogin()
|
||||
for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'username']):
|
||||
try:
|
||||
if proc.info['name'] and 'chrome' in proc.info['name'].lower():
|
||||
if proc.info['username'] == current_user:
|
||||
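# Only kill Chrome processes whose command line contains '--user-data-dir=/tmp/playwright'
# NOTE(review): this is a substring match, not proof the process was started by this call; the prefix looks Playwright-specific - verify it matches the profile directory used by the uc.Chrome instance created here, otherwise nothing is ever killed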
|
||||
cmdline = proc.info.get('cmdline', [])
|
||||
if cmdline and any('--user-data-dir=/tmp/playwright' in str(cmd) for cmd in cmdline):
|
||||
logger.info(f"强制终止残留Chrome进程: PID={proc.info['pid']}")
|
||||
proc.kill()
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
|
||||
pass
|
||||
except ImportError:
|
||||
logger.debug("未安装psutil,跳过强制清理")
|
||||
except Exception as e:
|
||||
logger.warning(f"强制清理Chrome进程时出错: {e}")
|
||||
|
||||
|
||||
def pdf_file_path_undetected():
|
||||
|
||||
Reference in New Issue
Block a user