优化群总结问题。
This commit is contained in:
@@ -4,11 +4,12 @@ import requests
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
|
|
||||||
if os.name == 'nt':
|
# 注意:不要禁用析构函数,否则会导致Chrome进程泄漏
|
||||||
try:
|
# if os.name == 'nt':
|
||||||
uc.Chrome.__del__ = lambda self: None
|
# try:
|
||||||
except Exception:
|
# uc.Chrome.__del__ = lambda self: None
|
||||||
pass
|
# except Exception:
|
||||||
|
# pass
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
@@ -55,16 +56,30 @@ def add_pdf_encryption(pdf_file, password="4000"):
|
|||||||
|
|
||||||
def fetch_and_create_pdf(url):
|
def fetch_and_create_pdf(url):
|
||||||
driver = None
|
driver = None
|
||||||
|
service = None
|
||||||
try:
|
try:
|
||||||
options = uc.ChromeOptions()
|
options = uc.ChromeOptions()
|
||||||
# 规避检测的关键配置
|
# 规避检测的关键配置
|
||||||
options.headless = False
|
# 在Linux服务器上使用headless模式
|
||||||
|
if os.name != 'nt':
|
||||||
|
options.headless = True
|
||||||
|
options.add_argument('--headless=new') # 使用新版headless模式
|
||||||
|
else:
|
||||||
|
options.headless = False
|
||||||
|
|
||||||
options.add_argument('--no-sandbox')
|
options.add_argument('--no-sandbox')
|
||||||
options.add_argument('--disable-gpu')
|
options.add_argument('--disable-gpu')
|
||||||
options.add_argument('--disable-dev-shm-usage')
|
options.add_argument('--disable-dev-shm-usage')
|
||||||
|
options.add_argument('--disable-extensions')
|
||||||
|
options.add_argument('--disable-background-networking')
|
||||||
|
# 确保进程能被正确清理
|
||||||
|
options.add_argument('--disable-crash-reporter')
|
||||||
|
options.add_argument('--disable-in-process-stack-traces')
|
||||||
|
options.add_argument('--disable-logging')
|
||||||
|
options.add_argument('--disable-dev-shm-usage')
|
||||||
|
|
||||||
# 如果依然在 Headless 触发检测,建议第一次运行设为 False 手动通过
|
# 创建driver实例
|
||||||
driver = uc.Chrome(options=options)
|
driver = uc.Chrome(options=options, version_main=None)
|
||||||
|
|
||||||
logger.info(f"正在访问: {url}")
|
logger.info(f"正在访问: {url}")
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
@@ -161,15 +176,42 @@ def fetch_and_create_pdf(url):
|
|||||||
logger.exception(f"抓取异常: {e}")
|
logger.exception(f"抓取异常: {e}")
|
||||||
return ""
|
return ""
|
||||||
finally:
|
finally:
|
||||||
# --- 解决 [WinError 6] 句柄无效的关键 ---
|
# --- 确保Chrome进程被完全关闭 ---
|
||||||
if driver:
|
if driver:
|
||||||
try:
|
try:
|
||||||
logger.debug("正在安全关闭浏览器...")
|
logger.debug("正在安全关闭浏览器...")
|
||||||
driver.close() # 先关闭窗口
|
# 先关闭所有标签页和窗口
|
||||||
driver.quit() # 再退出进程
|
try:
|
||||||
except Exception:
|
driver.close()
|
||||||
# 捕获因句柄失效导致的退出异常,避免污染控制台
|
except Exception as e:
|
||||||
pass
|
logger.warning(f"关闭浏览器窗口时出错: {e}")
|
||||||
|
|
||||||
|
# 强制退出所有Chrome进程
|
||||||
|
driver.quit()
|
||||||
|
logger.debug("浏览器已完全关闭")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"关闭浏览器时出错: {e}")
|
||||||
|
|
||||||
|
# 额外保险:强制清理残留的Chrome进程(仅Linux)
|
||||||
|
if os.name != 'nt':
|
||||||
|
try:
|
||||||
|
import psutil
|
||||||
|
current_user = os.getlogin()
|
||||||
|
for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'username']):
|
||||||
|
try:
|
||||||
|
if proc.info['name'] and 'chrome' in proc.info['name'].lower():
|
||||||
|
if proc.info['username'] == current_user:
|
||||||
|
# 检查是否是本次启动的chrome进程(通过命令行参数判断)
|
||||||
|
cmdline = proc.info.get('cmdline', [])
|
||||||
|
if cmdline and any('--user-data-dir=/tmp/playwright' in str(cmd) for cmd in cmdline):
|
||||||
|
logger.info(f"强制终止残留Chrome进程: PID={proc.info['pid']}")
|
||||||
|
proc.kill()
|
||||||
|
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
|
||||||
|
pass
|
||||||
|
except ImportError:
|
||||||
|
logger.debug("未安装psutil,跳过强制清理")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"强制清理Chrome进程时出错: {e}")
|
||||||
|
|
||||||
|
|
||||||
def pdf_file_path_undetected():
|
def pdf_file_path_undetected():
|
||||||
|
|||||||
Reference in New Issue
Block a user