重构:新增定时插件业务逻辑内聚到各自插件目录
- daily_news 插件内置百度新闻与60s图片获取逻辑,移除对 base.func_news 的业务依赖
- epic_free 插件内置周五判断与免费游戏抓取逻辑,移除对 base.func_epic 的业务依赖
- daily_ranking 插件内置排行生成与积分奖励逻辑,不再依赖 MessageStorage 业务封装
- sehuatang_push 改为引用插件目录内的抓取与PDF生成实现,将核心业务代码迁入插件目录
- 确保新插件可独立承载自身业务逻辑,平台层仅提供调度与基础设施能力
This commit is contained in:
@@ -4,8 +4,8 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from base.plugin_common.message_plugin_interface import MessagePluginInterface
|
||||
from base.plugin_common.plugin_interface import PluginStatus
|
||||
from utils.sehuatang.shehuatang import pdf_file_path
|
||||
from utils.sehuatang.shehuatang_undetected import pdf_file_path_undetected
|
||||
from plugins.sehuatang_push.shehuatang import pdf_file_path
|
||||
from plugins.sehuatang_push.shehuatang_undetected import pdf_file_path_undetected
|
||||
|
||||
|
||||
class SehuatangPushPlugin(MessagePluginInterface):
|
||||
|
||||
311
plugins/sehuatang_push/shehuatang.py
Normal file
311
plugins/sehuatang_push/shehuatang.py
Normal file
@@ -0,0 +1,311 @@
|
||||
import time
|
||||
import os
|
||||
import requests
|
||||
from io import BytesIO
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from bs4 import BeautifulSoup
|
||||
from reportlab.lib.pagesizes import letter, A3
|
||||
from reportlab.lib import colors
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak
|
||||
from reportlab.lib.styles import getSampleStyleSheet
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from datetime import datetime
|
||||
from PIL import Image as PILImage
|
||||
import re
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
# Helper: download a single image into memory.
|
||||
def download_image(url):
    """Download a JPG/JPEG/PNG image into memory.

    Args:
        url: Image address. A value whose lower-cased form does not end in
            ``.jpg``, ``.jpeg`` or ``.png`` is skipped without any request.

    Returns:
        A ``BytesIO`` holding the response body, or ``None`` when ``url`` is
        not a string, has another extension, or the request fails.
    """
    # Guard against non-string input so the suffix check below cannot raise.
    if not isinstance(url, str):
        return None
    if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
        return None

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Referer': 'https://tu.a7nz4.us',
    }

    try:
        # Without a timeout a stalled image host would block the whole run.
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        return BytesIO(response.content)
    except requests.exceptions.RequestException as e:
        logger.warning(f"下载图片失败: {e}")
        return None
|
||||
|
||||
|
||||
def fetch_and_create_pdf(url):
    """Scrape a forum thread list and render the selected posts into an encrypted PDF.

    A headless Chrome session opens ``url`` and clicks the age confirmation
    link when it is present. Every thread row (``tbody`` whose id starts with
    ``normalthread``) that contains a ``span.xi1`` element is then fetched
    with ``requests`` using the browser's cookies, and its title, magnet
    links and ``zoomfile`` images are laid out on A3 pages, one post per
    page. The PDF is written to ``temp/JAV`` three directory levels above
    this file and handed to ``add_pdf_encryption``.

    NOTE(review): ``span.xi1`` is presumably the forum's marker for posts
    dated today; the date itself is never compared here.

    Args:
        url: Address of the forum list page to scrape.

    Returns:
        The absolute path of the generated PDF, or ``""`` when scraping or
        PDF generation fails.
    """
    # Function-scope imports keep this block independent of the module header.
    from urllib.parse import urljoin
    from xml.sax.saxutils import escape

    site_root = 'https://www.sehuatang.net/'
    age_gate_xpath = '//a[contains(text(), "满18岁,请点此进入")]'

    driver = None
    try:
        # Configure Selenium.
        options = Options()
        for flag in (
            '--headless',
            '--disable-gpu',
            '--no-sandbox',
            '--disable-dev-shm-usage',  # Linux-specific setting
            '--disable-logging',
            '--log-level=3',
        ):
            options.add_argument(flag)
        options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])

        # Pick the ChromeDriver location per operating system.
        if os.name == 'nt':  # Windows
            # NOTE(review): resolves to <parent of this file's directory>/utils/chromedriver;
            # confirm that is where chromedriver.exe is shipped. A missing
            # file falls back to webdriver_manager below.
            chrome_driver_path = os.path.join(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                "utils", "chromedriver", "chromedriver.exe"
            )
        else:  # Linux
            chrome_driver_path = '/usr/bin/chromedriver'

        try:
            if os.name == 'nt' and not os.path.exists(chrome_driver_path):
                chrome_driver_path = ChromeDriverManager().install()
            service = Service(chrome_driver_path, log_path=os.devnull)
            driver = webdriver.Chrome(service=service, options=options)
        except Exception as e:
            # Second attempt with a driver downloaded by webdriver_manager.
            logger.debug(f"初始化ChromeDriver失败: {e}")
            chrome_driver_path = ChromeDriverManager().install()
            service = Service(chrome_driver_path, log_path=os.devnull)
            driver = webdriver.Chrome(service=service, options=options)

        # Open the list page and pass the age gate if it is shown.
        driver.get(url)
        try:
            enter_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, age_gate_xpath)))
            enter_button.click()
            logger.debug("点击了满18岁按钮")
        except Exception as e:
            logger.warning(f"未找到满18岁按钮,跳过此步骤: {e}")
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]')))

        # The age gate is checked once more after the list has loaded.
        try:
            enter_button = driver.find_element(By.XPATH, age_gate_xpath)
            enter_button.click()
            logger.debug("点击了满18岁按钮")
            time.sleep(5)
        except Exception as e:
            logger.warning(f"未找到满18岁按钮,跳过此步骤: {e}")

        # page_source is already text, so no from_encoding hint is passed
        # (BeautifulSoup ignores it for str input and only emits a warning).
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        posts = soup.find_all('tbody', {'id': lambda x: x and x.startswith('normalthread')})

        today = datetime.now().strftime('%Y-%m-%d')

        # Register the CJK font; the path is relative to the working directory.
        pdfmetrics.registerFont(TTFont('SimHei', 'fonts/simhei.ttf'))
        styles = getSampleStyleSheet()

        title_style = styles['Heading1']
        title_style.fontName = 'SimHei'
        title_style.fontSize = 14
        title_style.textColor = colors.red
        title_style.bold = True

        normal_style = styles['Normal']
        normal_style.fontName = 'SimHei'
        normal_style.fontSize = 14

        content = []

        # Keep rows carrying the span.xi1 marker and process them in reverse order.
        today_posts = [post for post in posts if post.find('span', {'class': 'xi1'})]
        today_posts.reverse()

        # Output goes to temp/JAV three directory levels above this file.
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        pdf_filename = os.path.join(base_dir, 'temp', 'JAV', f"JAV-{today}-{len(today_posts)}.pdf")
        os.makedirs(os.path.dirname(pdf_filename), exist_ok=True)
        doc = SimpleDocTemplate(pdf_filename, pagesize=A3)

        # Usable area inside the template margins.
        page_width, page_height = A3
        content_width = page_width - doc.rightMargin - doc.leftMargin
        content_height = page_height - doc.topMargin - doc.bottomMargin

        # Image limits leave room for the title and links on the same page.
        max_image_width = content_width * 0.95
        max_image_height = content_height * 0.7

        # Reuse the browser cookies for the plain HTTP requests.
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': 'https://www.sehuatang.net/'
        })
        for c in driver.get_cookies():
            try:
                session.cookies.set(c['name'], c['value'], domain=c.get('domain'), path=c.get('path', '/'))
            except Exception:
                session.cookies.set(c['name'], c['value'])

        for post in today_posts:
            title = post.find('a', {'class': 's xst'})
            if not title:
                continue

            post_title = title.get_text()
            post_url = title.get('href')
            logger.info(post_title)

            # A link without href used to raise TypeError and abort the whole run.
            if not post_url:
                logger.warning(f"帖子缺少链接,已跳过: {post_title}")
                continue

            # urljoin also copes with hrefs that are already absolute.
            post_page_url = urljoin(site_root, post_url)
            try:
                resp = session.get(post_page_url, timeout=15)
                resp.raise_for_status()
                post_html = resp.text
            except Exception as e:
                logger.warning(f"获取帖子内容失败: {e}")
                continue

            post_soup = BeautifulSoup(post_html, 'html.parser')
            content_div = post_soup.find('div', {'class': 't_fsz'})
            if not content_div:
                continue

            # Break before every rendered post except the first. This keeps
            # one post per page without comparing Tag objects and without a
            # dangling break when the last post is skipped.
            if content:
                content.append(PageBreak())

            post_text = content_div.get_text(strip=True)
            magnet_links = re.findall(r'magnet:\?[^ \u4e00-\u9fff]+', post_text)

            # Paragraph text is parsed as XML-like markup, so scraped text
            # must be escaped ('&' is part of every magnet link).
            content.append(Paragraph(f" {escape(post_title)}", title_style))
            content.append(Spacer(1, 5))

            for magnet_link in magnet_links:
                content.append(Paragraph(f"<br /><b>{escape(magnet_link)}</b><br />", normal_style))
                content.append(Spacer(1, 12))

            # Only images exposing an absolute 'zoomfile' address are embedded.
            image_links = [
                tag.get('zoomfile')
                for tag in content_div.find_all('img')
                if tag.get('zoomfile') and 'http' in tag.get('zoomfile')
            ]
            for img_link in image_links:
                image = download_image(img_link)
                if not image:
                    continue
                try:
                    with PILImage.open(image) as pil_img:
                        img_width, img_height = pil_img.size

                        # Shrink to fit the page, never enlarge.
                        scale = min(
                            max_image_width / img_width,
                            max_image_height / img_height,
                            1.0,
                        )
                        new_width = img_width * scale
                        new_height = img_height * scale

                        # Hand ReportLab a fresh stream positioned at the start.
                        img_stream = BytesIO(image.getvalue())
                        content.append(Image(img_stream, width=new_width, height=new_height))
                        content.append(Spacer(1, 4))
                        logger.debug(
                            f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
                except Exception as e:
                    logger.error(f"处理图片时出错: {e}")

        # Build and encrypt the PDF.
        try:
            doc.build(content)
            absolute_pdf_path = os.path.abspath(pdf_filename)
            logger.info(f"PDF saved as {absolute_pdf_path}")

            add_pdf_encryption(absolute_pdf_path)
            return absolute_pdf_path
        except Exception as e:
            logger.error(f"生成PDF时出错: {e}")
            return ""
    except Exception as e:
        logger.error(f"抓取帖子时出错: {e}")
        return ""
    finally:
        # Always shut the browser down, whatever happened above.
        if driver:
            try:
                driver.quit()
                logger.debug("Chrome driver已成功关闭")
            except Exception as e:
                logger.error(f"关闭Chrome driver时出错: {e}")
                # Last resort: terminate the driver service process directly.
                try:
                    import psutil
                    process = psutil.Process(driver.service.process.pid)
                    process.terminate()
                    logger.debug("已强制终止Chrome进程")
                except Exception as e2:
                    logger.error(f"强制终止Chrome进程失败: {e2}")
|
||||
|
||||
|
||||
# PDF post-processing and the public entry point.
|
||||
def add_pdf_encryption(pdf_file, password="4000"):
    """Encrypt ``pdf_file`` in place with PyPDF2.

    Args:
        pdf_file: Path of the PDF to protect; the file is replaced by its
            encrypted copy.
        password: Password applied to the rewritten PDF.

    Errors are logged and swallowed; in that case the file at ``pdf_file``
    is left as it was.
    """
    temp_file = f"{pdf_file}.enc.tmp"
    try:
        pdf_writer = PdfWriter()
        pdf_reader = PdfReader(pdf_file)
        for page in pdf_reader.pages:
            pdf_writer.add_page(page)
        pdf_writer.encrypt(password)

        # Write next to the target first so a failed write cannot truncate
        # the original, then swap the files in one step.
        with open(temp_file, "wb") as output_pdf:
            pdf_writer.write(output_pdf)
        os.replace(temp_file, pdf_file)

        # The password is deliberately kept out of the log output.
        logger.debug("PDF加密成功")
    except Exception as e:
        logger.error(f"PDF加密失败: {e}")
        # Best-effort removal of a partially written temporary file.
        try:
            if os.path.exists(temp_file):
                os.remove(temp_file)
        except OSError as cleanup_error:
            logger.debug(f"清理临时文件失败: {cleanup_error}")
|
||||
|
||||
|
||||
def pdf_file_path():
    """Generate the PDF for the fixed forum list URL.

    Returns:
        ``(True, path)`` when ``fetch_and_create_pdf`` returned a non-empty
        path; otherwise ``(False, default)`` where ``default`` is
        ``default.pdf`` next to this file.
    """
    def _fallback_path():
        # default.pdf located in the same directory as this module.
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")

    list_url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
    try:
        generated = fetch_and_create_pdf(list_url)
        if not generated:
            # Generation reported failure: hand back the placeholder path.
            fallback = _fallback_path()
            logger.info(f"PDF生成失败,返回默认路径: {fallback}")
            return False, fallback

        logger.info(f"返回的PDF文件路径:{generated}")
        return True, generated
    except Exception as exc:
        logger.error(f"生成PDF路径时出错: {exc}")
        return False, _fallback_path()
|
||||
|
||||
|
||||
# Script entry point: run one scrape-and-export cycle when executed directly.
if __name__ == "__main__":
    pdf_file_path()
|
||||
241
plugins/sehuatang_push/shehuatang_undetected.py
Normal file
241
plugins/sehuatang_push/shehuatang_undetected.py
Normal file
@@ -0,0 +1,241 @@
|
||||
import time
|
||||
import os
|
||||
import requests
|
||||
from io import BytesIO
|
||||
import undetected_chromedriver as uc
|
||||
|
||||
# 注意:不要禁用析构函数,否则会导致Chrome进程泄漏
|
||||
# if os.name == 'nt':
|
||||
# try:
|
||||
# uc.Chrome.__del__ = lambda self: None
|
||||
# except Exception:
|
||||
# pass
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from bs4 import BeautifulSoup
|
||||
from reportlab.lib.pagesizes import A3
|
||||
from reportlab.lib import colors
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak
|
||||
from reportlab.lib.styles import getSampleStyleSheet
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from datetime import datetime
|
||||
from PIL import Image as PILImage
|
||||
import re
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def download_image(url, session):
    """Fetch an image through the shared session so its cookies are reused.

    Args:
        url: Image address; only ``.jpg``, ``.jpeg`` and ``.png`` suffixes
            (case-insensitive) are requested.
        session: Object exposing ``get(url, timeout=...)``, here the
            ``requests`` session synchronised with the browser.

    Returns:
        A ``BytesIO`` with the response body, or ``None`` for other suffixes
        and for any error, which is logged as a warning.
    """
    allowed_suffixes = ('.jpg', '.jpeg', '.png')
    try:
        if url.lower().endswith(allowed_suffixes):
            reply = session.get(url, timeout=15)
            reply.raise_for_status()
            return BytesIO(reply.content)
        return None
    except Exception as err:
        logger.warning(f"下载图片失败: {err}")
        return None
|
||||
|
||||
|
||||
def add_pdf_encryption(pdf_file, password="4000"):
    """Rewrite ``pdf_file`` as a password-protected PDF using PyPDF2.

    Args:
        pdf_file: Path of the PDF; it is read and then overwritten.
        password: Password passed to ``PdfWriter.encrypt``.

    Any exception is logged as an error and not re-raised.
    """
    try:
        writer = PdfWriter()
        source = PdfReader(pdf_file)
        # Copy every page across before switching encryption on.
        for pg in source.pages:
            writer.add_page(pg)
        writer.encrypt(password)

        with open(pdf_file, "wb") as target:
            writer.write(target)
        logger.debug("PDF加密成功")
    except Exception as exc:
        logger.error(f"PDF加密失败: {exc}")
|
||||
|
||||
|
||||
def fetch_and_create_pdf(url):
    """Scrape a forum thread list with undetected-chromedriver and build an encrypted PDF.

    The browser opens ``url``, waits for the anti-bot interstitial, clicks
    the age confirmation link when present and waits for the thread list.
    Every thread row (``tbody`` whose id starts with ``normalthread``) that
    contains a ``span.xi1`` element is fetched through a ``requests`` session
    carrying the browser's user agent and cookies. Its title, magnet links
    and ``zoomfile`` images are laid out on A3 pages, one post per page.
    The PDF is written to ``temp/JAV`` three directory levels above this
    file and handed to ``add_pdf_encryption``.

    Args:
        url: Address of the forum list page to scrape.

    Returns:
        The path of the generated PDF, or ``""`` when anything outside the
        per-post handling fails.
    """
    # Function-scope imports keep this block independent of the module header.
    import getpass
    from urllib.parse import urljoin
    from xml.sax.saxutils import escape

    site_root = 'https://www.sehuatang.net/'

    driver = None
    try:
        options = uc.ChromeOptions()
        # Headless on non-Windows hosts, visible window on Windows.
        if os.name != 'nt':
            options.headless = True
            options.add_argument('--headless=new')
        else:
            options.headless = False

        for flag in (
            '--no-sandbox',
            '--disable-gpu',
            '--disable-dev-shm-usage',
            '--disable-extensions',
            '--disable-background-networking',
            '--disable-crash-reporter',
            '--disable-in-process-stack-traces',
            '--disable-logging',
        ):
            options.add_argument(flag)

        # NOTE(review): the major version is pinned to 144 to match the Chrome
        # installed on the server; it must be updated together with Chrome.
        driver = uc.Chrome(options=options, version_main=144)

        logger.info(f"正在访问: {url}")
        driver.get(url)

        # Give the Cloudflare interstitial time to finish before looking for
        # the age confirmation link.
        time.sleep(8)

        try:
            enter_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁,请点此进入")]'))
            )
            enter_btn.click()
            logger.debug("点击了年龄确认按钮")
            time.sleep(3)
        except Exception:
            logger.debug("未发现年龄验证按钮,可能已过检测")

        # Make sure the thread list is present.
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]'))
        )

        # Keep rows carrying the span.xi1 marker and process them in reverse order.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        posts = [
            p for p in soup.find_all('tbody', {'id': lambda x: x and x.startswith('normalthread')})
            if p.find('span', {'class': 'xi1'})
        ]
        today_posts = posts[::-1]

        # Register the CJK font; the path is relative to the working directory.
        pdfmetrics.registerFont(TTFont('SimHei', 'fonts/simhei.ttf'))
        styles = getSampleStyleSheet()
        title_style = styles['Heading1']
        title_style.fontName = 'SimHei'
        title_style.textColor = colors.red
        normal_style = styles['Normal']
        normal_style.fontName = 'SimHei'

        # Output goes to temp/JAV three directory levels above this file.
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        save_path = os.path.join(base_dir, 'temp', 'JAV')
        os.makedirs(save_path, exist_ok=True)
        pdf_filename = os.path.join(save_path, f"JAV-{datetime.now().strftime('%Y-%m-%d')}-{len(today_posts)}.pdf")

        doc = SimpleDocTemplate(pdf_filename, pagesize=A3)
        content = []
        # Derive the image limits from the template's own margins so images
        # stay inside the frame.
        max_w = (A3[0] - doc.leftMargin - doc.rightMargin) * 0.95
        max_h = (A3[1] - doc.topMargin - doc.bottomMargin) * 0.7

        # Mirror the browser identity in a plain HTTP session.
        session = requests.Session()
        ua = driver.execute_script("return navigator.userAgent")
        session.headers.update({'User-Agent': ua, 'Referer': 'https://www.sehuatang.net/'})
        for c in driver.get_cookies():
            session.cookies.set(c['name'], c['value'])

        for post in today_posts:
            title_tag = post.find('a', {'class': 's xst'})
            if not title_tag:
                continue

            p_title = title_tag.get_text()
            href = title_tag.get('href')
            # A link without href used to raise TypeError and abort the whole run.
            if not href:
                logger.warning(f"帖子缺少链接,已跳过: {p_title}")
                continue

            # urljoin also copes with hrefs that are already absolute.
            p_url = urljoin(site_root, href)
            logger.info(f"详情页: {p_title}")

            try:
                resp = session.get(p_url, timeout=15)
                # Do not parse error pages as if they were posts.
                resp.raise_for_status()
                p_soup = BeautifulSoup(resp.text, 'html.parser')
                div = p_soup.find('div', {'class': 't_fsz'})
                if not div:
                    continue

                # Break before every rendered post except the first. This
                # avoids comparing Tag objects and leaves no dangling break
                # when a later post fails.
                if content:
                    content.append(PageBreak())

                # Paragraph text is parsed as XML-like markup, so scraped
                # text must be escaped ('&' is part of every magnet link).
                content.append(Paragraph(f" {escape(p_title)}", title_style))
                magnets = re.findall(r'magnet:\?[^ \u4e00-\u9fff]+', div.get_text())
                for m in magnets:
                    content.append(Paragraph(f"<b>{escape(m)}</b>", normal_style))

                for img_tag in div.find_all('img'):
                    src = img_tag.get('zoomfile')
                    if not (src and 'http' in src):
                        continue
                    img_io = download_image(src, session)
                    if not img_io:
                        continue
                    with PILImage.open(img_io) as p_img:
                        iw, ih = p_img.size
                    # Shrink to fit the page, never enlarge.
                    sc = min(max_w / iw, max_h / ih, 1.0)
                    # Hand ReportLab a fresh stream positioned at the start.
                    content.append(Image(BytesIO(img_io.getvalue()), width=iw * sc, height=ih * sc))
            except Exception as e:
                logger.error(f"帖子处理失败: {e}")

        doc.build(content)
        add_pdf_encryption(pdf_filename)
        return pdf_filename

    except Exception as e:
        logger.exception(f"抓取异常: {e}")
        return ""
    finally:
        # Always shut the browser down, whatever happened above.
        if driver:
            try:
                logger.debug("正在安全关闭浏览器...")
                # Close the current window first, then end the session.
                try:
                    driver.close()
                except Exception as e:
                    logger.warning(f"关闭浏览器窗口时出错: {e}")

                driver.quit()
                logger.debug("浏览器已完全关闭")
            except Exception as e:
                logger.error(f"关闭浏览器时出错: {e}")

        # Extra safety net on non-Windows hosts: kill leftover Chrome processes.
        if os.name != 'nt':
            try:
                import psutil
                # getpass.getuser() works without a controlling terminal,
                # where os.getlogin() raises OSError (services, cron, containers).
                current_user = getpass.getuser()
                for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'username']):
                    try:
                        proc_name = proc.info['name']
                        if not proc_name or 'chrome' not in proc_name.lower():
                            continue
                        if proc.info['username'] != current_user:
                            continue
                        # NOTE(review): only processes started with a
                        # /tmp/playwright user-data dir are matched; confirm
                        # this is the profile path used for this browser.
                        cmdline = proc.info.get('cmdline', [])
                        if cmdline and any('--user-data-dir=/tmp/playwright' in str(cmd) for cmd in cmdline):
                            logger.info(f"强制终止残留Chrome进程: PID={proc.info['pid']}")
                            proc.kill()
                    except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                        pass
            except ImportError:
                logger.debug("未安装psutil,跳过强制清理")
            except Exception as e:
                logger.warning(f"强制清理Chrome进程时出错: {e}")
|
||||
|
||||
|
||||
def pdf_file_path_undetected():
    """Run the undetected-chromedriver scrape for the fixed forum list URL.

    Returns:
        ``(True, path)`` when ``fetch_and_create_pdf`` returned a non-empty
        path; otherwise ``(False, default)`` where ``default`` is
        ``default.pdf`` in this module's directory.
    """
    try:
        target = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
        result = fetch_and_create_pdf(target)
        if result:
            logger.info(f"返回的PDF文件路径:{result}")
            return True, result

        # Generation reported failure: fall back to the placeholder file.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        placeholder = os.path.join(module_dir, "default.pdf")
        logger.info(f"PDF生成失败,返回默认路径: {placeholder}")
        return False, placeholder
    except Exception as err:
        logger.error(f"生成PDF路径时出错: {err}")
        module_dir = os.path.dirname(os.path.abspath(__file__))
        return False, os.path.join(module_dir, "default.pdf")
|
||||
|
||||
|
||||
# Script entry point: run one scrape-and-export cycle when executed directly.
if __name__ == "__main__":
    pdf_file_path_undetected()
|
||||
Reference in New Issue
Block a user