sehuatang pdf 优化

This commit is contained in:
liuwei
2025-12-25 15:36:02 +08:00
parent 408bf12342
commit f5a51fa2ec
3 changed files with 42 additions and 25 deletions

View File

@@ -702,12 +702,9 @@ class Robot:
async def generate_sehuatang_pdf(self):
try:
self.LOG.info("开始生成PDF,generate_sehuatang_pdf")
try:
path = pdf_file_path()
except Exception as e:
self.LOG.error(f"generate_sehuatang_pdf error: {e}")
path = pdf_file_path_undetected()
tag, path = pdf_file_path()
if not tag:
tag, path = pdf_file_path_undetected()
# 暂时只发4K群
await self.send_group_file_message(path, Feature.PDF_CAPABILITY)
except Exception as e:

View File

@@ -23,6 +23,7 @@ from PyPDF2 import PdfReader, PdfWriter
from loguru import logger
# download_image 函数保持不变
def download_image(url):
"""下载大于100KB的图片并返回临时文件路径仅支持jpg、jpeg和png格式"""
@@ -81,12 +82,14 @@ def fetch_and_create_pdf(url):
# 获取目标页面
driver.get(url)
try:
enter_button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁请点此进入")]')))
enter_button = WebDriverWait(driver, 5).until(
EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁请点此进入")]')))
enter_button.click()
logger.debug("点击了满18岁按钮")
except Exception as e:
logger.warning(f"未找到满18岁按钮跳过此步骤: {e}")
WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]')))
WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]')))
# 处理年龄验证按钮
try:
@@ -133,8 +136,8 @@ def fetch_and_create_pdf(url):
# 设置PDF
# 修改PDF文件路径到项目根目录的temp目录下
pdf_filename = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
'temp',
f"JAV-{today}-{len(today_posts)}.pdf")
'temp',
f"JAV-{today}-{len(today_posts)}.pdf")
doc = SimpleDocTemplate(pdf_filename, pagesize=A3)
# 计算内容区域的宽度和高度
@@ -222,7 +225,8 @@ def fetch_and_create_pdf(url):
# 添加图片到内容中,使用计算后的尺寸
content.append(Image(img_stream, width=new_width, height=new_height))
content.append(Spacer(1, 4))
logger.debug(f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
logger.debug(
f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
except Exception as e:
logger.error(f"处理图片时出错: {e}")
@@ -264,6 +268,7 @@ def fetch_and_create_pdf(url):
except Exception as e2:
logger.error(f"强制终止Chrome进程失败: {e2}")
# add_pdf_encryption 和 pdf_file_path 函数保持不变
def add_pdf_encryption(pdf_file, password="4000"):
"""使用PyPDF2为PDF添加加密保护"""
@@ -282,21 +287,21 @@ def add_pdf_encryption(pdf_file, password="4000"):
def pdf_file_path():
    """Build the forum PDF and report whether generation succeeded.

    Calls ``fetch_and_create_pdf`` on the forum listing URL and wraps the
    outcome in a ``(success, path)`` pair so the caller can decide whether
    to fall back to another generator.

    Returns:
        tuple: ``(True, pdf_path)`` when ``fetch_and_create_pdf`` returns a
        truthy path; ``(False, default_path)`` when it returns a falsy
        value or raises, where ``default_path`` is ``default.pdf`` in this
        module's directory.
    """
    # Fallback path returned on every failure branch; computed once instead
    # of being rebuilt in each branch.
    default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
    try:
        url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
        pdf_path = fetch_and_create_pdf(url)
        if pdf_path:
            logger.info(f"返回的PDF文件路径{pdf_path}")
            return True, pdf_path
        # Generation finished without producing a file.
        logger.info(f"PDF生成失败返回默认路径: {default_path}")
        return False, default_path
    except Exception as e:
        # Best-effort: never propagate, always hand back a (flag, path) pair.
        logger.error(f"生成PDF路径时出错: {e}")
        return False, default_path
if __name__ == "__main__":

View File

@@ -3,6 +3,7 @@ import os
import requests
from io import BytesIO
import undetected_chromedriver as uc
if os.name == 'nt':
try:
uc.Chrome.__del__ = lambda self: None
@@ -172,8 +173,22 @@ def fetch_and_create_pdf(url):
def pdf_file_path_undetected():
    """Build the forum PDF with this module's generator and report success.

    Calls this module's ``fetch_and_create_pdf`` on the forum listing URL
    and wraps the outcome in a ``(success, path)`` pair.

    Returns:
        tuple: ``(True, pdf_path)`` when ``fetch_and_create_pdf`` returns a
        truthy path; ``(False, default_path)`` when it returns a falsy
        value or raises, where ``default_path`` is ``default.pdf`` in this
        module's directory.
    """
    # Fallback path returned on every failure branch; computed once instead
    # of being rebuilt in each branch.
    default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
    try:
        url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
        pdf_path = fetch_and_create_pdf(url)
        if pdf_path:
            logger.info(f"返回的PDF文件路径{pdf_path}")
            return True, pdf_path
        # Generation finished without producing a file.
        logger.info(f"PDF生成失败返回默认路径: {default_path}")
        return False, default_path
    except Exception as e:
        # Best-effort: never propagate, always hand back a (flag, path) pair.
        logger.error(f"生成PDF路径时出错: {e}")
        return False, default_path
if __name__ == "__main__":