From f5a51fa2ec8f97bdb6957e977ea82854e1370f25 Mon Sep 17 00:00:00 2001 From: liuwei Date: Thu, 25 Dec 2025 15:36:02 +0800 Subject: [PATCH] =?UTF-8?q?sehuatang=20pdf=20=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- robot.py | 9 ++---- utils/sehuatang/shehuatang.py | 39 +++++++++++++----------- utils/sehuatang/shehuatang_undetected.py | 19 ++++++++++-- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/robot.py b/robot.py index a00215a..2e586b2 100644 --- a/robot.py +++ b/robot.py @@ -702,12 +702,9 @@ class Robot: async def generate_sehuatang_pdf(self): try: self.LOG.info("开始生成PDF,generate_sehuatang_pdf") - try: - path = pdf_file_path() - except Exception as e: - self.LOG.error(f"generate_sehuatang_pdf error: {e}") - path = pdf_file_path_undetected() - + tag, path = pdf_file_path() + if not tag: + tag, path = pdf_file_path_undetected() # 暂时只发4K群 await self.send_group_file_message(path, Feature.PDF_CAPABILITY) except Exception as e: diff --git a/utils/sehuatang/shehuatang.py b/utils/sehuatang/shehuatang.py index 5879548..2fcbf80 100644 --- a/utils/sehuatang/shehuatang.py +++ b/utils/sehuatang/shehuatang.py @@ -23,6 +23,7 @@ from PyPDF2 import PdfReader, PdfWriter from loguru import logger + # download_image 函数保持不变 def download_image(url): """下载大于100KB的图片并返回临时文件路径,仅支持jpg、jpeg和png格式""" @@ -57,7 +58,7 @@ def fetch_and_create_pdf(url): options.add_argument('--disable-logging') options.add_argument('--log-level=3') options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging']) - + # 根据操作系统选择不同的ChromeDriver路径处理方式 if os.name == 'nt': # Windows chrome_driver_path = os.path.join( @@ -66,7 +67,7 @@ def fetch_and_create_pdf(url): ) else: # Linux chrome_driver_path = '/usr/bin/chromedriver' # 使用系统PATH中的chromedriver - + try: if os.name == 'nt' and not os.path.exists(chrome_driver_path): chrome_driver_path = ChromeDriverManager().install() @@ -81,12 +82,14 @@ def fetch_and_create_pdf(url): # 获取目标页面 driver.get(url) try: - enter_button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁,请点此进入")]'))) + enter_button = WebDriverWait(driver, 5).until( + EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁,请点此进入")]'))) enter_button.click() logger.debug("点击了满18岁按钮") except Exception as e: logger.warning(f"未找到满18岁按钮,跳过此步骤: {e}") - WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]'))) + WebDriverWait(driver, 10).until( + EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]'))) # 处理年龄验证按钮 try: @@ -132,16 +135,16 @@ def fetch_and_create_pdf(url): # 设置PDF # 修改PDF文件路径到项目根目录的temp目录下 - pdf_filename = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), - 'temp', - f"JAV-{today}-{len(today_posts)}.pdf") + pdf_filename = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + 'temp', + f"JAV-{today}-{len(today_posts)}.pdf") doc = SimpleDocTemplate(pdf_filename, pagesize=A3) - + # 计算内容区域的宽度和高度 page_width, page_height = A3 content_width = page_width - doc.rightMargin - doc.leftMargin content_height = page_height - doc.topMargin - doc.bottomMargin - + # 设置最大图片尺寸,留出一些边距 max_image_width = content_width * 0.95 max_image_height = content_height * 0.7 # 留出足够空间给文本和其他元素 @@ -210,19 +213,20 @@ def fetch_and_create_pdf(url): scale_width = max_image_width / img_width scale_height = max_image_height / img_height scale = min(scale_width, scale_height, 1.0) # 不超过原始大小 - + # 计算新的尺寸 new_width = img_width * scale new_height = img_height * scale - + # 重置文件指针 image.seek(0) img_stream = BytesIO(image.getvalue()) - + # 添加图片到内容中,使用计算后的尺寸 content.append(Image(img_stream, width=new_width, height=new_height)) content.append(Spacer(1, 4)) - logger.debug(f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}") + logger.debug( + f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}") except Exception as e: logger.error(f"处理图片时出错: {e}") @@ -264,6 +268,7 @@ def fetch_and_create_pdf(url): except Exception as e2: logger.error(f"强制终止Chrome进程失败: {e2}") + # add_pdf_encryption 和 pdf_file_path 函数保持不变 def add_pdf_encryption(pdf_file, password="4000"): """使用PyPDF2为PDF添加加密保护""" @@ -282,21 +287,21 @@ def add_pdf_encryption(pdf_file, password="4000"): def pdf_file_path(): try: - url = 'https://www.sehuatang.org/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481' + url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481' pdf_path = fetch_and_create_pdf(url) if pdf_path: logger.info(f"返回的PDF文件路径:{pdf_path}") - return pdf_path + return True, pdf_path else: # 如果生成失败,返回一个默认的PDF路径 default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf") logger.info(f"PDF生成失败,返回默认路径: {default_path}") - return default_path + return False, default_path except Exception as e: logger.error(f"生成PDF路径时出错: {e}") # 返回一个默认路径 default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf") - return default_path + return False, default_path if __name__ == "__main__": diff --git a/utils/sehuatang/shehuatang_undetected.py b/utils/sehuatang/shehuatang_undetected.py index 26e01d8..a234095 100644 --- a/utils/sehuatang/shehuatang_undetected.py +++ b/utils/sehuatang/shehuatang_undetected.py @@ -3,6 +3,7 @@ import os import requests from io import BytesIO import undetected_chromedriver as uc + if os.name == 'nt': try: uc.Chrome.__del__ = lambda self: None @@ -172,8 +173,22 @@ def fetch_and_create_pdf(url): def pdf_file_path_undetected(): - url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481' - return fetch_and_create_pdf(url) + try: + url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481' + pdf_path = fetch_and_create_pdf(url) + if pdf_path: + logger.info(f"返回的PDF文件路径:{pdf_path}") + return True, pdf_path + else: + # 如果生成失败,返回一个默认的PDF路径 + default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf") + logger.info(f"PDF生成失败,返回默认路径: {default_path}") + return False, default_path + except Exception as e: + logger.error(f"生成PDF路径时出错: {e}") + # 返回一个默认路径 + default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf") + return False, default_path if __name__ == "__main__":