重构：新增定时插件业务逻辑内聚到各自插件目录

- daily_news 插件内置百度新闻与60s图片获取逻辑，移除对 base.func_news 的业务依赖
- epic_free 插件内置周五判断与免费游戏抓取逻辑，移除对 base.func_epic 的业务依赖
- daily_ranking 插件内置排行生成与积分奖励逻辑，不再依赖 MessageStorage 业务封装
- sehuatang_push 改为引用插件目录内的抓取与PDF生成实现，将核心业务代码迁入插件目录
- 确保新插件可独立承载自身业务逻辑，平台层仅提供调度与基础设施能力
2026-04-16 16:16:07 +08:00
parent 547c5533d7
commit bb73d07809
6 changed files with 717 additions and 16 deletions
--- a/plugins/sehuatang_push/main.py
+++ b/plugins/sehuatang_push/main.py
@@ -4,8 +4,8 @@ from typing import Any, Dict, List, Optional, Tuple

 from base.plugin_common.message_plugin_interface import MessagePluginInterface
 from base.plugin_common.plugin_interface import PluginStatus
-from utils.sehuatang.shehuatang import pdf_file_path
-from utils.sehuatang.shehuatang_undetected import pdf_file_path_undetected
+from plugins.sehuatang_push.shehuatang import pdf_file_path
+from plugins.sehuatang_push.shehuatang_undetected import pdf_file_path_undetected


 class SehuatangPushPlugin(MessagePluginInterface):
--- a/plugins/sehuatang_push/shehuatang.py
+++ b/plugins/sehuatang_push/shehuatang.py
@@ -0,0 +1,311 @@
+import time
+import os
+import requests
+from io import BytesIO
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from webdriver_manager.chrome import ChromeDriverManager
+from bs4 import BeautifulSoup
+from reportlab.lib.pagesizes import letter, A3
+from reportlab.lib import colors
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak
+from reportlab.lib.styles import getSampleStyleSheet
+from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase import pdfmetrics
+from datetime import datetime
+from PIL import Image as PILImage
+import re
+from PyPDF2 import PdfReader, PdfWriter
+
+from loguru import logger
+
+
+# download_image 函数保持不变
+def download_image(url):
+    """Download a .jpg/.jpeg/.png image; return a BytesIO, or None on failure.
+
+    None is also returned for any other URL suffix. No size filter is applied.
+    """
+    if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
+        return None
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Referer': 'https://tu.a7nz4.us',
+    }
+    try:
+        response = requests.get(url, headers=headers, timeout=15)  # bound a stalled host
+        response.raise_for_status()
+        return BytesIO(response.content)
+    except requests.exceptions.RequestException as e:
+        logger.warning(f"下载图片失败: {e}")
+        return None
+
+
+def fetch_and_create_pdf(url):
+    """Scrape the thread listing at ``url`` and render its posts into an encrypted PDF.
+
+    Returns the absolute path of the generated PDF, or "" if scraping or building fails.
+    """
+    driver = None
+    try:
+        # Configure Selenium
+        options = Options()
+        options.add_argument('--headless')  # classic headless switch (not '--headless=new')
+        options.add_argument('--disable-gpu')
+        options.add_argument('--no-sandbox')
+        options.add_argument('--disable-dev-shm-usage')  # Linux-specific setting
+        options.add_argument('--disable-logging')
+        options.add_argument('--log-level=3')
+        options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
+
+        # ChromeDriver path per OS. NOTE(review): the Windows path is <parent of this dir>/utils/chromedriver - confirm.
+        if os.name == 'nt':  # Windows
+            chrome_driver_path = os.path.join(
+                os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+                "utils", "chromedriver", "chromedriver.exe"
+            )
+        else:  # Linux
+            chrome_driver_path = '/usr/bin/chromedriver'  # fixed system location
+
+        try:
+            if os.name == 'nt' and not os.path.exists(chrome_driver_path):
+                chrome_driver_path = ChromeDriverManager().install()
+            service = Service(chrome_driver_path, log_path=os.devnull)
+            driver = webdriver.Chrome(service=service, options=options)
+        except Exception as e:
+            logger.debug(f"初始化ChromeDriver失败: {e}")
+            chrome_driver_path = ChromeDriverManager().install()
+            service = Service(chrome_driver_path, log_path=os.devnull)
+            driver = webdriver.Chrome(service=service, options=options)
+
+        # Load the target page and click through the age-confirmation link if it shows up
+        driver.get(url)
+        try:
+            enter_button = WebDriverWait(driver, 5).until(
+                EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁，请点此进入")]')))
+            enter_button.click()
+            logger.debug("点击了满18岁按钮")
+        except Exception as e:
+            logger.warning(f"未找到满18岁按钮，跳过此步骤: {e}")
+        WebDriverWait(driver, 10).until(
+            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]')))
+
+        # Second attempt at the age-confirmation link, in case it is still present
+        try:
+            enter_button = driver.find_element(By.XPATH, '//a[contains(text(), "满18岁，请点此进入")]')
+            enter_button.click()
+            logger.debug("点击了满18岁按钮")
+            time.sleep(5)
+        except Exception as e:
+            logger.warning(f"未找到满18岁按钮，跳过此步骤: {e}")
+
+        # Parse the listing page (page_source is already str, so no from_encoding is needed)
+        html = driver.page_source
+        soup = BeautifulSoup(html, 'html.parser')
+        posts = soup.find_all('tbody', {'id': lambda x: x and x.startswith('normalthread')})
+
+        # Today's date; used only in the output file name
+        today = datetime.now().strftime('%Y-%m-%d')
+
+        # Register the CJK font. NOTE(review): the path is relative to the working directory - confirm.
+        pdfmetrics.registerFont(TTFont('SimHei', 'fonts/simhei.ttf'))
+        styles = getSampleStyleSheet()
+
+        # Paragraph styles
+        title_style = styles['Heading1']
+        title_style.fontName = 'SimHei'
+        title_style.fontSize = 14
+        title_style.textColor = colors.red
+        title_style.bold = True
+
+        normal_style = styles['Normal']
+        normal_style.fontName = 'SimHei'
+        normal_style.fontSize = 14
+
+        content = []
+
+        # Keep rows that contain a span.xi1 (presumably today's posts - no date is compared), then reverse
+        today_posts = []
+        for post in posts:
+            post_time_span = post.find('span', {'class': 'xi1'})
+            if post_time_span:
+                today_posts.append(post)
+        today_posts = today_posts[::-1]  # process in reverse order
+
+        # Output file lives in <three directories above this file>/temp/JAV
+        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+        pdf_filename = os.path.join(base_dir, 'temp', 'JAV', f"JAV-{today}-{len(today_posts)}.pdf")
+
+        # Make sure the directory exists (exist_ok avoids a check-then-create race)
+        pdf_dir = os.path.dirname(pdf_filename)
+        os.makedirs(pdf_dir, exist_ok=True)
+        doc = SimpleDocTemplate(pdf_filename, pagesize=A3)
+
+        # Width and height of the printable area
+        page_width, page_height = A3
+        content_width = page_width - doc.rightMargin - doc.leftMargin
+        content_height = page_height - doc.topMargin - doc.bottomMargin
+
+        # Maximum image size, leaving some slack
+        max_image_width = content_width * 0.95
+        max_image_height = content_height * 0.7  # leave room for text and other elements
+
+        # Copy the browser's cookies into a requests session used to fetch each post
+        session = requests.Session()
+        session.headers.update({
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Referer': 'https://www.sehuatang.net/'
+        })
+        for c in driver.get_cookies():
+            try:
+                session.cookies.set(c['name'], c['value'], domain=c.get('domain'), path=c.get('path', '/'))
+            except Exception:
+                session.cookies.set(c['name'], c['value'])
+        for post in today_posts:
+            title = post.find('a', {'class': 's xst'})
+            if title:
+                post_title = title.get_text()
+                post_url = title.get('href')
+                logger.info(post_title)
+
+                # Fetch the post page; a failed request skips this post
+                post_page_url = 'https://www.sehuatang.net/' + post_url
+                try:
+                    resp = session.get(post_page_url, timeout=15)
+                    resp.raise_for_status()
+                    post_html = resp.text
+                except Exception as e:
+                    logger.warning(f"获取帖子内容失败: {e}")
+                    continue
+                post_soup = BeautifulSoup(post_html, 'html.parser')
+                content_div = post_soup.find('div', {'class': 't_fsz'})
+
+                if content_div:
+                    # Extract the text and the magnet links it contains
+                    post_text = content_div.get_text(strip=True)
+                    magnet_links = re.findall(r'magnet:\?[^ \u4e00-\u9fff]+', post_text)
+
+                    # Title
+                    content.append(Paragraph(f" {post_title}", title_style))
+                    content.append(Spacer(1, 5))
+
+                    # Magnet links (the loop is a no-op when none were found)
+                    for magnet_link in magnet_links:
+                        content.append(Paragraph(f"<br /><b>{magnet_link}</b><br />", normal_style))
+                        content.append(Spacer(1, 12))
+
+                    # Collect image URLs from the zoomfile attribute
+                    image_links = []
+                    images = content_div.find_all('img')
+                    for img in images:
+                        if img.get('zoomfile') and 'http' in img.get('zoomfile'):
+                            image_links.append(img.get('zoomfile'))
+
+                    for img_link in image_links:
+                        image = download_image(img_link)
+                        if image:
+                            try:
+                                # Read the pixel size with PIL
+                                with PILImage.open(image) as pil_img:
+                                    img_width, img_height = pil_img.size
+                                    # Scale factor that fits the image inside the page limits
+                                    scale_width = max_image_width / img_width
+                                    scale_height = max_image_height / img_height
+                                    scale = min(scale_width, scale_height, 1.0)  # never upscale
+
+                                    # Scaled dimensions
+                                    new_width = img_width * scale
+                                    new_height = img_height * scale
+
+                                    # Copy the bytes into a fresh stream for reportlab
+                                    image.seek(0)
+                                    img_stream = BytesIO(image.getvalue())
+
+                                    # Add the image using the computed size
+                                    content.append(Image(img_stream, width=new_width, height=new_height))
+                                    content.append(Spacer(1, 4))
+                                    logger.debug(
+                                        f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
+                            except Exception as e:
+                                logger.error(f"处理图片时出错: {e}")
+
+                    # Page break after every post except the last one
+                    if post != today_posts[-1]:
+                        content.append(PageBreak())
+
+        # Build the PDF
+        try:
+            doc.build(content)
+            absolute_pdf_path = os.path.abspath(pdf_filename)
+            logger.info(f"PDF saved as {absolute_pdf_path}")
+
+            # Encrypt the PDF in place
+            add_pdf_encryption(absolute_pdf_path)
+            return absolute_pdf_path
+        except Exception as e:
+            logger.error(f"生成PDF时出错: {e}")
+            # Signal failure to the caller with an empty path
+            return ""
+    except Exception as e:
+        logger.error(f"抓取帖子时出错: {e}")
+        # Signal failure to the caller with an empty path
+        return ""
+    finally:
+        # Always shut the driver down, whatever happened above
+        if driver:
+            try:
+                driver.quit()
+                logger.debug("Chrome driver已成功关闭")
+            except Exception as e:
+                logger.error(f"关闭Chrome driver时出错: {e}")
+                # Last resort: terminate the driver service process directly
+                try:
+                    import psutil
+                    process = psutil.Process(driver.service.process.pid)
+                    process.terminate()
+                    logger.debug("已强制终止Chrome进程")
+                except Exception as e2:
+                    logger.error(f"强制终止Chrome进程失败: {e2}")
+
+
+# add_pdf_encryption 和 pdf_file_path 函数保持不变
+def add_pdf_encryption(pdf_file, password="4000"):
+    """Encrypt ``pdf_file`` in place with ``password`` using PyPDF2.
+    Failures are logged and not raised; the password is never logged."""
+    try:
+        pdf_writer = PdfWriter()
+        for page in PdfReader(pdf_file).pages:
+            pdf_writer.add_page(page)
+        pdf_writer.encrypt(password)
+        with open(pdf_file, "wb") as output_pdf:
+            pdf_writer.write(output_pdf)
+        logger.debug("PDF加密成功")
+    except Exception as e:
+        logger.error(f"PDF加密失败: {e}")
+
+
+def pdf_file_path():
+    """Generate the PDF for the hard-coded forum listing.
+
+    Returns (True, pdf_path), or (False, <this module's dir>/default.pdf) on failure.
+    """
+    default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
+    listing_url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
+    try:
+        pdf_path = fetch_and_create_pdf(listing_url)
+    except Exception as e:
+        logger.error(f"生成PDF路径时出错: {e}")
+        return False, default_path
+    if pdf_path:
+        logger.info(f"返回的PDF文件路径：{pdf_path}")
+        return True, pdf_path
+    logger.info(f"PDF生成失败，返回默认路径: {default_path}")
+    return False, default_path
+
+
+if __name__ == "__main__":  # manual run: build the PDF once when executed as a script
+    pdf_file_path()
--- a/plugins/sehuatang_push/shehuatang_undetected.py
+++ b/plugins/sehuatang_push/shehuatang_undetected.py
@@ -0,0 +1,241 @@
+import time
+import os
+import requests
+from io import BytesIO
+import undetected_chromedriver as uc
+
+# 注意：不要禁用析构函数，否则会导致Chrome进程泄漏
+# if os.name == 'nt':
+#     try:
+#         uc.Chrome.__del__ = lambda self: None
+#     except Exception:
+#         pass
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from bs4 import BeautifulSoup
+from reportlab.lib.pagesizes import A3
+from reportlab.lib import colors
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak
+from reportlab.lib.styles import getSampleStyleSheet
+from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase import pdfmetrics
+from datetime import datetime
+from PIL import Image as PILImage
+import re
+from PyPDF2 import PdfReader, PdfWriter
+from loguru import logger
+
+
+def download_image(url, session):
+    """Fetch a jpg/jpeg/png through the shared ``session``; BytesIO on success, else None."""
+    image_suffixes = ('.jpg', '.jpeg', '.png')
+    try:
+        if url.lower().endswith(image_suffixes):
+            reply = session.get(url, timeout=15)
+            reply.raise_for_status()
+            return BytesIO(reply.content)
+    except Exception as e:
+        logger.warning(f"下载图片失败: {e}")
+    return None
+
+
+def add_pdf_encryption(pdf_file, password="4000"):
+    """Encrypt ``pdf_file`` in place with ``password``; failures are logged, not raised."""
+    try:
+        locked = PdfWriter()
+        for sheet in PdfReader(pdf_file).pages:
+            locked.add_page(sheet)
+        locked.encrypt(password)
+        with open(pdf_file, "wb") as sink:
+            locked.write(sink)
+        logger.debug("PDF加密成功")
+    except Exception as e:
+        logger.error(f"PDF加密失败: {e}")
+
+
+def fetch_and_create_pdf(url):
+    """Scrape the thread listing at ``url`` via undetected_chromedriver into an encrypted PDF.
+
+    Returns the PDF path, or "" when any step fails; the browser is closed in ``finally``.
+    """
+    driver = None
+    try:
+        options = uc.ChromeOptions()
+        # Detection-avoidance setup: run headless on non-Windows hosts, with a visible window on Windows
+        if os.name != 'nt':
+            options.headless = True
+            options.add_argument('--headless=new')  # new-style headless mode
+        else:
+            options.headless = False
+
+        options.add_argument('--no-sandbox')
+        options.add_argument('--disable-gpu')
+        options.add_argument('--disable-dev-shm-usage')
+        options.add_argument('--disable-extensions')
+        options.add_argument('--disable-background-networking')
+        # Flags intended to let the browser process be cleaned up properly
+        options.add_argument('--disable-crash-reporter')
+        options.add_argument('--disable-in-process-stack-traces')
+        options.add_argument('--disable-logging')
+
+        # Create the driver. version_main pins the ChromeDriver major version to 144 (per the
+        # original note, the server's current Chrome) rather than relying on auto-detection.
+        driver = uc.Chrome(options=options, version_main=144)
+
+        logger.info(f"正在访问: {url}")
+        driver.get(url)
+
+        # Fixed pause (presumably for the Cloudflare check), then handle the age-confirmation link
+        time.sleep(8)
+
+        try:
+            enter_btn = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁，请点此进入")]'))
+            )
+            enter_btn.click()
+            logger.debug("点击了年龄确认按钮")
+            time.sleep(3)
+        except Exception:
+            logger.debug("未发现年龄验证按钮，可能已过检测")
+
+        # Make sure the thread list has loaded
+        WebDriverWait(driver, 20).until(
+            EC.presence_of_element_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]'))
+        )
+
+        # Keep thread rows that contain a span.xi1, in reverse order
+        soup = BeautifulSoup(driver.page_source, 'html.parser')
+        posts = [p for p in soup.find_all('tbody', {'id': lambda x: x and x.startswith('normalthread')}) if
+                 p.find('span', {'class': 'xi1'})]
+        today_posts = posts[::-1]
+
+        # Register the CJK font. NOTE(review): the path is relative to the working directory - confirm.
+        pdfmetrics.registerFont(TTFont('SimHei', 'fonts/simhei.ttf'))
+        styles = getSampleStyleSheet()
+        title_style = styles['Heading1']
+        title_style.fontName = 'SimHei'
+        title_style.textColor = colors.red
+        normal_style = styles['Normal']
+        normal_style.fontName = 'SimHei'
+
+        # Output goes to <three directories above this file>/temp/JAV
+        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+        save_path = os.path.join(base_dir, 'temp', 'JAV')
+        os.makedirs(save_path, exist_ok=True)
+        pdf_filename = os.path.join(save_path, f"JAV-{datetime.now().strftime('%Y-%m-%d')}-{len(today_posts)}.pdf")
+
+        doc = SimpleDocTemplate(pdf_filename, pagesize=A3)
+        content = []
+        max_w, max_h = (A3[0] - 72) * 0.95, (A3[1] - 72) * 0.7
+
+        # Mirror the browser's user agent and cookies into a requests session
+        session = requests.Session()
+        ua = driver.execute_script("return navigator.userAgent")
+        session.headers.update({'User-Agent': ua, 'Referer': 'https://www.sehuatang.net/'})
+        for c in driver.get_cookies():
+            session.cookies.set(c['name'], c['value'])
+
+        # Walk the posts; a failure inside one post is logged and does not stop the others
+        for post in today_posts:
+            title_tag = post.find('a', {'class': 's xst'})
+            if not title_tag: continue
+
+            p_title = title_tag.get_text()
+            p_url = 'https://www.sehuatang.net/' + title_tag.get('href')
+            logger.info(f"详情页: {p_title}")
+
+            try:
+                resp = session.get(p_url, timeout=15)
+                p_soup = BeautifulSoup(resp.text, 'html.parser')
+                div = p_soup.find('div', {'class': 't_fsz'})
+
+                if div:
+                    content.append(Paragraph(f" {p_title}", title_style))
+                    magnets = re.findall(r'magnet:\?[^ \u4e00-\u9fff]+', div.get_text())
+                    for m in magnets:
+                        content.append(Paragraph(f"<b>{m}</b>", normal_style))
+
+                    for img_tag in div.find_all('img'):
+                        src = img_tag.get('zoomfile')
+                        if src and 'http' in src:
+                            img_io = download_image(src, session)
+                            if img_io:
+                                with PILImage.open(img_io) as p_img:
+                                    iw, ih = p_img.size
+                                    sc = min(max_w / iw, max_h / ih, 1.0)
+                                    img_io.seek(0)
+                                    content.append(Image(img_io, width=iw * sc, height=ih * sc))
+
+                    if post != today_posts[-1]: content.append(PageBreak())
+            except Exception as e:
+                logger.error(f"帖子处理失败: {e}")
+
+        doc.build(content)
+        add_pdf_encryption(pdf_filename)
+        return pdf_filename
+
+    except Exception as e:
+        logger.exception(f"抓取异常: {e}")
+        return ""
+    finally:
+        # --- Make sure the browser is shut down ---
+        if driver:
+            try:
+                logger.debug("正在安全关闭浏览器...")
+                # Close the current window first
+                try:
+                    driver.close()
+                except Exception as e:
+                    logger.warning(f"关闭浏览器窗口时出错: {e}")
+
+                # Then end the whole browser session
+                driver.quit()
+                logger.debug("浏览器已完全关闭")
+            except Exception as e:
+                logger.error(f"关闭浏览器时出错: {e}")
+
+        # Extra safety net: kill leftover Chrome processes (non-Windows only)
+        if os.name != 'nt':
+            try:
+                import getpass
+                import psutil
+                current_user = getpass.getuser()  # os.getlogin() fails without a controlling terminal
+                for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'username']):
+                    try:
+                        if proc.info['name'] and 'chrome' in proc.info['name'].lower():
+                            if proc.info['username'] == current_user:
+                                # NOTE(review): matches only a /tmp/playwright* profile dir - confirm it applies here
+                                cmdline = proc.info.get('cmdline', [])
+                                if cmdline and any('--user-data-dir=/tmp/playwright' in str(cmd) for cmd in cmdline):
+                                    logger.info(f"强制终止残留Chrome进程: PID={proc.info['pid']}")
+                                    proc.kill()
+                    except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
+                        pass
+            except ImportError:
+                logger.debug("未安装psutil，跳过强制清理")
+            except Exception as e:
+                logger.warning(f"强制清理Chrome进程时出错: {e}")
+
+
+def pdf_file_path_undetected():
+    """Generate the PDF for the hard-coded forum listing with the undetected driver.
+
+    Returns (True, pdf_path), or (False, <this module's dir>/default.pdf) on failure.
+    """
+    default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
+    listing_url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
+    try:
+        pdf_path = fetch_and_create_pdf(listing_url)
+    except Exception as e:
+        logger.error(f"生成PDF路径时出错: {e}")
+        return False, default_path
+    if pdf_path:
+        logger.info(f"返回的PDF文件路径：{pdf_path}")
+        return True, pdf_path
+    logger.info(f"PDF生成失败，返回默认路径: {default_path}")
+    return False, default_path
+
+
+if __name__ == "__main__":  # manual run: build the PDF once when executed as a script
+    pdf_file_path_undetected()