优化排版

2025-02-08 15:15:07 +08:00
parent 9dc064682f
commit cc7e8ced51
1 changed files with 25 additions and 35 deletions
--- a/sehuatang/shehuatang.py
+++ b/sehuatang/shehuatang.py
@@ -16,7 +16,7 @@ from reportlab.pdfbase.ttfonts import TTFont
 from reportlab.pdfbase import pdfmetrics
 from datetime import datetime
 from PIL import Image as PILImage
-from PyPDF2 import PdfReader, PdfWriter  # 用于PDF加密
+import re  # 用于正则表达式提取磁力链接
 def download_image(url):
@@ -39,25 +39,6 @@ def download_image(url):
        return None
 def add_pdf_encryption(pdf_file, password="4000"):
    """Encrypt the PDF at ``pdf_file`` in place using PyPDF2.

    Every page of the existing document is copied into a new writer, the
    writer is encrypted with ``password``, and the result overwrites
    ``pdf_file``.

    Args:
        pdf_file: Path of the PDF to encrypt; the same path is overwritten.
        password: Password applied to the encrypted document.
    """
    import io

    pdf_writer = PdfWriter()
    pdf_reader = PdfReader(pdf_file)
    # Copy all pages from the source document into the writer.
    for page in pdf_reader.pages:
        pdf_writer.add_page(page)
    # Apply the password.
    pdf_writer.encrypt(password)
    # Serialize into memory first: opening the target with "wb" truncates it
    # immediately, so a failure while building the output would otherwise
    # leave an empty or partial file in place of the original PDF.
    encrypted = io.BytesIO()
    pdf_writer.write(encrypted)
    # Save the encrypted PDF over the original file.
    with open(pdf_file, "wb") as output_pdf:
        output_pdf.write(encrypted.getvalue())
    # NOTE(review): this prints the password in plain text; kept unchanged
    # because the message is existing user-facing output.
    print(f"PDF加密成功，密码为: {password}")
 def fetch_and_create_pdf(url):
    """根据给定URL抓取页面并生成PDF"""
    # 配置Selenium以无头模式（即不显示浏览器窗口）运行
@@ -102,7 +83,10 @@ def fetch_and_create_pdf(url):
    # 设置标题和正文样式都使用SamHei字体
    title_style = styles['Heading1']
-    title_style.fontName = 'SamHei'  # 设置标题使用SamHei字体
+    title_style.fontName = 'SamHei'  # 设置字体为SamHei
    title_style.fontSize = 14  # 设置字体大小
    title_style.textColor = colors.red  # 设置字体颜色为红色
    title_style.bold = True  # 设置加粗
    normal_style = styles['Normal']
    normal_style.fontName = 'SamHei'  # 设置正文使用SamHei字体
@@ -137,25 +121,34 @@ def fetch_and_create_pdf(url):
            content_div = post_soup.find('div', {'class': 't_fsz'})
            if content_div:
                # 提取文本
                # 提取文本并将 <br> 标签替换为换行符
                post_text = content_div.get_text(separator='\n', strip=True)  # 使用 separator='\n' 参数替换 <br> 标签
-                # 提取图片链接
+
                # 查找磁力链接
                magnet_links = re.findall(r'magnet:\?[^ ]+', post_text)  # 使用正则表达式查找磁力链接
                # 添加标题到PDF
                content.append(Paragraph(f"标题: {post_title}", title_style))
                content.append(Spacer(1, 12))
                content.append(Paragraph(f"来源URL: {post_page_url}", normal_style))
                content.append(Spacer(1, 12))
                content.append(Paragraph(f"介绍: {post_text}", normal_style))
                content.append(Spacer(1, 12))  # 添加空白区域
                # 如果有磁力链接，将其单独加粗并显示
                if magnet_links:
                    for magnet_link in magnet_links:
                        # 将磁力链接作为加粗的内容显示
                        content.append(Paragraph(f"Magnet Link: <b>{magnet_link}</b>", normal_style))
                        content.append(Spacer(1, 12))  # 添加空白区域
                # 添加图片
                image_links = []
                images = content_div.find_all('img')
                for img in images:
                    if img.get('src') and 'http' in img.get('src'):
                        image_links.append(img.get('src'))
                # 添加标题到PDF
                content.append(Paragraph(f"Title: {post_title}", title_style))
                content.append(Spacer(1, 12))
                content.append(Paragraph(f"Post URL: {post_page_url}", normal_style))
                content.append(Spacer(1, 12))
                content.append(Paragraph(f"Post Content: {post_text}", normal_style))
                content.append(Spacer(1, 12))  # 添加空白区域
                # 添加图片
                if image_links:
                    for img_link in image_links:
                        image = download_image(img_link)
@@ -179,9 +172,6 @@ def fetch_and_create_pdf(url):
    absolute_pdf_path = os.path.abspath(pdf_filename)
    print(f"PDF saved as {absolute_pdf_path}")
    # 加密PDF
    add_pdf_encryption(absolute_pdf_path)
    # 关闭浏览器
    driver.quit()