diff --git a/sehuatang/shehuatang.py b/sehuatang/shehuatang.py
index f3320e9..567dbea 100644
--- a/sehuatang/shehuatang.py
+++ b/sehuatang/shehuatang.py
@@ -16,7 +16,7 @@ from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
from datetime import datetime
from PIL import Image as PILImage
-from PyPDF2 import PdfReader, PdfWriter # 用于PDF加密
+import re # 用于正则表达式提取磁力链接
def download_image(url):
@@ -39,25 +39,6 @@ def download_image(url):
return None
-def add_pdf_encryption(pdf_file, password="4000"):
- """ 使用PyPDF2为PDF添加加密保护 """
- pdf_writer = PdfWriter()
- pdf_reader = PdfReader(pdf_file)
-
- # 将所有页面添加到PDF写入器中
- for page_num in range(len(pdf_reader.pages)):
- pdf_writer.add_page(pdf_reader.pages[page_num])
-
- # 添加密码
- pdf_writer.encrypt(password)
-
- # 保存加密后的PDF
- with open(pdf_file, "wb") as output_pdf:
- pdf_writer.write(output_pdf)
-
- print(f"PDF加密成功,密码为: {password}")
-
-
def fetch_and_create_pdf(url):
"""根据给定URL抓取页面并生成PDF"""
# 配置Selenium以无头模式(即不显示浏览器窗口)运行
@@ -102,7 +83,10 @@ def fetch_and_create_pdf(url):
# 设置标题和正文样式都使用SamHei字体
title_style = styles['Heading1']
- title_style.fontName = 'SamHei' # 设置标题使用SamHei字体
+ title_style.fontName = 'SamHei' # 设置字体为SamHei
+ title_style.fontSize = 14 # 设置字体大小
+ title_style.textColor = colors.red # 设置字体颜色为红色
+ title_style.bold = True # 设置加粗
normal_style = styles['Normal']
normal_style.fontName = 'SamHei' # 设置正文使用SamHei字体
@@ -137,25 +121,34 @@ def fetch_and_create_pdf(url):
content_div = post_soup.find('div', {'class': 't_fsz'})
if content_div:
- # 提取文本
# 提取文本并将<br>标签替换为换行符
post_text = content_div.get_text(separator='\n', strip=True) # 使用 separator='\n' 参数替换<br>标签
- # 提取图片链接
+
+ # 查找磁力链接
+ magnet_links = re.findall(r'magnet:\?[^ ]+', post_text) # 使用正则表达式查找磁力链接
+
+ # 添加标题到PDF
+ content.append(Paragraph(f"标题: {post_title}", title_style))
+ content.append(Spacer(1, 12))
+ content.append(Paragraph(f"来源URL: {post_page_url}", normal_style))
+ content.append(Spacer(1, 12))
+ content.append(Paragraph(f"介绍: {post_text}", normal_style))
+ content.append(Spacer(1, 12)) # 添加空白区域
+
+ # 如果有磁力链接,将其单独加粗并显示
+ if magnet_links:
+ for magnet_link in magnet_links:
+ # 将磁力链接作为加粗的内容显示
+ content.append(Paragraph(f"Magnet Link: {magnet_link}", normal_style))
+ content.append(Spacer(1, 12)) # 添加空白区域
+
+ # 添加图片
image_links = []
images = content_div.find_all('img')
for img in images:
if img.get('src') and 'http' in img.get('src'):
image_links.append(img.get('src'))
- # 添加标题到PDF
- content.append(Paragraph(f"Title: {post_title}", title_style))
- content.append(Spacer(1, 12))
- content.append(Paragraph(f"Post URL: {post_page_url}", normal_style))
- content.append(Spacer(1, 12))
- content.append(Paragraph(f"Post Content: {post_text}", normal_style))
- content.append(Spacer(1, 12)) # 添加空白区域
-
- # 添加图片
if image_links:
for img_link in image_links:
image = download_image(img_link)
@@ -179,9 +172,6 @@ def fetch_and_create_pdf(url):
absolute_pdf_path = os.path.abspath(pdf_filename)
print(f"PDF saved as {absolute_pdf_path}")
- # 加密PDF
- add_pdf_encryption(absolute_pdf_path)
-
# 关闭浏览器
driver.quit()