diff --git a/sehuatang/shehuatang.py b/sehuatang/shehuatang.py index f3320e9..567dbea 100644 --- a/sehuatang/shehuatang.py +++ b/sehuatang/shehuatang.py @@ -16,7 +16,7 @@ from reportlab.pdfbase.ttfonts import TTFont from reportlab.pdfbase import pdfmetrics from datetime import datetime from PIL import Image as PILImage -from PyPDF2 import PdfReader, PdfWriter # 用于PDF加密 +import re # 用于正则表达式提取磁力链接 def download_image(url): @@ -39,25 +39,6 @@ def download_image(url): return None -def add_pdf_encryption(pdf_file, password="4000"): - """ 使用PyPDF2为PDF添加加密保护 """ - pdf_writer = PdfWriter() - pdf_reader = PdfReader(pdf_file) - - # 将所有页面添加到PDF写入器中 - for page_num in range(len(pdf_reader.pages)): - pdf_writer.add_page(pdf_reader.pages[page_num]) - - # 添加密码 - pdf_writer.encrypt(password) - - # 保存加密后的PDF - with open(pdf_file, "wb") as output_pdf: - pdf_writer.write(output_pdf) - - print(f"PDF加密成功,密码为: {password}") - - def fetch_and_create_pdf(url): """根据给定URL抓取页面并生成PDF""" # 配置Selenium以无头模式(即不显示浏览器窗口)运行 @@ -102,7 +83,10 @@ def fetch_and_create_pdf(url): # 设置标题和正文样式都使用SamHei字体 title_style = styles['Heading1'] - title_style.fontName = 'SamHei' # 设置标题使用SamHei字体 + title_style.fontName = 'SamHei' # 设置字体为SamHei + title_style.fontSize = 14 # 设置字体大小 + title_style.textColor = colors.red # 设置字体颜色为红色 + title_style.bold = True # 设置加粗 normal_style = styles['Normal'] normal_style.fontName = 'SamHei' # 设置正文使用SamHei字体 @@ -137,25 +121,34 @@ def fetch_and_create_pdf(url): content_div = post_soup.find('div', {'class': 't_fsz'}) if content_div: - # 提取文本 # 提取文本并将<br>标签替换为换行符 post_text = content_div.get_text(separator='\n', strip=True) # 使用 separator='\n' 参数替换<br>标签 - # 提取图片链接 + + # 查找磁力链接 + magnet_links = re.findall(r'magnet:\?[^ ]+', post_text) # 使用正则表达式查找磁力链接 + + # 添加标题到PDF + content.append(Paragraph(f"标题: {post_title}", title_style)) + content.append(Spacer(1, 12)) + content.append(Paragraph(f"来源URL: {post_page_url}", normal_style)) + content.append(Spacer(1, 12)) + content.append(Paragraph(f"介绍: {post_text}", normal_style)) + content.append(Spacer(1, 12)) # 添加空白区域 + + # 如果有磁力链接,将其单独加粗并显示 + if magnet_links: + for magnet_link in magnet_links: + # 将磁力链接作为加粗的内容显示 + content.append(Paragraph(f"Magnet Link: {magnet_link}", normal_style)) + content.append(Spacer(1, 12)) # 添加空白区域 + + # 添加图片 image_links = [] images = content_div.find_all('img') for img in images: if img.get('src') and 'http' in img.get('src'): image_links.append(img.get('src')) - # 添加标题到PDF - content.append(Paragraph(f"Title: {post_title}", title_style)) - content.append(Spacer(1, 12)) - content.append(Paragraph(f"Post URL: {post_page_url}", normal_style)) - content.append(Spacer(1, 12)) - content.append(Paragraph(f"Post Content: {post_text}", normal_style)) - content.append(Spacer(1, 12)) # 添加空白区域 - - # 添加图片 if image_links: for img_link in image_links: image = download_image(img_link) @@ -179,9 +172,6 @@ def fetch_and_create_pdf(url): absolute_pdf_path = os.path.abspath(pdf_filename) print(f"PDF saved as {absolute_pdf_path}") - # 加密PDF - add_pdf_encryption(absolute_pdf_path) - # 关闭浏览器 driver.quit()