优化排版

This commit is contained in:
liuwei
2025-02-08 15:15:07 +08:00
parent 9dc064682f
commit cc7e8ced51

View File

@@ -16,7 +16,7 @@ from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
from datetime import datetime
from PIL import Image as PILImage
from PyPDF2 import PdfReader, PdfWriter # 用于PDF加密
import re # 用于正则表达式提取磁力链接
def download_image(url):
@@ -39,25 +39,6 @@ def download_image(url):
return None
def add_pdf_encryption(pdf_file, password="4000"):
    """Encrypt a PDF in place with a password using PyPDF2.

    Args:
        pdf_file: Path of the PDF to protect. The same path is reopened in
            binary write mode, so the file is overwritten with the
            encrypted copy.
        password: Password handed to ``PdfWriter.encrypt``.
    """
    pdf_reader = PdfReader(pdf_file)
    pdf_writer = PdfWriter()

    # Copy every page of the source document into the writer.
    for page in pdf_reader.pages:
        pdf_writer.add_page(page)

    # Apply the password before the document is serialised.
    pdf_writer.encrypt(password)

    # Overwrite the original file with the encrypted document.
    with open(pdf_file, "wb") as output_pdf:
        pdf_writer.write(output_pdf)

    print(f"PDF加密成功密码为: {password}")
def fetch_and_create_pdf(url):
"""根据给定URL抓取页面并生成PDF"""
# 配置Selenium以无头模式即不显示浏览器窗口运行
@@ -102,7 +83,10 @@ def fetch_and_create_pdf(url):
# 设置标题和正文样式都使用SamHei字体
title_style = styles['Heading1']
title_style.fontName = 'SamHei' # 设置标题使用SamHei字体
title_style.fontName = 'SamHei' # 设置字体为SamHei
title_style.fontSize = 14 # 设置字体大小
title_style.textColor = colors.red # 设置字体颜色为红色
title_style.bold = True # 设置加粗
normal_style = styles['Normal']
normal_style.fontName = 'SamHei' # 设置正文使用SamHei字体
@@ -137,25 +121,34 @@ def fetch_and_create_pdf(url):
content_div = post_soup.find('div', {'class': 't_fsz'})
if content_div:
# 提取文本
# 提取文本并将 <br> 标签替换为换行符
post_text = content_div.get_text(separator='\n', strip=True) # 使用 separator='\n' 参数替换 <br> 标签
# 提取图片链接
# 查找磁力链接
magnet_links = re.findall(r'magnet:\?[^ ]+', post_text) # 使用正则表达式查找磁力链接
# 添加标题到PDF
content.append(Paragraph(f"标题: {post_title}", title_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"来源URL: {post_page_url}", normal_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"介绍: {post_text}", normal_style))
content.append(Spacer(1, 12)) # 添加空白区域
# 如果有磁力链接,将其单独加粗并显示
if magnet_links:
for magnet_link in magnet_links:
# 将磁力链接作为加粗的内容显示
content.append(Paragraph(f"Magnet Link: <b>{magnet_link}</b>", normal_style))
content.append(Spacer(1, 12)) # 添加空白区域
# 添加图片
image_links = []
images = content_div.find_all('img')
for img in images:
if img.get('src') and 'http' in img.get('src'):
image_links.append(img.get('src'))
# 添加标题到PDF
content.append(Paragraph(f"Title: {post_title}", title_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"Post URL: {post_page_url}", normal_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"Post Content: {post_text}", normal_style))
content.append(Spacer(1, 12)) # 添加空白区域
# 添加图片
if image_links:
for img_link in image_links:
image = download_image(img_link)
@@ -179,9 +172,6 @@ def fetch_and_create_pdf(url):
absolute_pdf_path = os.path.abspath(pdf_filename)
print(f"PDF saved as {absolute_pdf_path}")
# 加密PDF
add_pdf_encryption(absolute_pdf_path)
# 关闭浏览器
driver.quit()