优化排版

This commit is contained in:
liuwei
2025-02-08 15:15:07 +08:00
parent 9dc064682f
commit cc7e8ced51

View File

@@ -16,7 +16,7 @@ from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase import pdfmetrics
from datetime import datetime from datetime import datetime
from PIL import Image as PILImage from PIL import Image as PILImage
from PyPDF2 import PdfReader, PdfWriter # 用于PDF加密 import re # 用于正则表达式提取磁力链接
def download_image(url): def download_image(url):
@@ -39,25 +39,6 @@ def download_image(url):
return None return None
def add_pdf_encryption(pdf_file, password="4000"):
    """Encrypt the PDF at ``pdf_file`` in place using PyPDF2.

    Every page of the existing file is copied into a new writer, the
    writer is encrypted with ``password``, and the result overwrites the
    original file.

    Args:
        pdf_file: Path of the PDF to encrypt; the file is overwritten.
        password: Password passed to ``PdfWriter.encrypt``.
    """
    import io

    pdf_reader = PdfReader(pdf_file)
    pdf_writer = PdfWriter()

    # Copy all pages into the writer.
    for page in pdf_reader.pages:
        pdf_writer.add_page(page)

    # Apply the password.
    pdf_writer.encrypt(password)

    # Serialize to memory first: opening the target with "wb" truncates it,
    # so a failure while writing would otherwise destroy the original PDF.
    buffer = io.BytesIO()
    pdf_writer.write(buffer)

    # Save the encrypted PDF over the original file.
    with open(pdf_file, "wb") as output_pdf:
        output_pdf.write(buffer.getvalue())

    print(f"PDF加密成功密码为: {password}")
def fetch_and_create_pdf(url): def fetch_and_create_pdf(url):
"""根据给定URL抓取页面并生成PDF""" """根据给定URL抓取页面并生成PDF"""
# 配置Selenium以无头模式即不显示浏览器窗口运行 # 配置Selenium以无头模式即不显示浏览器窗口运行
@@ -102,7 +83,10 @@ def fetch_and_create_pdf(url):
# 设置标题和正文样式都使用SamHei字体 # 设置标题和正文样式都使用SamHei字体
title_style = styles['Heading1'] title_style = styles['Heading1']
title_style.fontName = 'SamHei' # 设置标题使用SamHei字体 title_style.fontName = 'SamHei' # 设置字体为SamHei
title_style.fontSize = 14 # 设置字体大小
title_style.textColor = colors.red # 设置字体颜色为红色
title_style.bold = True # 设置加粗
normal_style = styles['Normal'] normal_style = styles['Normal']
normal_style.fontName = 'SamHei' # 设置正文使用SamHei字体 normal_style.fontName = 'SamHei' # 设置正文使用SamHei字体
@@ -137,25 +121,34 @@ def fetch_and_create_pdf(url):
content_div = post_soup.find('div', {'class': 't_fsz'}) content_div = post_soup.find('div', {'class': 't_fsz'})
if content_div: if content_div:
# 提取文本
# 提取文本并将 <br> 标签替换为换行符 # 提取文本并将 <br> 标签替换为换行符
post_text = content_div.get_text(separator='\n', strip=True) # 使用 separator='\n' 参数替换 <br> 标签 post_text = content_div.get_text(separator='\n', strip=True) # 使用 separator='\n' 参数替换 <br> 标签
# 提取图片链接
# 查找磁力链接
magnet_links = re.findall(r'magnet:\?[^ ]+', post_text) # 使用正则表达式查找磁力链接
# 添加标题到PDF
content.append(Paragraph(f"标题: {post_title}", title_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"来源URL: {post_page_url}", normal_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"介绍: {post_text}", normal_style))
content.append(Spacer(1, 12)) # 添加空白区域
# 如果有磁力链接,将其单独加粗并显示
if magnet_links:
for magnet_link in magnet_links:
# 将磁力链接作为加粗的内容显示
content.append(Paragraph(f"Magnet Link: <b>{magnet_link}</b>", normal_style))
content.append(Spacer(1, 12)) # 添加空白区域
# 添加图片
image_links = [] image_links = []
images = content_div.find_all('img') images = content_div.find_all('img')
for img in images: for img in images:
if img.get('src') and 'http' in img.get('src'): if img.get('src') and 'http' in img.get('src'):
image_links.append(img.get('src')) image_links.append(img.get('src'))
# 添加标题到PDF
content.append(Paragraph(f"Title: {post_title}", title_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"Post URL: {post_page_url}", normal_style))
content.append(Spacer(1, 12))
content.append(Paragraph(f"Post Content: {post_text}", normal_style))
content.append(Spacer(1, 12)) # 添加空白区域
# 添加图片
if image_links: if image_links:
for img_link in image_links: for img_link in image_links:
image = download_image(img_link) image = download_image(img_link)
@@ -179,9 +172,6 @@ def fetch_and_create_pdf(url):
absolute_pdf_path = os.path.abspath(pdf_filename) absolute_pdf_path = os.path.abspath(pdf_filename)
print(f"PDF saved as {absolute_pdf_path}") print(f"PDF saved as {absolute_pdf_path}")
# 加密PDF
add_pdf_encryption(absolute_pdf_path)
# 关闭浏览器 # 关闭浏览器
driver.quit() driver.quit()