优化排版
This commit is contained in:
@@ -16,7 +16,7 @@ from reportlab.pdfbase.ttfonts import TTFont
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from datetime import datetime
|
||||
from PIL import Image as PILImage
|
||||
from PyPDF2 import PdfReader, PdfWriter # 用于PDF加密
|
||||
import re # 用于正则表达式提取磁力链接
|
||||
|
||||
|
||||
def download_image(url):
|
||||
@@ -39,25 +39,6 @@ def download_image(url):
|
||||
return None
|
||||
|
||||
|
||||
def add_pdf_encryption(pdf_file, password="4000"):
    """Encrypt the PDF at ``pdf_file`` in place using PyPDF2.

    Every page of the existing document is copied into a fresh
    ``PdfWriter``, the writer is encrypted with ``password``, and the
    result is written back over the same path.

    Args:
        pdf_file: Path of the PDF to encrypt. The file is overwritten.
        password: Password handed to ``PdfWriter.encrypt``.
    """
    import io  # local import: the file's top-level import block is not fully visible here

    pdf_reader = PdfReader(pdf_file)
    pdf_writer = PdfWriter()

    # Copy every page into the writer.
    for page in pdf_reader.pages:
        pdf_writer.add_page(page)

    # Apply the password.
    pdf_writer.encrypt(password)

    # Serialize into memory before touching the file: opening the target in
    # "wb" mode truncates it immediately, so a failure while PyPDF2 is still
    # writing would otherwise leave a truncated PDF on disk.
    encrypted = io.BytesIO()
    pdf_writer.write(encrypted)

    # Overwrite the original file with the encrypted document.
    with open(pdf_file, "wb") as output_pdf:
        output_pdf.write(encrypted.getvalue())

    print(f"PDF加密成功,密码为: {password}")
|
||||
|
||||
|
||||
def fetch_and_create_pdf(url):
|
||||
"""根据给定URL抓取页面并生成PDF"""
|
||||
# 配置Selenium以无头模式(即不显示浏览器窗口)运行
|
||||
@@ -102,7 +83,10 @@ def fetch_and_create_pdf(url):
|
||||
|
||||
# 设置标题和正文样式都使用SamHei字体
|
||||
title_style = styles['Heading1']
|
||||
title_style.fontName = 'SamHei' # 设置标题使用SamHei字体
|
||||
title_style.fontName = 'SamHei' # 设置字体为SamHei
|
||||
title_style.fontSize = 14 # 设置字体大小
|
||||
title_style.textColor = colors.red # 设置字体颜色为红色
|
||||
title_style.bold = True # 设置加粗
|
||||
|
||||
normal_style = styles['Normal']
|
||||
normal_style.fontName = 'SamHei' # 设置正文使用SamHei字体
|
||||
@@ -137,25 +121,34 @@ def fetch_and_create_pdf(url):
|
||||
content_div = post_soup.find('div', {'class': 't_fsz'})
|
||||
|
||||
if content_div:
|
||||
# 提取文本
|
||||
# 提取文本并将 <br> 标签替换为换行符
|
||||
post_text = content_div.get_text(separator='\n', strip=True) # 使用 separator='\n' 参数替换 <br> 标签
|
||||
# 提取图片链接
|
||||
|
||||
# 查找磁力链接
|
||||
magnet_links = re.findall(r'magnet:\?[^ ]+', post_text) # 使用正则表达式查找磁力链接
|
||||
|
||||
# 添加标题到PDF
|
||||
content.append(Paragraph(f"标题: {post_title}", title_style))
|
||||
content.append(Spacer(1, 12))
|
||||
content.append(Paragraph(f"来源URL: {post_page_url}", normal_style))
|
||||
content.append(Spacer(1, 12))
|
||||
content.append(Paragraph(f"介绍: {post_text}", normal_style))
|
||||
content.append(Spacer(1, 12)) # 添加空白区域
|
||||
|
||||
# 如果有磁力链接,将其单独加粗并显示
|
||||
if magnet_links:
|
||||
for magnet_link in magnet_links:
|
||||
# 将磁力链接作为加粗的内容显示
|
||||
content.append(Paragraph(f"Magnet Link: <b>{magnet_link}</b>", normal_style))
|
||||
content.append(Spacer(1, 12)) # 添加空白区域
|
||||
|
||||
# 添加图片
|
||||
image_links = []
|
||||
images = content_div.find_all('img')
|
||||
for img in images:
|
||||
if img.get('src') and 'http' in img.get('src'):
|
||||
image_links.append(img.get('src'))
|
||||
|
||||
# 添加标题到PDF
|
||||
content.append(Paragraph(f"Title: {post_title}", title_style))
|
||||
content.append(Spacer(1, 12))
|
||||
content.append(Paragraph(f"Post URL: {post_page_url}", normal_style))
|
||||
content.append(Spacer(1, 12))
|
||||
content.append(Paragraph(f"Post Content: {post_text}", normal_style))
|
||||
content.append(Spacer(1, 12)) # 添加空白区域
|
||||
|
||||
# 添加图片
|
||||
if image_links:
|
||||
for img_link in image_links:
|
||||
image = download_image(img_link)
|
||||
@@ -179,9 +172,6 @@ def fetch_and_create_pdf(url):
|
||||
absolute_pdf_path = os.path.abspath(pdf_filename)
|
||||
print(f"PDF saved as {absolute_pdf_path}")
|
||||
|
||||
# 加密PDF
|
||||
add_pdf_encryption(absolute_pdf_path)
|
||||
|
||||
# 关闭浏览器
|
||||
driver.quit()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user