Files
abot/plugins/xiuren_image/xiuren_pdf.py
2025-05-26 10:10:23 +08:00

161 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from reportlab.lib.pagesizes import A3
from reportlab.platypus import SimpleDocTemplate, Image
from PyPDF2 import PdfReader, PdfWriter
from PIL import Image as PILImage
import io
def compress_image(image_path, target_size_kb=300):
    """Re-encode an image as JPEG, aiming for roughly ``target_size_kb`` KB.

    The image is always re-encoded, so the result is JPEG data even when the
    source file is already at or below the target size (in that case a fixed
    quality of 85 is used). Otherwise a quality is estimated from the size
    ratio and refined with up to three bisection steps over the 10-95 range,
    accepting any result within +/-10% of the target.

    Args:
        image_path: Path of the image file to read.
        target_size_kb: Desired output size in kilobytes.

    Returns:
        bytes: The JPEG-encoded image data. When no attempt lands inside the
        tolerance window, the largest attempt that does not exceed 110% of the
        target is returned, or the smallest attempt if every one is too big.
    """
    # Modes Pillow's JPEG encoder writes directly. Everything else (alpha,
    # palette, 16/32-bit integer, float, ...) must be converted first or
    # ``save`` raises.
    jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

    with PILImage.open(image_path) as img:
        if img.mode not in jpeg_modes:
            img = img.convert('RGB')

        def encode(quality):
            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=quality)
            return buffer.getvalue()

        original_size_kb = os.path.getsize(image_path) / 1024
        if original_size_kb <= target_size_kb:
            return encode(85)

        lower_kb = target_size_kb * 0.9
        upper_kb = target_size_kb * 1.1

        # First guess: assume file size scales roughly linearly with quality.
        estimated_quality = min(95, max(10, int(85 * (target_size_kb / original_size_kb))))
        best = encode(estimated_quality)
        size_kb = best_kb = len(best) / 1024

        low, high = 10, 95
        for _ in range(3):
            if lower_kb < size_kb < upper_kb:
                break
            quality = (low + high) // 2
            data = encode(quality)
            size_kb = len(data) / 1024
            if size_kb > target_size_kb:
                high = quality - 1
            else:
                low = quality + 1

            # Remember the best attempt so a late overshoot cannot replace an
            # earlier result that already fit.
            fits = size_kb <= upper_kb
            best_fits = best_kb <= upper_kb
            if fits and (not best_fits or size_kb > best_kb):
                best, best_kb = data, size_kb
            elif not fits and not best_fits and size_kb < best_kb:
                best, best_kb = data, size_kb

        return best
def create_pdf_from_images(directory, output_pdf):
    """Build a PDF at ``output_pdf`` from every image found under ``directory``.

    The directory is walked recursively. Files are ordered with a natural
    sort, so ``2.jpg`` comes before ``10.jpg``. Each image is compressed with
    ``compress_image`` and added to an A3 document, scaled to 800 pt wide or,
    if that would be too tall, to 991 pt high, keeping the aspect ratio.

    Args:
        directory: Root directory to search for images.
        output_pdf: Path of the PDF file to create.

    Returns:
        None. When no image is found a message is printed and no file is
        written.
    """
    import re  # local import: only needed for the natural-sort key

    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}

    def natural_key(path):
        # re.split with a capturing group alternates text and digit chunks
        # (text first), so chunks at odd indexes are always numbers and the
        # resulting lists compare position by position without type clashes.
        chunks = re.split(r'([0-9]+)', path)
        chunks[1::2] = [int(number) for number in chunks[1::2]]
        return chunks, path

    image_files = []
    for root, _dirs, names in os.walk(directory):
        for name in names:
            if os.path.splitext(name)[1].lower() in image_extensions:
                image_files.append(os.path.join(root, name))

    if not image_files:
        print(f"未在 {directory} 中找到图片文件。")
        return

    image_files.sort(key=natural_key)

    doc = SimpleDocTemplate(output_pdf, pagesize=A3)

    # Layout limits in points: preferred width, and a height cap that keeps
    # the image inside an A3 page (about 1191 pt tall) with room to spare.
    page_width_limit = 800
    max_page_height = 1091 - 100

    image_list = []
    for image_file in image_files:
        compressed_image_data = compress_image(image_file)

        # Re-open the compressed bytes only to read the pixel dimensions.
        with PILImage.open(io.BytesIO(compressed_image_data)) as compressed_img:
            img_width, img_height = compressed_img.size

        target_width = page_width_limit
        target_height = int(img_height * (target_width / img_width))
        if target_height > max_page_height:
            # Too tall at full width: fit to the height cap instead.
            target_height = max_page_height
            target_width = int(target_height * (img_width / img_height))

        image_list.append(
            Image(io.BytesIO(compressed_image_data), width=target_width, height=target_height)
        )

    doc.build(image_list)
    print(f"PDF {output_pdf} 创建成功。")
def encrypt_pdf(input_pdf, output_pdf, password):
    """Write a password-protected copy of ``input_pdf`` to ``output_pdf``.

    ``input_pdf`` and ``output_pdf`` may be the same path: the source is read
    fully into memory before the destination is opened for writing.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path of the encrypted PDF to write.
        password: Password required to open the resulting PDF.
    """
    # Load the whole source up front so the reader never depends on a file
    # handle that has been closed, or on a file that is about to be truncated
    # when both paths are identical.
    with open(input_pdf, "rb") as source:
        reader = PdfReader(io.BytesIO(source.read()))

    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    # Encrypt before opening the destination so a failure at this step does
    # not leave an empty output file behind.
    writer.encrypt(password)

    with open(output_pdf, "wb") as target:
        writer.write(target)
    print(f"PDF {output_pdf} 加密成功。")
def generate_pdf_from_images(directory, password="4000", output_dir="./PDF"):
    """Create an encrypted PDF from the first unprocessed numeric folder.

    Sub-folders of ``directory`` whose names consist only of digits are
    examined in ``os.listdir`` order. A folder whose PDF already exists in
    ``output_dir`` is skipped. The first remaining folder that yields a PDF
    is converted, encrypted in place, and its path returned.

    Args:
        directory: Directory containing the digit-named image folders.
        password: Password applied to the generated PDF.
        output_dir: Directory the PDF is written to; created if missing.

    Returns:
        The absolute path of the newly created PDF, or ``None`` when there is
        no digit-named folder or nothing new could be generated.
    """
    folder_names = [
        folder for folder in os.listdir(directory)
        if folder.isdigit() and os.path.isdir(os.path.join(directory, folder))
    ]
    if not folder_names:
        print("未找到数字命名的文件夹。")
        return None

    for folder_name in folder_names:
        output_pdf = f"{output_dir}/{folder_name}.pdf"
        if os.path.exists(output_pdf):
            print(f"PDF {output_pdf} 已存在,跳过...")
            continue

        # Create the directory the PDF is actually written to.
        os.makedirs(output_dir, exist_ok=True)
        create_pdf_from_images(os.path.join(directory, folder_name), output_pdf)

        # A folder without images produces no file, so there is nothing to
        # encrypt; move on to the next folder.
        if not os.path.exists(output_pdf):
            continue

        encrypt_pdf(output_pdf, output_pdf, password)
        return os.path.abspath(output_pdf)

    return None
# Example usage: when run as a script, process the digit-named folders found
# in the current working directory.
if __name__ == "__main__":
    generate_pdf_from_images("./")