进行图片压缩，控制每张在300KB

2025-03-03 09:27:26 +08:00
parent 426a2451ee
commit c7a3b151b4
1 changed files with 90 additions and 30 deletions
--- a/xiuren/xiuren_pdf.py
+++ b/xiuren/xiuren_pdf.py
@@ -3,25 +3,72 @@ from reportlab.lib.pagesizes import A3
 from reportlab.platypus import SimpleDocTemplate, Image
 from PyPDF2 import PdfReader, PdfWriter
 from PIL import Image as PILImage
 import io
 def compress_image(image_path, target_size_kb=300):
    """Re-encode an image as JPEG, aiming for roughly ``target_size_kb`` KB.

    The image is always re-encoded. A file whose on-disk size is already at
    or below the target is saved once at quality 85. A larger file gets a
    first quality guess from the ratio of target size to on-disk size,
    followed by at most three bisection steps over qualities 10-95.

    Args:
        image_path: Path of the image file to read.
        target_size_kb: Desired size of the encoded result, in kilobytes.

    Returns:
        The JPEG-encoded image as ``bytes``. Among the encodings tried, the
        first one within +/-10% of the target wins; otherwise the
        highest-quality one that does not exceed the target; otherwise
        (nothing fits even at low quality) the smallest one produced.
    """
    def encode(image, quality):
        # Encode into a fresh in-memory buffer and hand back the raw bytes.
        buffer = io.BytesIO()
        image.save(buffer, format='JPEG', quality=quality)
        return buffer.getvalue()

    # The context manager releases the file handle once encoding is done.
    with PILImage.open(image_path) as source:
        # JPEG cannot store alpha or palette data. Any mode outside the ones
        # the encoder is expected to accept (e.g. RGBA, LA, P) is flattened
        # to RGB so that saving does not raise.
        if source.mode in ('RGB', 'L', 'CMYK'):
            img = source
        else:
            img = source.convert('RGB')

        original_size_kb = os.path.getsize(image_path) / 1024
        if original_size_kb <= target_size_kb:
            # Already small enough on disk: a single pass at a fixed quality.
            return encode(img, 85)

        # First guess assumes size scales roughly linearly with quality.
        quality = min(95, max(10, int(85 * (target_size_kb / original_size_kb))))
        low, high = 10, 95
        best_fit = None   # highest-quality encoding seen that is under target
        smallest = None   # fallback when every attempt is over target
        for _ in range(4):  # the initial estimate plus up to three refinements
            data = encode(img, quality)
            size_kb = len(data) / 1024
            if target_size_kb * 0.9 < size_kb < target_size_kb * 1.1:
                return data  # within the accepted +/-10% window
            if size_kb > target_size_kb:
                if smallest is None or len(data) < len(smallest):
                    smallest = data
                high = quality - 1
            else:
                # ``low`` only ever increases, so a later fit is always a
                # higher-quality one and may replace the earlier fit.
                best_fit = data
                low = quality + 1
            if low > high:
                break  # search range exhausted; more attempts cannot help
            quality = (low + high) // 2
        return best_fit if best_fit is not None else smallest
 def create_pdf_from_images(directory, output_pdf):
    """从目录中的图片创建PDF"""
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
    image_files = []
-    # 遍历目录并筛选图片文件
+    # 遍历目录，筛选图片文件
    for root, dirs, files in os.walk(directory):
        for file in files:
            _, ext = os.path.splitext(file)
            if ext.lower() in image_extensions:
                image_files.append(os.path.join(root, file))
-    # 如果没有找到任何图片，返回
+    # 如果没有找到图片，返回
    if not image_files:
-        print(f"No image files found in {directory}.")
+        print(f"未在 {directory} 中找到图片文件。")
        return
-    # 按文件名排序，确保图片按顺序
+    # 按文件名排序
    image_files.sort()
    # 设置PDF文档，使用A3页面大小
@@ -30,33 +77,40 @@ def create_pdf_from_images(directory, output_pdf):
    # 创建图片列表
    image_list = []
    for image_file in image_files:
-        # 使用 Pillow 获取图片的尺寸
+        # 使用Pillow获取图片尺寸
        with PILImage.open(image_file) as img:
            img_width, img_height = img.size
-        # 设置目标宽度（适应A3页面宽度，最大842点）
+        # 压缩图片到~300KB
-        target_width = 800  # 设置图片宽度为最大800点，适应A3页面
+        compressed_image_data = compress_image(image_file)
-        # 计算新的高度，保持原始宽高比
+        # 从压缩字节数据中读取图片以获取新尺寸
        compressed_img = PILImage.open(io.BytesIO(compressed_image_data))
        img_width, img_height = compressed_img.size
        # 设置目标宽度（适应A3页面宽度，最大800点）
        target_width = 800
        # 计算新高度，保持宽高比
        target_height = int(img_height * (target_width / img_width))
-        # 限制图片的最大高度，避免超出页面
+        # 限制最大高度，避免超出页面
-        max_page_height = 1091 - 100  # 留出一些空间，A3页面高度1191点
+        max_page_height = 1091 - 100  # A3页面高度1191点，留出空间
        if target_height > max_page_height:
            target_height = max_page_height
-            target_width = int(target_height * (img_width / img_height))  # 调整宽度保持比例
+            target_width = int(target_height * (img_width / img_height))
-        # 创建图片对象并添加到图片列表
+        # 创建图片对象并添加到列表
-        img = Image(image_file, width=target_width, height=target_height)
+        img = Image(io.BytesIO(compressed_image_data), width=target_width, height=target_height)
        image_list.append(img)
    # 生成PDF
    doc.build(image_list)
-    print(f"PDF {output_pdf} created successfully.")
+    print(f"PDF {output_pdf} 创建成功。")
 def encrypt_pdf(input_pdf, output_pdf, password):
-    # 使用 PyPDF2 加密 PDF
+    """加密PDF文件"""
    writer = PdfWriter()
    with open(input_pdf, "rb") as file:
        reader = PdfReader(file)
@@ -66,39 +120,45 @@ def encrypt_pdf(input_pdf, output_pdf, password):
    with open(output_pdf, "wb") as file:
        writer.encrypt(password)
        writer.write(file)
-    print(f"PDF {output_pdf} encrypted successfully.")
+    print(f"PDF {output_pdf} 加密成功。")
 def generate_pdf_from_images(directory):
-    # 获取当前目录下所有数字命名的文件夹
+    """从数字命名的文件夹生成PDF"""
    # 获取目录下所有数字命名的文件夹
    folder_names = [folder for folder in os.listdir(directory)
                    if os.path.isdir(os.path.join(directory, folder)) and folder.isdigit()]
-    # 如果没有数字命名的文件夹，提示并返回
+    # 如果没有数字命名的文件夹，返回
    if not folder_names:
-        print("No numeric-named folders found.")
+        print("未找到数字命名的文件夹。")
        return
-    # 循环处理每个数字命名的文件夹
+    # 循环处理每个文件夹
    for folder_name in folder_names:
        folder_path = os.path.join(directory, folder_name)
-        # 设置 PDF 输出路径，使用文件夹名称作为文件名，并存储到 ./PDF 目录下
+        # 设置PDF输出路径
        output_pdf = f"./PDF/{folder_name}.pdf"
-        # 检查 PDF 文件是否已存在
+        # 如果PDF已存在，跳过
        if os.path.exists(output_pdf):
-            print(f"PDF {output_pdf} already exists. Skipping...")
+            print(f"PDF {output_pdf} 已存在，跳过...")
            continue
-        # 创建目录
+        # 创建PDF目录
        os.makedirs("./PDF", exist_ok=True)
        # 创建PDF
        create_pdf_from_images(folder_path, output_pdf)
        # 加密PDF
-        encrypt_pdf(output_pdf, output_pdf, "4000")  # 密码设置为 4000
+        encrypt_pdf(output_pdf, output_pdf, "4000")  # 密码为4000
-        # 返回PDF位置，用于发送到群里
+        # 返回PDF绝对路径
        return os.path.abspath(output_pdf)
 # Example usage: generate PDFs from the numerically named folders found in
 # the current directory.
 if __name__ == "__main__":
    generate_pdf_from_images("./")