进行图片压缩，控制每张在300KB

2025-03-03 09:27:26 +08:00
parent 426a2451ee
commit c7a3b151b4
1 changed files with 90 additions and 30 deletions
--- a/xiuren/xiuren_pdf.py
+++ b/xiuren/xiuren_pdf.py
@@ -3,60 +3,114 @@ from reportlab.lib.pagesizes import A3
 from reportlab.platypus import SimpleDocTemplate, Image
 from PyPDF2 import PdfReader, PdfWriter
 from PIL import Image as PILImage
+import io
+
+
+def compress_image(image_path, target_size_kb=300):
+    """快速压缩图片到目标大小（单位：KB）"""
+    img = PILImage.open(image_path)
+
+    # 如果图片有透明度，转换为RGB
+    if img.mode in ('RGBA', 'P'):
+        img = img.convert('RGB')
+
+    # 获取原始文件大小（KB）
+    original_size_kb = os.path.getsize(image_path) / 1024
+
+    # 如果原始大小已小于目标大小，直接返回原始数据
+    if original_size_kb <= target_size_kb:
+        output = io.BytesIO()
+        img.save(output, format='JPEG', quality=85)
+        return output.getvalue()
+
+    # 根据原始大小和目标大小预估初始质量
+    # 假设质量与文件大小近似线性关系，设置一个初始值
+    estimated_quality = min(95, max(10, int(85 * (target_size_kb / original_size_kb))))
+
+    output = io.BytesIO()
+    img.save(output, format='JPEG', quality=estimated_quality)
+    size_kb = len(output.getvalue()) / 1024
+
+    # 如果预估结果偏差较大，使用二分法调整（最多尝试3次）
+    low, high = 10, 95
+    for _ in range(3):
+        if size_kb <= target_size_kb * 0.9 or size_kb >= target_size_kb * 1.1:  # 允许±10%偏差
+            quality = (low + high) // 2
+            output.seek(0)
+            output.truncate(0)
+            img.save(output, format='JPEG', quality=quality)
+            size_kb = len(output.getvalue()) / 1024
+
+            if size_kb > target_size_kb:
+                high = quality - 1
+            else:
+                low = quality + 1
+        else:
+            break
+
+    return output.getvalue()


 def create_pdf_from_images(directory, output_pdf):
+    """从目录中的图片创建PDF"""
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
    image_files = []

-    # 遍历目录并筛选图片文件
+    # 遍历目录，筛选图片文件
    for root, dirs, files in os.walk(directory):
        for file in files:
            _, ext = os.path.splitext(file)
            if ext.lower() in image_extensions:
                image_files.append(os.path.join(root, file))

-    # 如果没有找到任何图片，返回
+    # 如果没有找到图片，返回
    if not image_files:
-        print(f"No image files found in {directory}.")
+        print(f"未在 {directory} 中找到图片文件。")
        return

-    # 按文件名排序，确保图片按顺序
+    # 按文件名排序
    image_files.sort()

-    # 设置PDF文档，使用 A3 页面大小
+    # 设置PDF文档，使用A3页面大小
    doc = SimpleDocTemplate(output_pdf, pagesize=A3)

    # 创建图片列表
    image_list = []
    for image_file in image_files:
-        # 使用 Pillow 获取图片的尺寸
+        # 使用Pillow获取图片尺寸
        with PILImage.open(image_file) as img:
            img_width, img_height = img.size

-        # 设置目标宽度（适应A3页面宽度，最大842点）
-        target_width = 800  # 设置图片宽度为最大800点，适应A3页面
+        # 压缩图片到~300KB
+        compressed_image_data = compress_image(image_file)

-        # 计算新的高度，保持原始宽高比
+        # 从压缩字节数据中读取图片以获取新尺寸
+        compressed_img = PILImage.open(io.BytesIO(compressed_image_data))
+        img_width, img_height = compressed_img.size
+
+        # 设置目标宽度（适应A3页面宽度，最大800点）
+        target_width = 800
+
+        # 计算新高度，保持宽高比
        target_height = int(img_height * (target_width / img_width))

-        # 限制图片的最大高度，避免超出页面
-        max_page_height = 1091 - 100  # 留出一些空间，A3页面高度1191点
+        # 限制最大高度，避免超出页面
+        max_page_height = 1091 - 100  # A3页面高度1191点，留出空间
        if target_height > max_page_height:
            target_height = max_page_height
-            target_width = int(target_height * (img_width / img_height))  # 调整宽度保持比例
+            target_width = int(target_height * (img_width / img_height))

-        # 创建图片对象并添加到图片列表
-        img = Image(image_file, width=target_width, height=target_height)
+        # 创建图片对象并添加到列表
+        img = Image(io.BytesIO(compressed_image_data), width=target_width, height=target_height)
        image_list.append(img)

-    # 生成 PDF
+    # 生成PDF
    doc.build(image_list)
-    print(f"PDF {output_pdf} created successfully.")
+    print(f"PDF {output_pdf} 创建成功。")


 def encrypt_pdf(input_pdf, output_pdf, password):
-    # 使用 PyPDF2 加密 PDF
+    """加密PDF文件"""
    writer = PdfWriter()
    with open(input_pdf, "rb") as file:
        reader = PdfReader(file)
@@ -66,39 +120,45 @@ def encrypt_pdf(input_pdf, output_pdf, password):
    with open(output_pdf, "wb") as file:
        writer.encrypt(password)
        writer.write(file)
-    print(f"PDF {output_pdf} encrypted successfully.")
+    print(f"PDF {output_pdf} 加密成功。")


 def generate_pdf_from_images(directory):
-    # 获取当前目录下所有数字命名的文件夹
+    """从数字命名的文件夹生成PDF"""
+    # 获取目录下所有数字命名的文件夹
    folder_names = [folder for folder in os.listdir(directory)
                    if os.path.isdir(os.path.join(directory, folder)) and folder.isdigit()]

-    # 如果没有数字命名的文件夹，提示并返回
+    # 如果没有数字命名的文件夹，返回
    if not folder_names:
-        print("No numeric-named folders found.")
+        print("未找到数字命名的文件夹。")
        return

-    # 循环处理每个数字命名的文件夹
+    # 循环处理每个文件夹
    for folder_name in folder_names:
        folder_path = os.path.join(directory, folder_name)

-        # 设置 PDF 输出路径，使用文件夹名称作为文件名，并存储到 ./PDF 目录下
+        # 设置PDF输出路径
        output_pdf = f"./PDF/{folder_name}.pdf"

-        # 检查 PDF 文件是否已存在
+        # 如果PDF已存在，跳过
        if os.path.exists(output_pdf):
-            print(f"PDF {output_pdf} already exists. Skipping...")
+            print(f"PDF {output_pdf} 已存在，跳过...")
            continue

-        # 创建目录
+        # 创建PDF目录
        os.makedirs("./PDF", exist_ok=True)

-        # 创建 PDF
+        # 创建PDF
        create_pdf_from_images(folder_path, output_pdf)

-        # 加密 PDF
-        encrypt_pdf(output_pdf, output_pdf, "4000")  # 密码设置为 4000
+        # 加密PDF
+        encrypt_pdf(output_pdf, output_pdf, "4000")  # 密码为4000

-        # 返回PDF位置，用于发送到群里
+        # 返回PDF绝对路径
        return os.path.abspath(output_pdf)
+
+
+# 示例用法
+if __name__ == "__main__":
+    generate_pdf_from_images("./")