优化排版，获取2个图片

2025-02-08 16:00:21 +08:00
parent 671a89325f
commit 535946d4ea
1 changed file with 8 additions and 7 deletions
--- a/sehuatang/shehuatang.py
+++ b/sehuatang/shehuatang.py
@@ -74,9 +74,6 @@ def fetch_and_create_pdf(url):
    # 获取今天的日期
    today = datetime.now().strftime('%Y-%m-%d')

-    # 设置PDF
-    pdf_filename = f"JAV-{today}-{len(posts)}.pdf"
-    doc = SimpleDocTemplate(pdf_filename, pagesize=letter)

    # 注册中文字体
    pdfmetrics.registerFont(TTFont('SamHei', 'fonts/simhei.ttf'))  # 设置中文字体路径
@@ -101,6 +98,10 @@ def fetch_and_create_pdf(url):
        if post_time_span:  # 判断是否存在post_time_span，即认为是当天发布的帖子
            today_posts.append(post)

+    # 设置PDF
+    pdf_filename = f"JAV-{today}-{len(today_posts)}.pdf"
+    doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
+
    # 遍历当天的帖子并提取信息
    for post in today_posts:
        # 查找帖子标题
@@ -108,7 +109,7 @@ def fetch_and_create_pdf(url):
        if title:
            post_title = title.get_text()
            post_url = title.get('href')
-
+            print(post_title)
            # 获取帖子的页面
            post_page_url = 'https://www.sehuatang.net/' + post_url
            driver.get(post_page_url)
@@ -147,9 +148,9 @@ def fetch_and_create_pdf(url):
                image_links = []
                images = content_div.find_all('img')
                for img in images:
-                    if img.get('src') and 'http' in img.get('src'):
-                        image_links.append(img.get('src'))
-
+                    if img.get('zoomfile') and 'http' in img.get('zoomfile'):
+                        image_links.append(img.get('zoomfile'))
+                print(image_links)
                if image_links:
                    for img_link in image_links:
                        image = download_image(img_link)