From 535946d4eac0bd1ee0a1856c33100d91753b81a3 Mon Sep 17 00:00:00 2001 From: liuwei Date: Sat, 8 Feb 2025 16:00:21 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=8E=92=E7=89=88=EF=BC=8C?= =?UTF-8?q?=E8=8E=B7=E5=8F=962=E4=B8=AA=E5=9B=BE=E7=89=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sehuatang/shehuatang.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sehuatang/shehuatang.py b/sehuatang/shehuatang.py index b1d246b..cabeb7d 100644 --- a/sehuatang/shehuatang.py +++ b/sehuatang/shehuatang.py @@ -74,9 +74,6 @@ def fetch_and_create_pdf(url): # 获取今天的日期 today = datetime.now().strftime('%Y-%m-%d') - # 设置PDF - pdf_filename = f"JAV-{today}-{len(posts)}.pdf" - doc = SimpleDocTemplate(pdf_filename, pagesize=letter) # 注册中文字体 pdfmetrics.registerFont(TTFont('SamHei', 'fonts/simhei.ttf')) # 设置中文字体路径 @@ -101,6 +98,10 @@ def fetch_and_create_pdf(url): if post_time_span: # 判断是否存在post_time_span,即认为是当天发布的帖子 today_posts.append(post) + # 设置PDF + pdf_filename = f"JAV-{today}-{len(today_posts)}.pdf" + doc = SimpleDocTemplate(pdf_filename, pagesize=letter) + # 遍历当天的帖子并提取信息 for post in today_posts: # 查找帖子标题 @@ -108,7 +109,7 @@ def fetch_and_create_pdf(url): if title: post_title = title.get_text() post_url = title.get('href') - + print(post_title) # 获取帖子的页面 post_page_url = 'https://www.sehuatang.net/' + post_url driver.get(post_page_url) @@ -147,9 +148,9 @@ def fetch_and_create_pdf(url): image_links = [] images = content_div.find_all('img') for img in images: - if img.get('src') and 'http' in img.get('src'): - image_links.append(img.get('src')) - + if img.get('zoomfile') and 'http' in img.get('zoomfile'): + image_links.append(img.get('zoomfile')) + print(image_links) if image_links: for img_link in image_links: image = download_image(img_link)