diff --git a/sehuatang/shehuatang.py b/sehuatang/shehuatang.py index b1d246b..cabeb7d 100644 --- a/sehuatang/shehuatang.py +++ b/sehuatang/shehuatang.py @@ -74,9 +74,6 @@ def fetch_and_create_pdf(url): # 获取今天的日期 today = datetime.now().strftime('%Y-%m-%d') - # 设置PDF - pdf_filename = f"JAV-{today}-{len(posts)}.pdf" - doc = SimpleDocTemplate(pdf_filename, pagesize=letter) # 注册中文字体 pdfmetrics.registerFont(TTFont('SamHei', 'fonts/simhei.ttf')) # 设置中文字体路径 @@ -101,6 +98,10 @@ def fetch_and_create_pdf(url): if post_time_span: # 判断是否存在post_time_span,即认为是当天发布的帖子 today_posts.append(post) + # 设置PDF + pdf_filename = f"JAV-{today}-{len(today_posts)}.pdf" + doc = SimpleDocTemplate(pdf_filename, pagesize=letter) + # 遍历当天的帖子并提取信息 for post in today_posts: # 查找帖子标题 @@ -108,7 +109,7 @@ def fetch_and_create_pdf(url): if title: post_title = title.get_text() post_url = title.get('href') - + print(post_title) # 获取帖子的页面 post_page_url = 'https://www.sehuatang.net/' + post_url driver.get(post_page_url) @@ -147,9 +148,9 @@ def fetch_and_create_pdf(url): image_links = [] images = content_div.find_all('img') for img in images: - if img.get('src') and 'http' in img.get('src'): - image_links.append(img.get('src')) - + if img.get('zoomfile') and 'http' in img.get('zoomfile'): + image_links.append(img.get('zoomfile')) + print(image_links) if image_links: for img_link in image_links: image = download_image(img_link)