优化排版，获取2个图片

This commit is contained in:
liuwei
2025-02-08 16:00:21 +08:00
parent 671a89325f
commit 535946d4ea

View File

@@ -74,9 +74,6 @@ def fetch_and_create_pdf(url):
# 获取今天的日期 # 获取今天的日期
today = datetime.now().strftime('%Y-%m-%d') today = datetime.now().strftime('%Y-%m-%d')
# 设置PDF
pdf_filename = f"JAV-{today}-{len(posts)}.pdf"
doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
# 注册中文字体 # 注册中文字体
pdfmetrics.registerFont(TTFont('SamHei', 'fonts/simhei.ttf')) # 设置中文字体路径 pdfmetrics.registerFont(TTFont('SamHei', 'fonts/simhei.ttf')) # 设置中文字体路径
@@ -101,6 +98,10 @@ def fetch_and_create_pdf(url):
if post_time_span: # 判断是否存在post_time_span即认为是当天发布的帖子 if post_time_span: # 判断是否存在post_time_span即认为是当天发布的帖子
today_posts.append(post) today_posts.append(post)
# 设置PDF
pdf_filename = f"JAV-{today}-{len(today_posts)}.pdf"
doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
# 遍历当天的帖子并提取信息 # 遍历当天的帖子并提取信息
for post in today_posts: for post in today_posts:
# 查找帖子标题 # 查找帖子标题
@@ -108,7 +109,7 @@ def fetch_and_create_pdf(url):
if title: if title:
post_title = title.get_text() post_title = title.get_text()
post_url = title.get('href') post_url = title.get('href')
print(post_title)
# 获取帖子的页面 # 获取帖子的页面
post_page_url = 'https://www.sehuatang.net/' + post_url post_page_url = 'https://www.sehuatang.net/' + post_url
driver.get(post_page_url) driver.get(post_page_url)
@@ -147,9 +148,9 @@ def fetch_and_create_pdf(url):
image_links = [] image_links = []
images = content_div.find_all('img') images = content_div.find_all('img')
for img in images: for img in images:
if img.get('src') and 'http' in img.get('src'): if img.get('zoomfile') and 'http' in img.get('zoomfile'):
image_links.append(img.get('src')) image_links.append(img.get('zoomfile'))
print(image_links)
if image_links: if image_links:
for img_link in image_links: for img_link in image_links:
image = download_image(img_link) image = download_image(img_link)