优化排版,获取2个图片
This commit is contained in:
@@ -74,9 +74,6 @@ def fetch_and_create_pdf(url):
|
|||||||
# 获取今天的日期
|
# 获取今天的日期
|
||||||
today = datetime.now().strftime('%Y-%m-%d')
|
today = datetime.now().strftime('%Y-%m-%d')
|
||||||
|
|
||||||
# 设置PDF
|
|
||||||
pdf_filename = f"JAV-{today}-{len(posts)}.pdf"
|
|
||||||
doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
|
|
||||||
|
|
||||||
# 注册中文字体
|
# 注册中文字体
|
||||||
pdfmetrics.registerFont(TTFont('SamHei', 'fonts/simhei.ttf')) # 设置中文字体路径
|
pdfmetrics.registerFont(TTFont('SamHei', 'fonts/simhei.ttf')) # 设置中文字体路径
|
||||||
@@ -101,6 +98,10 @@ def fetch_and_create_pdf(url):
|
|||||||
if post_time_span: # 判断是否存在post_time_span,即认为是当天发布的帖子
|
if post_time_span: # 判断是否存在post_time_span,即认为是当天发布的帖子
|
||||||
today_posts.append(post)
|
today_posts.append(post)
|
||||||
|
|
||||||
|
# 设置PDF
|
||||||
|
pdf_filename = f"JAV-{today}-{len(today_posts)}.pdf"
|
||||||
|
doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
|
||||||
|
|
||||||
# 遍历当天的帖子并提取信息
|
# 遍历当天的帖子并提取信息
|
||||||
for post in today_posts:
|
for post in today_posts:
|
||||||
# 查找帖子标题
|
# 查找帖子标题
|
||||||
@@ -108,7 +109,7 @@ def fetch_and_create_pdf(url):
|
|||||||
if title:
|
if title:
|
||||||
post_title = title.get_text()
|
post_title = title.get_text()
|
||||||
post_url = title.get('href')
|
post_url = title.get('href')
|
||||||
|
print(post_title)
|
||||||
# 获取帖子的页面
|
# 获取帖子的页面
|
||||||
post_page_url = 'https://www.sehuatang.net/' + post_url
|
post_page_url = 'https://www.sehuatang.net/' + post_url
|
||||||
driver.get(post_page_url)
|
driver.get(post_page_url)
|
||||||
@@ -147,9 +148,9 @@ def fetch_and_create_pdf(url):
|
|||||||
image_links = []
|
image_links = []
|
||||||
images = content_div.find_all('img')
|
images = content_div.find_all('img')
|
||||||
for img in images:
|
for img in images:
|
||||||
if img.get('src') and 'http' in img.get('src'):
|
if img.get('zoomfile') and 'http' in img.get('zoomfile'):
|
||||||
image_links.append(img.get('src'))
|
image_links.append(img.get('zoomfile'))
|
||||||
|
print(image_links)
|
||||||
if image_links:
|
if image_links:
|
||||||
for img_link in image_links:
|
for img_link in image_links:
|
||||||
image = download_image(img_link)
|
image = download_image(img_link)
|
||||||
|
|||||||
Reference in New Issue
Block a user