解决句柄占用问题。
This commit is contained in:
@@ -19,6 +19,7 @@ from PIL import Image as PILImage
|
|||||||
import re
|
import re
|
||||||
from PyPDF2 import PdfReader, PdfWriter
|
from PyPDF2 import PdfReader, PdfWriter
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
# download_image 函数保持不变
|
# download_image 函数保持不变
|
||||||
def download_image(url):
|
def download_image(url):
|
||||||
@@ -37,7 +38,7 @@ def download_image(url):
|
|||||||
image = BytesIO(response.content)
|
image = BytesIO(response.content)
|
||||||
return image
|
return image
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
print(f"下载图片失败: {e}")
|
logger.info(f"下载图片失败: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -65,16 +66,16 @@ def fetch_and_create_pdf(url):
|
|||||||
|
|
||||||
driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
|
driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"初始化ChromeDriver失败: {e}")
|
logger.info(f"初始化ChromeDriver失败: {e}")
|
||||||
chrome_driver_path = ChromeDriverManager().install()
|
chrome_driver_path = ChromeDriverManager().install()
|
||||||
driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
|
driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
|
||||||
|
|
||||||
# 如果本地没有chromedriver.exe,则使用webdriver_manager下载一次
|
# 如果本地没有chromedriver.exe,则使用webdriver_manager下载一次
|
||||||
if not os.path.exists(chrome_driver_path):
|
if not os.path.exists(chrome_driver_path):
|
||||||
chrome_driver_path = ChromeDriverManager().install()
|
chrome_driver_path = ChromeDriverManager().install()
|
||||||
print(f"ChromeDriver已下载到: {chrome_driver_path}")
|
logger.info(f"ChromeDriver已下载到: {chrome_driver_path}")
|
||||||
else:
|
else:
|
||||||
print(f"使用本地ChromeDriver: {chrome_driver_path}")
|
logger.info(f"使用本地ChromeDriver: {chrome_driver_path}")
|
||||||
|
|
||||||
driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
|
driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
|
||||||
|
|
||||||
@@ -86,10 +87,10 @@ def fetch_and_create_pdf(url):
|
|||||||
try:
|
try:
|
||||||
enter_button = driver.find_element(By.XPATH, '//a[contains(text(), "满18岁,请点此进入")]')
|
enter_button = driver.find_element(By.XPATH, '//a[contains(text(), "满18岁,请点此进入")]')
|
||||||
enter_button.click()
|
enter_button.click()
|
||||||
print("点击了满18岁按钮")
|
logger.info("点击了满18岁按钮")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("未找到满18岁按钮,跳过此步骤", e)
|
logger.info("未找到满18岁按钮,跳过此步骤", e)
|
||||||
|
|
||||||
# 解析页面
|
# 解析页面
|
||||||
html = driver.page_source
|
html = driver.page_source
|
||||||
@@ -143,7 +144,7 @@ def fetch_and_create_pdf(url):
|
|||||||
if title:
|
if title:
|
||||||
post_title = title.get_text()
|
post_title = title.get_text()
|
||||||
post_url = title.get('href')
|
post_url = title.get('href')
|
||||||
print(post_title)
|
logger.info(post_title)
|
||||||
|
|
||||||
# 获取帖子内容
|
# 获取帖子内容
|
||||||
post_page_url = 'https://www.sehuatang.net/' + post_url
|
post_page_url = 'https://www.sehuatang.net/' + post_url
|
||||||
@@ -200,9 +201,9 @@ def fetch_and_create_pdf(url):
|
|||||||
# 添加图片到内容中,使用计算后的尺寸
|
# 添加图片到内容中,使用计算后的尺寸
|
||||||
content.append(Image(img_stream, width=new_width, height=new_height))
|
content.append(Image(img_stream, width=new_width, height=new_height))
|
||||||
content.append(Spacer(1, 4))
|
content.append(Spacer(1, 4))
|
||||||
print(f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
|
logger.info(f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"处理图片时出错: {e}")
|
logger.info(f"处理图片时出错: {e}")
|
||||||
|
|
||||||
# 在每个帖子后添加分页符(除了最后一页)
|
# 在每个帖子后添加分页符(除了最后一页)
|
||||||
if post != today_posts[-1]:
|
if post != today_posts[-1]:
|
||||||
@@ -212,7 +213,7 @@ def fetch_and_create_pdf(url):
|
|||||||
try:
|
try:
|
||||||
doc.build(content)
|
doc.build(content)
|
||||||
absolute_pdf_path = os.path.abspath(pdf_filename)
|
absolute_pdf_path = os.path.abspath(pdf_filename)
|
||||||
print(f"PDF saved as {absolute_pdf_path}")
|
logger.info(f"PDF saved as {absolute_pdf_path}")
|
||||||
|
|
||||||
# 加密PDF
|
# 加密PDF
|
||||||
add_pdf_encryption(absolute_pdf_path)
|
add_pdf_encryption(absolute_pdf_path)
|
||||||
@@ -220,7 +221,7 @@ def fetch_and_create_pdf(url):
|
|||||||
|
|
||||||
return absolute_pdf_path
|
return absolute_pdf_path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"生成PDF时出错: {e}")
|
logger.info(f"生成PDF时出错: {e}")
|
||||||
driver.quit()
|
driver.quit()
|
||||||
# 如果生成失败,返回一个默认路径或空字符串
|
# 如果生成失败,返回一个默认路径或空字符串
|
||||||
return ""
|
return ""
|
||||||
@@ -237,9 +238,9 @@ def add_pdf_encryption(pdf_file, password="4000"):
|
|||||||
pdf_writer.encrypt(password)
|
pdf_writer.encrypt(password)
|
||||||
with open(pdf_file, "wb") as output_pdf:
|
with open(pdf_file, "wb") as output_pdf:
|
||||||
pdf_writer.write(output_pdf)
|
pdf_writer.write(output_pdf)
|
||||||
print(f"PDF加密成功,密码为: {password}")
|
logger.info(f"PDF加密成功,密码为: {password}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"PDF加密失败: {e}")
|
logger.info(f"PDF加密失败: {e}")
|
||||||
|
|
||||||
|
|
||||||
def pdf_file_path():
|
def pdf_file_path():
|
||||||
@@ -247,15 +248,15 @@ def pdf_file_path():
|
|||||||
url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
|
url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
|
||||||
pdf_path = fetch_and_create_pdf(url)
|
pdf_path = fetch_and_create_pdf(url)
|
||||||
if pdf_path:
|
if pdf_path:
|
||||||
print(f"返回的PDF文件路径:{pdf_path}")
|
logger.info(f"返回的PDF文件路径:{pdf_path}")
|
||||||
return pdf_path
|
return pdf_path
|
||||||
else:
|
else:
|
||||||
# 如果生成失败,返回一个默认的PDF路径
|
# 如果生成失败,返回一个默认的PDF路径
|
||||||
default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
|
default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
|
||||||
print(f"PDF生成失败,返回默认路径: {default_path}")
|
logger.info(f"PDF生成失败,返回默认路径: {default_path}")
|
||||||
return default_path
|
return default_path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"生成PDF路径时出错: {e}")
|
logger.info(f"生成PDF路径时出错: {e}")
|
||||||
# 返回一个默认路径
|
# 返回一个默认路径
|
||||||
default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
|
default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")
|
||||||
return default_path
|
return default_path
|
||||||
|
|||||||
Reference in New Issue
Block a user