sehuatang pdf 优化
This commit is contained in:
9
robot.py
9
robot.py
@@ -702,12 +702,9 @@ class Robot:
|
||||
async def generate_sehuatang_pdf(self):
|
||||
try:
|
||||
self.LOG.info("开始生成PDF,generate_sehuatang_pdf")
|
||||
try:
|
||||
path = pdf_file_path()
|
||||
except Exception as e:
|
||||
self.LOG.error(f"generate_sehuatang_pdf error: {e}")
|
||||
path = pdf_file_path_undetected()
|
||||
|
||||
tag, path = pdf_file_path()
|
||||
if not tag:
|
||||
tag, path = pdf_file_path_undetected()
|
||||
# 暂时只发4K群
|
||||
await self.send_group_file_message(path, Feature.PDF_CAPABILITY)
|
||||
except Exception as e:
|
||||
|
||||
@@ -23,6 +23,7 @@ from PyPDF2 import PdfReader, PdfWriter
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
# download_image 函数保持不变
|
||||
def download_image(url):
|
||||
"""下载大于100KB的图片并返回临时文件路径,仅支持jpg、jpeg和png格式"""
|
||||
@@ -57,7 +58,7 @@ def fetch_and_create_pdf(url):
|
||||
options.add_argument('--disable-logging')
|
||||
options.add_argument('--log-level=3')
|
||||
options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
|
||||
|
||||
|
||||
# 根据操作系统选择不同的ChromeDriver路径处理方式
|
||||
if os.name == 'nt': # Windows
|
||||
chrome_driver_path = os.path.join(
|
||||
@@ -66,7 +67,7 @@ def fetch_and_create_pdf(url):
|
||||
)
|
||||
else: # Linux
|
||||
chrome_driver_path = '/usr/bin/chromedriver' # 使用系统PATH中的chromedriver
|
||||
|
||||
|
||||
try:
|
||||
if os.name == 'nt' and not os.path.exists(chrome_driver_path):
|
||||
chrome_driver_path = ChromeDriverManager().install()
|
||||
@@ -81,12 +82,14 @@ def fetch_and_create_pdf(url):
|
||||
# 获取目标页面
|
||||
driver.get(url)
|
||||
try:
|
||||
enter_button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁,请点此进入")]')))
|
||||
enter_button = WebDriverWait(driver, 5).until(
|
||||
EC.element_to_be_clickable((By.XPATH, '//a[contains(text(), "满18岁,请点此进入")]')))
|
||||
enter_button.click()
|
||||
logger.debug("点击了满18岁按钮")
|
||||
except Exception as e:
|
||||
logger.warning(f"未找到满18岁按钮,跳过此步骤: {e}")
|
||||
WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]')))
|
||||
WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody[id^="normalthread"]')))
|
||||
|
||||
# 处理年龄验证按钮
|
||||
try:
|
||||
@@ -132,16 +135,16 @@ def fetch_and_create_pdf(url):
|
||||
|
||||
# 设置PDF
|
||||
# 修改PDF文件路径到项目根目录的temp目录下
|
||||
pdf_filename = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
|
||||
'temp',
|
||||
f"JAV-{today}-{len(today_posts)}.pdf")
|
||||
pdf_filename = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
|
||||
'temp',
|
||||
f"JAV-{today}-{len(today_posts)}.pdf")
|
||||
doc = SimpleDocTemplate(pdf_filename, pagesize=A3)
|
||||
|
||||
|
||||
# 计算内容区域的宽度和高度
|
||||
page_width, page_height = A3
|
||||
content_width = page_width - doc.rightMargin - doc.leftMargin
|
||||
content_height = page_height - doc.topMargin - doc.bottomMargin
|
||||
|
||||
|
||||
# 设置最大图片尺寸,留出一些边距
|
||||
max_image_width = content_width * 0.95
|
||||
max_image_height = content_height * 0.7 # 留出足够空间给文本和其他元素
|
||||
@@ -210,19 +213,20 @@ def fetch_and_create_pdf(url):
|
||||
scale_width = max_image_width / img_width
|
||||
scale_height = max_image_height / img_height
|
||||
scale = min(scale_width, scale_height, 1.0) # 不超过原始大小
|
||||
|
||||
|
||||
# 计算新的尺寸
|
||||
new_width = img_width * scale
|
||||
new_height = img_height * scale
|
||||
|
||||
|
||||
# 重置文件指针
|
||||
image.seek(0)
|
||||
img_stream = BytesIO(image.getvalue())
|
||||
|
||||
|
||||
# 添加图片到内容中,使用计算后的尺寸
|
||||
content.append(Image(img_stream, width=new_width, height=new_height))
|
||||
content.append(Spacer(1, 4))
|
||||
logger.debug(f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
|
||||
logger.debug(
|
||||
f"处理图片: 原始尺寸 {img_width}x{img_height}, 新尺寸 {new_width}x{new_height}")
|
||||
except Exception as e:
|
||||
logger.error(f"处理图片时出错: {e}")
|
||||
|
||||
@@ -264,6 +268,7 @@ def fetch_and_create_pdf(url):
|
||||
except Exception as e2:
|
||||
logger.error(f"强制终止Chrome进程失败: {e2}")
|
||||
|
||||
|
||||
# add_pdf_encryption 和 pdf_file_path 函数保持不变
|
||||
def add_pdf_encryption(pdf_file, password="4000"):
|
||||
"""使用PyPDF2为PDF添加加密保护"""
|
||||
@@ -282,21 +287,21 @@ def add_pdf_encryption(pdf_file, password="4000"):
|
||||
|
||||
def pdf_file_path():
    """Generate the forum PDF and report whether generation succeeded.

    Calls ``fetch_and_create_pdf`` for the forum listing URL.

    Returns:
        tuple[bool, str]: ``(True, pdf_path)`` when ``fetch_and_create_pdf``
        returns a truthy path; otherwise ``(False, default_path)``, where
        ``default_path`` points at ``default.pdf`` in this module's
        directory. Exceptions raised during generation are logged and
        reported as a failure instead of being propagated.
    """
    url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
    # Computed once so that every failure branch hands back the same fallback.
    default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")

    try:
        pdf_path = fetch_and_create_pdf(url)
    except Exception as e:
        logger.error(f"生成PDF路径时出错: {e}")
        return False, default_path

    if pdf_path:
        logger.info(f"返回的PDF文件路径:{pdf_path}")
        return True, pdf_path

    # Generation finished without raising but produced no file.
    logger.info(f"PDF生成失败,返回默认路径: {default_path}")
    return False, default_path
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -3,6 +3,7 @@ import os
|
||||
import requests
|
||||
from io import BytesIO
|
||||
import undetected_chromedriver as uc
|
||||
|
||||
if os.name == 'nt':
|
||||
try:
|
||||
uc.Chrome.__del__ = lambda self: None
|
||||
@@ -172,8 +173,22 @@ def fetch_and_create_pdf(url):
|
||||
|
||||
|
||||
def pdf_file_path_undetected():
    """Generate the forum PDF with this module's ``fetch_and_create_pdf``.

    Mirrors the ``(success, path)`` contract of ``pdf_file_path`` so that a
    caller can unpack the result of either function the same way.

    Returns:
        tuple[bool, str]: ``(True, pdf_path)`` when ``fetch_and_create_pdf``
        returns a truthy path; otherwise ``(False, default_path)``, where
        ``default_path`` points at ``default.pdf`` in this module's
        directory. Exceptions raised during generation are logged and
        reported as a failure instead of being propagated.
    """
    url = 'https://www.sehuatang.net/forum.php?mod=forumdisplay&fid=103&filter=typeid&typeid=481'
    # Computed once so that every failure branch hands back the same fallback.
    default_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "default.pdf")

    try:
        pdf_path = fetch_and_create_pdf(url)
    except Exception as e:
        logger.error(f"生成PDF路径时出错: {e}")
        return False, default_path

    if pdf_path:
        logger.info(f"返回的PDF文件路径:{pdf_path}")
        return True, pdf_path

    # Generation finished without raising but produced no file.
    logger.info(f"PDF生成失败,返回默认路径: {default_path}")
    return False, default_path
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user