优化下载目录检测功能

This commit is contained in:
liuwei
2025-02-28 09:12:56 +08:00
parent 078819cd17
commit 29d74e2ab1

View File

@@ -10,8 +10,6 @@ from selenium.webdriver.common.by import By
from PIL import Image
from io import BytesIO
from xiuren.xiuren_pdf import generate_pdf_from_images
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124 Safari/537.36',
'Referer': 'https://www.mntuce.com/'
@@ -22,7 +20,7 @@ download_root = "xiuren" # 全局定义下载根目录
download_root_heisi = 'xiuren/heisi'
def fetch_posts(base_url, posts_per_batch=10):
def fetch_posts(base_url,dl_path, posts_per_batch=10):
posts = []
page = 1
@@ -45,7 +43,7 @@ def fetch_posts(base_url, posts_per_batch=10):
# 检查帖子是否已下载
match = re.search(r'(?:[Nn][Oo]|[Vv][Oo][Ll])\.(\d+)', post_title) # 支持 "No." 或 "Vol."
folder_name = match.group(1) if match else f"unknown_{len(posts) + 1}"
folder_path = os.path.join(download_root, folder_name)
folder_path = os.path.join(dl_path, folder_name)
if post_url not in seen_posts:
if os.path.exists(folder_path):
@@ -127,7 +125,7 @@ def meitu_dowload_pic(dl_path, dl_url):
os.makedirs(dl_path)
print(f"开始爬取 {base_url} 的帖子...")
posts = fetch_posts(base_url, 10)
posts = fetch_posts(base_url,dl_path, 10)
if not posts:
print("未获取到符合条件的帖子,请检查选择器或网络连接。")