优化下载目录检测功能
This commit is contained in:
@@ -10,8 +10,6 @@ from selenium.webdriver.common.by import By
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
from xiuren.xiuren_pdf import generate_pdf_from_images
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Referer': 'https://www.mntuce.com/'
|
||||
@@ -22,7 +20,7 @@ download_root = "xiuren" # 全局定义下载根目录
|
||||
download_root_heisi = 'xiuren/heisi'
|
||||
|
||||
|
||||
-def fetch_posts(base_url, posts_per_batch=10):
|
||||
+def fetch_posts(base_url,dl_path, posts_per_batch=10):
|
||||
posts = []
|
||||
page = 1
|
||||
|
||||
@@ -45,7 +43,7 @@ def fetch_posts(base_url, posts_per_batch=10):
|
||||
# 检查帖子是否已下载
|
||||
match = re.search(r'(?:[Nn][Oo]|[Vv][Oo][Ll])\.(\d+)', post_title) # 支持 "No." 或 "Vol."
|
||||
folder_name = match.group(1) if match else f"unknown_{len(posts) + 1}"
|
||||
-folder_path = os.path.join(download_root, folder_name)
|
||||
+folder_path = os.path.join(dl_path, folder_name)
|
||||
|
||||
if post_url not in seen_posts:
|
||||
if os.path.exists(folder_path):
|
||||
@@ -127,7 +125,7 @@ def meitu_dowload_pic(dl_path, dl_url):
|
||||
os.makedirs(dl_path)
|
||||
|
||||
print(f"开始爬取 {base_url} 的帖子...")
|
||||
-posts = fetch_posts(base_url, 10)
|
||||
+posts = fetch_posts(base_url,dl_path, 10)
|
||||
|
||||
if not posts:
|
||||
print("未获取到符合条件的帖子,请检查选择器或网络连接。")
|
||||
|
||||
Reference in New Issue
Block a user