From 41edf3e3d988448fae2e6bf240bf09ceb0d10c19 Mon Sep 17 00:00:00 2001 From: liuwei Date: Tue, 6 May 2025 17:14:44 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4chrome=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xiuren/meitu_dl.py | 68 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/xiuren/meitu_dl.py b/xiuren/meitu_dl.py index 5ef1301..5fc595e 100644 --- a/xiuren/meitu_dl.py +++ b/xiuren/meitu_dl.py @@ -1,3 +1,5 @@ +import random + import requests from bs4 import BeautifulSoup import time @@ -123,19 +125,50 @@ def fetch_images(post_url): return images -def download_image(img_url, folder_path, img_index): - try: - response = requests.get(img_url, headers=headers, timeout=10) - response.raise_for_status() +def download_image(img_url, folder_path, img_index, max_retries=3): + for attempt in range(max_retries): + try: + # 构建特定的headers + local_headers = headers.copy() + local_headers['Referer'] = img_url # 使用图片URL作为referer + + # 添加一些额外的headers模拟真实浏览器 + local_headers.update({ + 'Accept': 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache' + }) + + response = requests.get(img_url, headers=local_headers, timeout=10) + response.raise_for_status() + + # 验证内容类型 + content_type = response.headers.get('content-type', '') + if not content_type.startswith('image/'): + print(f"尝试 {attempt + 1}/{max_retries}: 返回内容不是图片类型 ({content_type}),等待后重试...") + time.sleep(2 * (attempt + 1)) + continue - img = Image.open(BytesIO(response.content)).convert('RGB') - img_name = f"{img_index:03d}.jpg" - img_path = os.path.join(folder_path, img_name) + img = Image.open(BytesIO(response.content)).convert('RGB') + img_name = f"{img_index:03d}.jpg" + img_path = os.path.join(folder_path, img_name) - img.save(img_path, 'JPEG', quality=95) - print(f"已下载并转换为JPG: {img_path}") - except Exception as e: - print(f"处理图片 {img_url} 失败: {e}") + img.save(img_path, 'JPEG', quality=95) + print(f"已下载并转换为JPG: {img_path}") + return True + + except Exception as e: + print(f"尝试 {attempt + 1}/{max_retries} 下载图片失败: {e}") + if attempt < max_retries - 1: + wait_time = random.uniform(2, 5) * (attempt + 1) # 随机递增等待时间 + print(f"等待 {wait_time:.1f} 秒后重试...") + time.sleep(wait_time) + continue + + print(f"图片 {img_url} 下载失败,已达到最大重试次数") + return False def meitu_dowload_pic(dl_path, dl_url): @@ -164,11 +197,14 @@ def meitu_dowload_pic(dl_path, dl_url): if images: print(f"共找到 {len(images)} 张图片,开始下载...") for idx, img_url in enumerate(images, 1): - download_image(img_url, folder_path, idx) - else: - print("未找到图片,可能需要调整策略。") - - time.sleep(1) + # 增加随机延时 + time.sleep(random.uniform(1, 3)) + if not download_image(img_url, folder_path, idx): + print(f"图片 {img_url} 下载失败,继续下一张...") + continue + + # 每个帖子之间增加随机延时 + time.sleep(random.uniform(3, 6)) return download_root