调整chrome内容

2025-05-06 17:14:44 +08:00
parent 3b8acf8864
commit 41edf3e3d9
1 changed files with 52 additions and 16 deletions
--- a/xiuren/meitu_dl.py
+++ b/xiuren/meitu_dl.py
@@ -1,3 +1,5 @@
+import random
+
 import requests
 from bs4 import BeautifulSoup
 import time
@@ -123,19 +125,50 @@ def fetch_images(post_url):
    return images


-def download_image(img_url, folder_path, img_index):
-    try:
-        response = requests.get(img_url, headers=headers, timeout=10)
-        response.raise_for_status()
+def download_image(img_url, folder_path, img_index, max_retries=3):  # Download one image with retries; returns True on success, False once all attempts fail
+    for attempt in range(max_retries):
+        try:
+            # Build request-specific headers from a copy of the shared module-level headers
+            local_headers = headers.copy()
+            local_headers['Referer'] = img_url  # use the image URL itself as the Referer
+            
+            # Add some extra headers to mimic a real browser
+            local_headers.update({
+                'Accept': 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8',
+                'Accept-Encoding': 'gzip, deflate, br',  # NOTE(review): 'br' responses are presumably only decoded when a brotli package is installed - confirm
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+                'Cache-Control': 'no-cache',
+                'Pragma': 'no-cache'
+            })
+            
+            response = requests.get(img_url, headers=local_headers, timeout=10)
+            response.raise_for_status()
+            
+            # Verify the content type before trying to decode the body as an image
+            content_type = response.headers.get('content-type', '')
+            if not content_type.startswith('image/'):
+                print(f"尝试 {attempt + 1}/{max_retries}: 返回内容不是图片类型 ({content_type})，等待后重试...")
+                time.sleep(2 * (attempt + 1))  # NOTE(review): also sleeps after the final attempt; the except path below guards against that
+                continue

-        img = Image.open(BytesIO(response.content)).convert('RGB')
-        img_name = f"{img_index:03d}.jpg"
-        img_path = os.path.join(folder_path, img_name)
+            img = Image.open(BytesIO(response.content)).convert('RGB')
+            img_name = f"{img_index:03d}.jpg"
+            img_path = os.path.join(folder_path, img_name)

-        img.save(img_path, 'JPEG', quality=95)
-        print(f"已下载并转换为JPG: {img_path}")
-    except Exception as e:
-        print(f"处理图片 {img_url} 失败: {e}")
+            img.save(img_path, 'JPEG', quality=95)
+            print(f"已下载并转换为JPG: {img_path}")
+            return True
+            
+        except Exception as e:
+            print(f"尝试 {attempt + 1}/{max_retries} 下载图片失败: {e}")
+            if attempt < max_retries - 1:
+                wait_time = random.uniform(2, 5) * (attempt + 1)  # random wait that scales with the attempt number
+                print(f"等待 {wait_time:.1f} 秒后重试...")
+                time.sleep(wait_time)
+            continue  # redundant: this is already the last statement of the loop body
+    
+    print(f"图片 {img_url} 下载失败，已达到最大重试次数")
+    return False


 def meitu_dowload_pic(dl_path, dl_url):
@@ -164,11 +197,14 @@ def meitu_dowload_pic(dl_path, dl_url):
        if images:
            print(f"共找到 {len(images)} 张图片，开始下载...")
            for idx, img_url in enumerate(images, 1):
-                download_image(img_url, folder_path, idx)
-        else:
-            print("未找到图片，可能需要调整策略。")
-
-        time.sleep(1)
+                # 增加随机延时
+                time.sleep(random.uniform(1, 3))
+                if not download_image(img_url, folder_path, idx):
+                    print(f"图片 {img_url} 下载失败，继续下一张...")
+                    continue
+        
+        # 每个帖子之间增加随机延时
+        time.sleep(random.uniform(3, 6))

    return download_root