调整chrome内容
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
import random
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import time
|
||||
@@ -123,19 +125,50 @@ def fetch_images(post_url):
|
||||
return images
|
||||
|
||||
|
||||
def download_image(img_url, folder_path, img_index, max_retries=3):
    """Download one image, convert it to RGB and save it as a JPEG file.

    The request is sent with a copy of the module-level ``headers`` mapping
    (defined outside this block), extended with a ``Referer`` equal to the
    image URL and several browser-style ``Accept*`` / cache headers.  A
    response whose ``content-type`` does not start with ``image/`` is treated
    as a failed attempt and retried.

    Args:
        img_url: URL of the image to fetch.
        folder_path: Directory the JPEG is written into.
        img_index: Integer used to build the file name (``001.jpg``, ...).
        max_retries: Maximum number of download attempts.

    Returns:
        True once the image has been saved, False when every attempt failed
        (including when ``max_retries`` is not positive).
    """
    # The headers do not change between attempts, so build them once.
    request_headers = headers.copy()
    request_headers['Referer'] = img_url  # the image URL itself is sent as referer

    # Extra headers that imitate a real browser.
    # NOTE(review): advertising 'br' means the server may answer with Brotli;
    # confirm the runtime can decode it, otherwise drop 'br' from this list.
    request_headers.update({
        'Accept': 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
    })

    for attempt in range(max_retries):
        retries_left = attempt < max_retries - 1
        try:
            response = requests.get(img_url, headers=request_headers, timeout=10)
            response.raise_for_status()

            # Validate the content type; media types are case-insensitive.
            content_type = response.headers.get('content-type', '')
            if not content_type.lower().startswith('image/'):
                print(f"尝试 {attempt + 1}/{max_retries}: 返回内容不是图片类型 ({content_type}),等待后重试...")
                # Back off only when another attempt will actually follow.
                if retries_left:
                    time.sleep(2 * (attempt + 1))
                continue

            img = Image.open(BytesIO(response.content)).convert('RGB')
            img_name = f"{img_index:03d}.jpg"
            img_path = os.path.join(folder_path, img_name)

            img.save(img_path, 'JPEG', quality=95)
            print(f"已下载并转换为JPG: {img_path}")
            return True

        except Exception as e:
            # Deliberate best-effort boundary: network, decoding and file
            # errors are all reported and retried rather than propagated.
            print(f"尝试 {attempt + 1}/{max_retries} 下载图片失败: {e}")
            if retries_left:
                # Randomised wait that grows with the attempt number.
                wait_time = random.uniform(2, 5) * (attempt + 1)
                print(f"等待 {wait_time:.1f} 秒后重试...")
                time.sleep(wait_time)
                continue

    print(f"图片 {img_url} 下载失败,已达到最大重试次数")
    return False
|
||||
|
||||
|
||||
def meitu_dowload_pic(dl_path, dl_url):
|
||||
@@ -164,11 +197,14 @@ def meitu_dowload_pic(dl_path, dl_url):
|
||||
if images:
|
||||
print(f"共找到 {len(images)} 张图片,开始下载...")
|
||||
for idx, img_url in enumerate(images, 1):
|
||||
download_image(img_url, folder_path, idx)
|
||||
else:
|
||||
print("未找到图片,可能需要调整策略。")
|
||||
|
||||
time.sleep(1)
|
||||
# 增加随机延时
|
||||
time.sleep(random.uniform(1, 3))
|
||||
if not download_image(img_url, folder_path, idx):
|
||||
print(f"图片 {img_url} 下载失败,继续下一张...")
|
||||
continue
|
||||
|
||||
# 每个帖子之间增加随机延时
|
||||
time.sleep(random.uniform(3, 6))
|
||||
|
||||
return download_root
|
||||
|
||||
|
||||
Reference in New Issue
Block a user