diff --git a/utils/sehuatang/shehuatang_undetected.py b/utils/sehuatang/shehuatang_undetected.py
index 1518b3a..a5f20b3 100644
--- a/utils/sehuatang/shehuatang_undetected.py
+++ b/utils/sehuatang/shehuatang_undetected.py
@@ -25,6 +25,12 @@ from PIL import Image as PILImage
 import re
 from PyPDF2 import PdfReader, PdfWriter
 from loguru import logger
+from urllib.parse import urlparse
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
+
+# Lowercased netlocs whose connection attempts failed; download_image() skips them.
+UNREACHABLE_HOSTS = set()
 
 
 def download_image(url, session):
@@ -32,9 +38,26 @@ def download_image(url, session):
     try:
         if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
             return None
-        response = session.get(url, timeout=15)
+        parsed = urlparse(url)
+        host = parsed.netloc.lower()
+        # Skip hosts already recorded as unreachable so a dead image server
+        # does not cost a full timeout-and-retry cycle for every image on it.
+        if host in UNREACHABLE_HOSTS:
+            return None
+        # Send the image's own origin as Referer instead of one fixed site.
+        headers = {'Referer': f'{parsed.scheme}://{host}/'}
+        response = session.get(url, headers=headers, timeout=10)
         response.raise_for_status()
         return BytesIO(response.content)
+    except requests.exceptions.RequestException as e:
+        # `host` is always bound here: RequestException can only come from
+        # session.get()/raise_for_status(), which run after the URL is parsed.
+        msg = str(e)
+        if 'Network is unreachable' in msg or 'Failed to establish a new connection' in msg:
+            # Record the host in the module-level set so later calls skip it.
+            UNREACHABLE_HOSTS.add(host)
+        logger.warning(f"下载图片失败: {e}")
+        return None
     except Exception as e:
         logger.warning(f"下载图片失败: {e}")
         return None
@@ -133,7 +156,18 @@ def fetch_and_create_pdf(url):
     # 同步 Session
     session = requests.Session()
     ua = driver.execute_script("return navigator.userAgent")
-    session.headers.update({'User-Agent': ua, 'Referer': 'https://tu.ymawv.la/'})
+    session.headers.update({
+        'User-Agent': ua,
+        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*;q=0.8,*/*;q=0.5',
+        'Accept-Language': 'zh-CN,zh;q=0.9',
+        'Connection': 'keep-alive'
+    })
+    # Retry GET requests on connect/read errors and on 429/5xx responses,
+    # backing off between attempts; the same adapter serves both schemes.
+    retry = Retry(total=3, connect=3, read=3, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503, 504], allowed_methods=frozenset(['GET']))
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('https://', adapter)
+    session.mount('http://', adapter)
     for c in driver.get_cookies():
         session.cookies.set(c['name'], c['value'])
 