优化sehuat

This commit is contained in:
liuwei
2026-02-03 15:50:34 +08:00
parent ce0657e19e
commit 0432ca90dd

View File

@@ -25,6 +25,11 @@ from PIL import Image as PILImage
import re
from PyPDF2 import PdfReader, PdfWriter
from loguru import logger
from urllib.parse import urlparse
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
UNREACHABLE_HOSTS = set()
def download_image(url, session):
@@ -32,9 +37,25 @@ def download_image(url, session):
try:
if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
return None
response = session.get(url, timeout=15)
parsed = urlparse(url)
host = parsed.netloc.lower()
if host in UNREACHABLE_HOSTS:
return None
headers = {'Referer': f'{parsed.scheme}://{host}/'}
response = session.get(url, headers=headers, timeout=10)
response.raise_for_status()
return BytesIO(response.content)
except requests.exceptions.RequestException as e:
try:
parsed = urlparse(url)
host = parsed.netloc.lower()
msg = str(e)
if 'Network is unreachable' in msg or 'Failed to establish a new connection' in msg:
UNREACHABLE_HOSTS.add(host)
except Exception:
pass
logger.warning(f"下载图片失败: {e}")
return None
except Exception as e:
logger.warning(f"下载图片失败: {e}")
return None
@@ -133,7 +154,16 @@ def fetch_and_create_pdf(url):
# 同步 Session
session = requests.Session()
ua = driver.execute_script("return navigator.userAgent")
session.headers.update({'User-Agent': ua, 'Referer': 'https://tu.ymawv.la/'})
session.headers.update({
'User-Agent': ua,
'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*;q=0.8,*/*;q=0.5',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive'
})
retry = Retry(total=3, connect=3, read=3, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503, 504], allowed_methods=frozenset(['GET']))
adapter = HTTPAdapter(max_retries=retry)
session.mount('https://', adapter)
session.mount('http://', adapter)
for c in driver.get_cookies():
session.cookies.set(c['name'], c['value'])