优化sehuat

This commit is contained in:
liuwei
2026-02-03 15:50:34 +08:00
parent ce0657e19e
commit 0432ca90dd

View File

@@ -25,6 +25,11 @@ from PIL import Image as PILImage
import re import re
from PyPDF2 import PdfReader, PdfWriter from PyPDF2 import PdfReader, PdfWriter
from loguru import logger from loguru import logger
from urllib.parse import urlparse
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
# Hosts whose connection attempts already failed in this process. Later
# downloads from them are skipped instead of waiting for another timeout.
UNREACHABLE_HOSTS = set()


def download_image(url, session):
    """Download one image and return its bytes wrapped in a ``BytesIO``.

    Args:
        url: Image address. Only URLs whose lower-cased text ends with
            ``.jpg``, ``.jpeg`` or ``.png`` are fetched.
        session: Object exposing a ``requests.Session``-style ``get`` method.

    Returns:
        A ``BytesIO`` holding the response body, or ``None`` when the URL is
        skipped (unsupported extension, or host already recorded in
        ``UNREACHABLE_HOSTS``) or when the download fails for any reason.
        Failures are logged as warnings and never raised to the caller.
    """
    # Bound up front so the RequestException handler can always read it.
    host = ''
    try:
        if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
            return None
        parsed = urlparse(url)
        host = parsed.netloc.lower()
        if host in UNREACHABLE_HOSTS:
            return None
        # Send the image's own origin as Referer on every request.
        headers = {'Referer': f'{parsed.scheme}://{host}/'}
        response = session.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        return BytesIO(response.content)
    except requests.exceptions.RequestException as e:
        msg = str(e)
        connection_failed = (
            'Network is unreachable' in msg
            or 'Failed to establish a new connection' in msg
        )
        # Remember hosts that cannot be connected to at all. An empty host is
        # never recorded, since that would block every host-less URL.
        if host and connection_failed:
            UNREACHABLE_HOSTS.add(host)
        logger.warning(f"下载图片失败: {e}")
        return None
    except Exception as e:
        logger.warning(f"下载图片失败: {e}")
        return None
@@ -133,7 +154,16 @@ def fetch_and_create_pdf(url):
# 同步 Session # 同步 Session
session = requests.Session() session = requests.Session()
ua = driver.execute_script("return navigator.userAgent") ua = driver.execute_script("return navigator.userAgent")
session.headers.update({'User-Agent': ua, 'Referer': 'https://tu.ymawv.la/'}) session.headers.update({
'User-Agent': ua,
'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*;q=0.8,*/*;q=0.5',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive'
})
retry = Retry(total=3, connect=3, read=3, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503, 504], allowed_methods=frozenset(['GET']))
adapter = HTTPAdapter(max_retries=retry)
session.mount('https://', adapter)
session.mount('http://', adapter)
for c in driver.get_cookies(): for c in driver.get_cookies():
session.cookies.set(c['name'], c['value']) session.cookies.set(c['name'], c['value'])