优化sehuat

This commit is contained in:
liuwei
2026-02-03 15:53:56 +08:00
parent 0432ca90dd
commit 5c43d5dc39

View File

@@ -25,25 +25,30 @@ from PIL import Image as PILImage
import re
from PyPDF2 import PdfReader, PdfWriter
from loguru import logger
from urllib.parse import urlparse
from urllib.parse import urlparse, urljoin
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
UNREACHABLE_HOSTS = set()
def download_image(url, session):
def download_image(url, session, referer=None):
"""使用同步的 session 下载图片,确保 Cookie 一致"""
try:
if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
return None
parsed = urlparse(url)
host = parsed.netloc.lower()
if host in UNREACHABLE_HOSTS:
return None
headers = {'Referer': f'{parsed.scheme}://{host}/'}
headers = {}
if referer:
headers['Referer'] = referer
else:
headers['Referer'] = f'{parsed.scheme}://{host}/'
response = session.get(url, headers=headers, timeout=10)
response.raise_for_status()
ctype = response.headers.get('Content-Type', '')
if 'image' not in ctype.lower() and not url.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
return None
return BytesIO(response.content)
except requests.exceptions.RequestException as e:
try:
@@ -187,16 +192,23 @@ def fetch_and_create_pdf(url):
for m in magnets:
content.append(Paragraph(f"<b>{m}</b>", normal_style))
page_url = resp.url
for img_tag in div.find_all('img'):
src = img_tag.get('zoomfile')
if src and 'http' in src:
img_io = download_image(src, session)
if img_io:
with PILImage.open(img_io) as p_img:
iw, ih = p_img.size
sc = min(max_w / iw, max_h / ih, 1.0)
img_io.seek(0)
content.append(Image(img_io, width=iw * sc, height=ih * sc))
src = img_tag.get('zoomfile') or img_tag.get('file') or img_tag.get('src') or img_tag.get('data-src')
if not src:
continue
if src.startswith('//'):
src = 'https:' + src
elif not src.startswith('http'):
src = urljoin(page_url, src)
img_io = download_image(src, session, referer=page_url)
if img_io:
with PILImage.open(img_io) as p_img:
iw, ih = p_img.size
sc = min(max_w / iw, max_h / ih, 1.0)
img_io.seek(0)
content.append(Image(img_io, width=iw * sc, height=ih * sc))
time.sleep(0.15)
if post != today_posts[-1]: content.append(PageBreak())
except Exception as e: