优化sehuat
This commit is contained in:
@@ -25,25 +25,30 @@ from PIL import Image as PILImage
|
||||
import re
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
from loguru import logger
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util import Retry
|
||||
|
||||
UNREACHABLE_HOSTS = set()
|
||||
|
||||
|
||||
def download_image(url, session):
|
||||
def download_image(url, session, referer=None):
|
||||
"""使用同步的 session 下载图片,确保 Cookie 一致"""
|
||||
try:
|
||||
if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
|
||||
return None
|
||||
parsed = urlparse(url)
|
||||
host = parsed.netloc.lower()
|
||||
if host in UNREACHABLE_HOSTS:
|
||||
return None
|
||||
headers = {'Referer': f'{parsed.scheme}://{host}/'}
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
else:
|
||||
headers['Referer'] = f'{parsed.scheme}://{host}/'
|
||||
response = session.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
ctype = response.headers.get('Content-Type', '')
|
||||
if 'image' not in ctype.lower() and not url.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
|
||||
return None
|
||||
return BytesIO(response.content)
|
||||
except requests.exceptions.RequestException as e:
|
||||
try:
|
||||
@@ -187,16 +192,23 @@ def fetch_and_create_pdf(url):
|
||||
for m in magnets:
|
||||
content.append(Paragraph(f"<b>{m}</b>", normal_style))
|
||||
|
||||
page_url = resp.url
|
||||
for img_tag in div.find_all('img'):
|
||||
src = img_tag.get('zoomfile')
|
||||
if src and 'http' in src:
|
||||
img_io = download_image(src, session)
|
||||
if img_io:
|
||||
with PILImage.open(img_io) as p_img:
|
||||
iw, ih = p_img.size
|
||||
sc = min(max_w / iw, max_h / ih, 1.0)
|
||||
img_io.seek(0)
|
||||
content.append(Image(img_io, width=iw * sc, height=ih * sc))
|
||||
src = img_tag.get('zoomfile') or img_tag.get('file') or img_tag.get('src') or img_tag.get('data-src')
|
||||
if not src:
|
||||
continue
|
||||
if src.startswith('//'):
|
||||
src = 'https:' + src
|
||||
elif not src.startswith('http'):
|
||||
src = urljoin(page_url, src)
|
||||
img_io = download_image(src, session, referer=page_url)
|
||||
if img_io:
|
||||
with PILImage.open(img_io) as p_img:
|
||||
iw, ih = p_img.size
|
||||
sc = min(max_w / iw, max_h / ih, 1.0)
|
||||
img_io.seek(0)
|
||||
content.append(Image(img_io, width=iw * sc, height=ih * sc))
|
||||
time.sleep(0.15)
|
||||
|
||||
if post != today_posts[-1]: content.append(PageBreak())
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user