优化sehuat
This commit is contained in:
@@ -25,42 +25,16 @@ from PIL import Image as PILImage
|
|||||||
import re
|
import re
|
||||||
from PyPDF2 import PdfReader, PdfWriter
|
from PyPDF2 import PdfReader, PdfWriter
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from urllib.parse import urlparse, urljoin
|
|
||||||
from requests.adapters import HTTPAdapter
|
|
||||||
from urllib3.util import Retry
|
|
||||||
|
|
||||||
UNREACHABLE_HOSTS = set()
|
|
||||||
|
|
||||||
|
|
||||||
def download_image(url, session, referer=None):
|
def download_image(url, session):
|
||||||
"""使用同步的 session 下载图片,确保 Cookie 一致"""
|
"""使用同步的 session 下载图片,确保 Cookie 一致"""
|
||||||
try:
|
try:
|
||||||
parsed = urlparse(url)
|
if not url.lower().endswith(('.jpg', '.jpeg', '.png')):
|
||||||
host = parsed.netloc.lower()
|
|
||||||
if host in UNREACHABLE_HOSTS:
|
|
||||||
return None
|
return None
|
||||||
headers = {}
|
response = session.get(url, timeout=15)
|
||||||
if referer:
|
|
||||||
headers['Referer'] = referer
|
|
||||||
else:
|
|
||||||
headers['Referer'] = f'{parsed.scheme}://{host}/'
|
|
||||||
response = session.get(url, headers=headers, timeout=10)
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
ctype = response.headers.get('Content-Type', '')
|
|
||||||
if 'image' not in ctype.lower() and not url.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
|
|
||||||
return None
|
|
||||||
return BytesIO(response.content)
|
return BytesIO(response.content)
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
try:
|
|
||||||
parsed = urlparse(url)
|
|
||||||
host = parsed.netloc.lower()
|
|
||||||
msg = str(e)
|
|
||||||
if 'Network is unreachable' in msg or 'Failed to establish a new connection' in msg:
|
|
||||||
UNREACHABLE_HOSTS.add(host)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
logger.warning(f"下载图片失败: {e}")
|
|
||||||
return None
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"下载图片失败: {e}")
|
logger.warning(f"下载图片失败: {e}")
|
||||||
return None
|
return None
|
||||||
@@ -159,16 +133,7 @@ def fetch_and_create_pdf(url):
|
|||||||
# 同步 Session
|
# 同步 Session
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
ua = driver.execute_script("return navigator.userAgent")
|
ua = driver.execute_script("return navigator.userAgent")
|
||||||
session.headers.update({
|
session.headers.update({'User-Agent': ua, 'Referer': 'https://www.sehuatang.net/'})
|
||||||
'User-Agent': ua,
|
|
||||||
'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*;q=0.8,*/*;q=0.5',
|
|
||||||
'Accept-Language': 'zh-CN,zh;q=0.9',
|
|
||||||
'Connection': 'keep-alive'
|
|
||||||
})
|
|
||||||
retry = Retry(total=3, connect=3, read=3, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503, 504], allowed_methods=frozenset(['GET']))
|
|
||||||
adapter = HTTPAdapter(max_retries=retry)
|
|
||||||
session.mount('https://', adapter)
|
|
||||||
session.mount('http://', adapter)
|
|
||||||
for c in driver.get_cookies():
|
for c in driver.get_cookies():
|
||||||
session.cookies.set(c['name'], c['value'])
|
session.cookies.set(c['name'], c['value'])
|
||||||
|
|
||||||
@@ -192,23 +157,16 @@ def fetch_and_create_pdf(url):
|
|||||||
for m in magnets:
|
for m in magnets:
|
||||||
content.append(Paragraph(f"<b>{m}</b>", normal_style))
|
content.append(Paragraph(f"<b>{m}</b>", normal_style))
|
||||||
|
|
||||||
page_url = resp.url
|
|
||||||
for img_tag in div.find_all('img'):
|
for img_tag in div.find_all('img'):
|
||||||
src = img_tag.get('zoomfile') or img_tag.get('file') or img_tag.get('src') or img_tag.get('data-src')
|
src = img_tag.get('zoomfile')
|
||||||
if not src:
|
if src and 'http' in src:
|
||||||
continue
|
img_io = download_image(src, session)
|
||||||
if src.startswith('//'):
|
|
||||||
src = 'https:' + src
|
|
||||||
elif not src.startswith('http'):
|
|
||||||
src = urljoin(page_url, src)
|
|
||||||
img_io = download_image(src, session, referer=page_url)
|
|
||||||
if img_io:
|
if img_io:
|
||||||
with PILImage.open(img_io) as p_img:
|
with PILImage.open(img_io) as p_img:
|
||||||
iw, ih = p_img.size
|
iw, ih = p_img.size
|
||||||
sc = min(max_w / iw, max_h / ih, 1.0)
|
sc = min(max_w / iw, max_h / ih, 1.0)
|
||||||
img_io.seek(0)
|
img_io.seek(0)
|
||||||
content.append(Image(img_io, width=iw * sc, height=ih * sc))
|
content.append(Image(img_io, width=iw * sc, height=ih * sc))
|
||||||
time.sleep(0.15)
|
|
||||||
|
|
||||||
if post != today_posts[-1]: content.append(PageBreak())
|
if post != today_posts[-1]: content.append(PageBreak())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user