实现转图浏览器常驻复用并支持失败自动重建
变更项:1) 新增常驻浏览器管理器,避免每次截图都冷启动 Chromium。2) 截图流程改为复用同一 Browser,每次仅创建并关闭 context/page,提升稳定性与性能。3) 增加浏览器失联检测与自动重建机制,截图失败后重建一次并重试。4) 保留多候选浏览器启动策略(system/playwright-cache/bundled),并输出准确来源日志。5) 补充中文注释,明确常驻设计目标与故障恢复逻辑。
This commit is contained in:
@@ -2,6 +2,7 @@ import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import psutil
|
||||
from playwright.async_api import async_playwright
|
||||
@@ -477,72 +478,141 @@ def check_chromium_installed(path):
|
||||
return os.path.isfile(path) and os.access(path, os.X_OK)
|
||||
|
||||
|
||||
async def html_to_image(html_content, output_image):
|
||||
async with async_playwright() as p:
|
||||
browser_candidates = []
|
||||
if os.name == 'nt':
|
||||
# Windows 优先尝试常见系统安装路径。
|
||||
possible_chrome_paths = [
|
||||
r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
|
||||
r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
|
||||
r"C:\Program Files\Google\Chrome\Application\chrome.exe",
|
||||
r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
|
||||
]
|
||||
for path in possible_chrome_paths:
|
||||
if check_chromium_installed(path):
|
||||
browser_candidates.append(("system", path))
|
||||
else:
|
||||
import glob
|
||||
# Linux 先尝试系统可执行文件,再尝试 Playwright 缓存浏览器。
|
||||
for bin_name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
|
||||
bin_path = shutil.which(bin_name)
|
||||
if bin_path and check_chromium_installed(bin_path):
|
||||
browser_candidates.append(("system", bin_path))
|
||||
user_home = os.path.expanduser("~")
|
||||
glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome")
|
||||
chrome_paths = glob.glob(glob_pattern)
|
||||
for path in sorted(chrome_paths, reverse=True):
|
||||
if check_chromium_installed(path):
|
||||
browser_candidates.append(("playwright-cache", path))
|
||||
def _collect_browser_candidates() -> list[Tuple[str, str]]:
    """Return locally installed Chrome/Chromium executables to try, in priority order.

    Each entry is a ``(source, executable_path)`` tuple where ``source`` is
    ``"system"`` (a regular installation) or ``"playwright-cache"`` (a build
    found under ``~/.cache/ms-playwright``). Only paths accepted by
    ``check_chromium_installed`` are returned, and no path is listed twice.

    Returns:
        A possibly empty list; the caller decides what to do when nothing
        usable is installed.
    """
    candidates = []
    seen = set()

    def _add(source, path):
        # Compare normalized paths so the same binary is never tried twice
        # (every failed launch attempt costs the caller time).
        key = os.path.normcase(os.path.normpath(path))
        if key in seen:
            return
        if check_chromium_installed(path):
            seen.add(key)
            candidates.append((source, path))

    if os.name == 'nt':
        # Historical fixed locations are kept first so existing machines keep
        # resolving to the same executable as before.
        possible_chrome_paths = [
            r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
        ]
        # Also derive the install locations from the environment so the lookup
        # works for any user profile or non-default system drive.
        for env_name in ("LOCALAPPDATA", "PROGRAMFILES", "PROGRAMFILES(X86)"):
            base_dir = os.environ.get(env_name)
            if base_dir:
                possible_chrome_paths.append(
                    os.path.join(base_dir, "Google", "Chrome", "Application", "chrome.exe")
                )
        for path in possible_chrome_paths:
            _add("system", path)
    else:
        import glob

        # Executables on PATH come first, then browsers from the Playwright cache.
        for bin_name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
            bin_path = shutil.which(bin_name)
            if bin_path:
                _add("system", bin_path)

        user_home = os.path.expanduser("~")
        glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome")

        def _revision_key(path):
            # The directory is named "chromium-<revision>". Compare revisions as
            # numbers so chromium-1000 ranks above chromium-999; a plain string
            # sort would order them the other way round.
            build_dir = os.path.basename(os.path.dirname(os.path.dirname(path)))
            suffix = build_dir.rpartition("-")[2]
            revision = int(suffix) if suffix.isdigit() else -1
            return (revision, path)

        # Highest revision first.
        for path in sorted(glob.glob(glob_pattern), key=_revision_key, reverse=True):
            _add("playwright-cache", path)
    return candidates
|
||||
|
||||
launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
|
||||
browser = None
|
||||
launch_errors = []
|
||||
|
||||
# 优先按候选路径逐个尝试,失败自动降级,不让单一路径问题导致整体失败。
|
||||
for source, browser_path in browser_candidates:
|
||||
class _PersistentBrowser:
    """Long-lived browser manager shared by all screenshot requests.

    Goal: avoid cold-starting Chromium for every screenshot, which lowers the
    failure rate and speeds rendering up. A single Browser is kept alive and
    each screenshot only creates and closes its own context/page. When the
    browser is found disconnected, or a screenshot fails, the browser is
    rebuilt.
    """

    def __init__(self):
        # Playwright driver handle; started lazily on the first launch.
        self._playwright = None
        # The shared Browser instance, or None before the first launch.
        self._browser = None
        # Serializes launch/restart so concurrent callers never start two browsers.
        self._lock = asyncio.Lock()
        self._launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
        # Where the running browser came from; only used in log messages.
        self._last_launch_source = "unknown"

    async def _launch_browser(self):
        """Start a browser, trying each local candidate before the bundled one.

        Returns:
            The launched Browser.

        Raises:
            Whatever the bundled launch raises when every candidate failed and
            the bundled browser cannot be started either.
        """
        if self._playwright is None:
            self._playwright = await async_playwright().start()

        # Try the candidates one by one so a single broken installation does
        # not make the whole launch fail.
        for source, browser_path in _collect_browser_candidates():
            try:
                logger.debug(f"Launch chromium with {source}: {browser_path}")
                browser = await self._playwright.chromium.launch(
                    executable_path=browser_path,
                    args=self._launch_args,
                    timeout=20000,
                )
            except Exception as e:
                logger.warning(f"Launch chromium failed with {source}: {browser_path}, error={e}")
            else:
                self._last_launch_source = f"{source}:{browser_path}"
                return browser

        # Every candidate failed: fall back to Playwright's bundled browser.
        logger.debug("Launch chromium with bundled browser")
        browser = await self._playwright.chromium.launch(args=self._launch_args)
        self._last_launch_source = "bundled"
        return browser

    async def _relaunch_locked(self):
        """Dispose of the current browser (best effort) and launch a new one.

        The caller must already hold ``self._lock``.
        """
        stale, self._browser = self._browser, None
        if stale:
            # Clean up first so leftover handles do not interfere with the
            # rebuild. A failure here must not prevent the relaunch.
            # NOTE(review): safe_close_browser is defined elsewhere in this
            # file; it is assumed to close the given browser - confirm.
            try:
                await safe_close_browser(stale)
            except Exception as e:
                logger.debug(f"[md2img] ignoring error while closing stale browser: {e}")
        self._browser = await self._launch_browser()
        return self._browser

    async def ensure_browser(self):
        """Return a connected browser, launching one when none is available."""
        browser = self._browser
        if browser and browser.is_connected():
            return browser
        async with self._lock:
            # Re-check under the lock: another task may have finished the
            # launch while this one was waiting.
            browser = self._browser
            if browser and browser.is_connected():
                return browser
            browser = await self._relaunch_locked()
            logger.info(f"[md2img] 常驻浏览器就绪: source={self._last_launch_source}")
            return browser

    async def restart_browser(self, stale_browser=None):
        """Rebuild the shared browser and return the new instance.

        Args:
            stale_browser: Optional browser the caller just saw fail. When it
                is given and the manager already holds a different, connected
                browser, that browser is returned as is, so that several tasks
                failing at the same time do not tear down each other's fresh
                replacement. With the default ``None`` the browser is always
                rebuilt.
        """
        async with self._lock:
            current = self._browser
            already_replaced = (
                stale_browser is not None
                and current is not None
                and current is not stale_browser
                and current.is_connected()
            )
            if already_replaced:
                return current
            browser = await self._relaunch_locked()
            logger.info(f"[md2img] 常驻浏览器已重建: source={self._last_launch_source}")
            return browser

    async def _capture(self, active_browser, html_content: str, output_image: str):
        """Render ``html_content`` in a fresh context and save a full-page screenshot.

        Raises:
            RuntimeError: if no file exists at ``output_image`` afterwards.
        """
        context = await active_browser.new_context(
            viewport={"width": 780, "height": 960},
            device_scale_factor=1.2,
        )
        try:
            page = await context.new_page()
            logger.debug("Set page content")
            await page.set_content(html_content, wait_until='domcontentloaded', timeout=15000)
            logger.debug("Wait for fonts ready")
            await page.evaluate("document.fonts.ready")
            # Short pause before capturing - presumably lets layout settle
            # once the fonts are ready.
            await asyncio.sleep(0.2)
            logger.debug(f"Take screenshot: output={output_image}")
            await page.screenshot(path=output_image, full_page=True, timeout=15000, animations="disabled")
            if not os.path.exists(output_image):
                raise RuntimeError(f"截图失败,输出文件不存在: {output_image}")
        finally:
            # Only the context is closed; the browser stays alive for reuse.
            # Closing is best effort so it never masks the real error.
            try:
                await context.close()
            except Exception as e:
                logger.debug(f"[md2img] ignoring error while closing context: {e}")

    async def screenshot(self, html_content: str, output_image: str):
        """Render ``html_content`` to the image file ``output_image``.

        On the first failure the browser is rebuilt once and the capture is
        retried, which absorbs sporadic browser faults. An error raised by
        the retry propagates to the caller.
        """
        browser = await self.ensure_browser()
        try:
            await self._capture(browser, html_content, output_image)
        except Exception as e:
            logger.warning(f"[md2img] 常驻浏览器截图失败,准备重建后重试: {e}")
            browser = await self.restart_browser(stale_browser=browser)
            await self._capture(browser, html_content, output_image)
|
||||
|
||||
|
||||
# Process-wide manager instance; stays None until the first request needs it.
_BROWSER_MANAGER: Optional[_PersistentBrowser] = None


def _get_browser_manager() -> _PersistentBrowser:
    """Return the shared ``_PersistentBrowser``, creating it on first use."""
    global _BROWSER_MANAGER
    manager = _BROWSER_MANAGER
    if manager is None:
        manager = _PersistentBrowser()
        _BROWSER_MANAGER = manager
    return manager
|
||||
|
||||
|
||||
async def html_to_image(html_content, output_image):
    """Render ``html_content`` into the image file ``output_image``.

    Delegates to the shared browser manager's ``screenshot`` method.
    """
    await _get_browser_manager().screenshot(html_content, output_image)
|
||||
|
||||
|
||||
async def _await_with_progress(coro, timeout_seconds: int, stage_name: str, progress_interval_seconds: int = 10):
|
||||
|
||||
Reference in New Issue
Block a user