diff --git a/utils/markdown_to_image.py b/utils/markdown_to_image.py index 7d1d399..8f4fe0c 100644 --- a/utils/markdown_to_image.py +++ b/utils/markdown_to_image.py @@ -2,6 +2,7 @@ import subprocess import time from pathlib import Path import shutil +from typing import Optional, Tuple import psutil from playwright.async_api import async_playwright @@ -477,72 +478,141 @@ def check_chromium_installed(path): return os.path.isfile(path) and os.access(path, os.X_OK) -async def html_to_image(html_content, output_image): - async with async_playwright() as p: - browser_candidates = [] - if os.name == 'nt': - # Windows 优先尝试常见系统安装路径。 - possible_chrome_paths = [ - r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe", - r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe", - r"C:\Program Files\Google\Chrome\Application\chrome.exe", - r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe" - ] - for path in possible_chrome_paths: - if check_chromium_installed(path): - browser_candidates.append(("system", path)) - else: - import glob - # Linux 先尝试系统可执行文件,再尝试 Playwright 缓存浏览器。 - for bin_name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"): - bin_path = shutil.which(bin_name) - if bin_path and check_chromium_installed(bin_path): - browser_candidates.append(("system", bin_path)) - user_home = os.path.expanduser("~") - glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome") - chrome_paths = glob.glob(glob_pattern) - for path in sorted(chrome_paths, reverse=True): - if check_chromium_installed(path): - browser_candidates.append(("playwright-cache", path)) +def _collect_browser_candidates() -> list[Tuple[str, str]]: + candidates = [] + if os.name == 'nt': + possible_chrome_paths = [ + r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe", + r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe", + r"C:\Program Files\Google\Chrome\Application\chrome.exe", + r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe", + ] + for path in possible_chrome_paths: + if check_chromium_installed(path): + candidates.append(("system", path)) + else: + import glob + for bin_name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"): + bin_path = shutil.which(bin_name) + if bin_path and check_chromium_installed(bin_path): + candidates.append(("system", bin_path)) + user_home = os.path.expanduser("~") + glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome") + chrome_paths = glob.glob(glob_pattern) + for path in sorted(chrome_paths, reverse=True): + if check_chromium_installed(path): + candidates.append(("playwright-cache", path)) + return candidates - launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"] - browser = None - launch_errors = [] - # 优先按候选路径逐个尝试,失败自动降级,不让单一路径问题导致整体失败。 - for source, browser_path in browser_candidates: +class _PersistentBrowser: + """常驻浏览器管理器。 + + 目标:避免每次截图都冷启动 Chromium,降低失败率并提升速度。 + """ + + def __init__(self): + self._playwright = None + self._browser = None + self._lock = asyncio.Lock() + self._launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"] + self._last_launch_source = "unknown" + + async def _launch_browser(self): + if self._playwright is None: + self._playwright = await async_playwright().start() + + for source, browser_path in _collect_browser_candidates(): try: logger.debug(f"Launch chromium with {source}: {browser_path}") - browser = await p.chromium.launch( + browser = await self._playwright.chromium.launch( executable_path=browser_path, - args=launch_args, + args=self._launch_args, timeout=20000, ) - break + self._last_launch_source = f"{source}:{browser_path}" + return browser except Exception as e: - launch_errors.append(f"{source}:{browser_path} -> {e}") logger.warning(f"Launch chromium failed with {source}: {browser_path}, error={e}") - # 如果候选都失败,回退到 Playwright bundled 浏览器。 - if not browser: - logger.debug("Launch chromium with bundled browser") - browser = await p.chromium.launch(args=launch_args) + logger.debug("Launch chromium with bundled browser") + browser = await self._playwright.chromium.launch(args=self._launch_args) + self._last_launch_source = "bundled" + return browser + + async def ensure_browser(self): + if self._browser and self._browser.is_connected(): + return self._browser + async with self._lock: + if self._browser and self._browser.is_connected(): + return self._browser + # 浏览器失联时先做一次清理,避免残留句柄影响重建。 + if self._browser: + try: + await safe_close_browser(self._browser) + except Exception: + pass + self._browser = None + self._browser = await self._launch_browser() + logger.info(f"[md2img] 常驻浏览器就绪: source={self._last_launch_source}") + return self._browser + + async def restart_browser(self): + async with self._lock: + if self._browser: + try: + await safe_close_browser(self._browser) + except Exception: + pass + self._browser = None + self._browser = await self._launch_browser() + logger.info(f"[md2img] 常驻浏览器已重建: source={self._last_launch_source}") + return self._browser + + async def screenshot(self, html_content: str, output_image: str): + browser = await self.ensure_browser() + + async def _capture_with_browser(active_browser): + context = await active_browser.new_context(viewport={"width": 780, "height": 960}, device_scale_factor=1.2) + try: + page = await context.new_page() + logger.debug("Set page content") + await page.set_content(html_content, wait_until='domcontentloaded', timeout=15000) + logger.debug("Wait for fonts ready") + await page.evaluate("document.fonts.ready") + await asyncio.sleep(0.2) + logger.debug(f"Take screenshot: output={output_image}") + await page.screenshot(path=output_image, full_page=True, timeout=15000, animations="disabled") + if not os.path.exists(output_image): + raise RuntimeError(f"截图失败,输出文件不存在: {output_image}") + finally: + try: + await context.close() + except Exception: + pass try: - context = await browser.new_context(viewport={"width": 780, "height": 960}, device_scale_factor=1.2) - page = await context.new_page() - logger.debug("Set page content") - await page.set_content(html_content, wait_until='domcontentloaded', timeout=15000) - logger.debug("Wait for fonts ready") - await page.evaluate("document.fonts.ready") - await asyncio.sleep(0.2) - logger.debug(f"Take screenshot: output={output_image}") - await page.screenshot(path=output_image, full_page=True, timeout=15000, animations="disabled") - if not os.path.exists(output_image): - raise RuntimeError(f"截图失败,输出文件不存在: {output_image}") - finally: - logger.debug("Closing browser") - await safe_close_browser(browser) + await _capture_with_browser(browser) + except Exception as e: + # 首次失败后重建一次浏览器再重试,提升抗偶发故障能力。 + logger.warning(f"[md2img] 常驻浏览器截图失败,准备重建后重试: {e}") + browser = await self.restart_browser() + await _capture_with_browser(browser) + + +_BROWSER_MANAGER: Optional[_PersistentBrowser] = None + + +def _get_browser_manager() -> _PersistentBrowser: + global _BROWSER_MANAGER + if _BROWSER_MANAGER is None: + _BROWSER_MANAGER = _PersistentBrowser() + return _BROWSER_MANAGER + + +async def html_to_image(html_content, output_image): + manager = _get_browser_manager() + await manager.screenshot(html_content, output_image) async def _await_with_progress(coro, timeout_seconds: int, stage_name: str, progress_interval_seconds: int = 10):