实现转图浏览器常驻复用并支持失败自动重建

变更项:1) 新增常驻浏览器管理器,避免每次截图都冷启动 Chromium。2) 截图流程改为复用同一 Browser,每次仅创建并关闭 context/page,提升稳定性与性能。3) 增加浏览器失联检测与自动重建机制,截图失败后重建一次并重试。4) 保留多候选浏览器启动策略(system/playwright-cache/bundled),并输出准确来源日志。5) 补充中文注释,明确常驻设计目标与故障恢复逻辑。
This commit is contained in:
liuwei
2026-04-17 09:26:23 +08:00
parent 43c334354f
commit f90c0720b3

View File

@@ -2,6 +2,7 @@ import subprocess
import time
from pathlib import Path
import shutil
from typing import Optional, Tuple
import psutil
from playwright.async_api import async_playwright
@@ -477,72 +478,141 @@ def check_chromium_installed(path):
return os.path.isfile(path) and os.access(path, os.X_OK)
async def html_to_image(html_content, output_image):
async with async_playwright() as p:
browser_candidates = []
if os.name == 'nt':
# Windows 优先尝试常见系统安装路径。
possible_chrome_paths = [
r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
r"C:\Program Files\Google\Chrome\Application\chrome.exe",
r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
]
for path in possible_chrome_paths:
if check_chromium_installed(path):
browser_candidates.append(("system", path))
else:
import glob
# Linux 先尝试系统可执行文件,再尝试 Playwright 缓存浏览器。
for bin_name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
bin_path = shutil.which(bin_name)
if bin_path and check_chromium_installed(bin_path):
browser_candidates.append(("system", bin_path))
user_home = os.path.expanduser("~")
glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome")
chrome_paths = glob.glob(glob_pattern)
for path in sorted(chrome_paths, reverse=True):
if check_chromium_installed(path):
browser_candidates.append(("playwright-cache", path))
def _collect_browser_candidates() -> list[Tuple[str, str]]:
    """Return launchable Chrome/Chromium executables in preference order.

    Each entry is a ``(source, executable_path)`` pair that passed
    ``check_chromium_installed``. ``source`` is ``"system"`` for a fixed
    Windows install path or a binary found on ``PATH``, and
    ``"playwright-cache"`` for a build found under ``~/.cache/ms-playwright``.
    The list is empty when nothing usable is found.
    """
    if os.name == 'nt':
        # Windows: only a fixed set of well-known install locations is probed.
        windows_locations = (
            r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
        )
        return [
            ("system", exe_path)
            for exe_path in windows_locations
            if check_chromium_installed(exe_path)
        ]

    import glob

    found = []

    # Non-Windows: binaries on PATH come first ...
    for command in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
        resolved = shutil.which(command)
        if resolved and check_chromium_installed(resolved):
            found.append(("system", resolved))

    # ... then browsers in the Playwright cache, in reverse lexicographic
    # order (presumably so higher revision directories are tried first).
    cache_pattern = os.path.join(
        os.path.expanduser("~"),
        ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome",
    )
    found.extend(
        ("playwright-cache", exe_path)
        for exe_path in sorted(glob.glob(cache_pattern), reverse=True)
        if check_chromium_installed(exe_path)
    )
    return found
launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
browser = None
launch_errors = []
# 优先按候选路径逐个尝试,失败自动降级,不让单一路径问题导致整体失败。
for source, browser_path in browser_candidates:
class _PersistentBrowser:
    """Manager for one long-lived Chromium instance shared by all screenshots.

    Goal: avoid cold-starting Chromium for every screenshot, which lowers the
    failure rate and improves speed. Each screenshot only creates and closes
    its own browser context/page; the browser itself is reused and is
    relaunched when it is found disconnected or when a capture fails.
    """

    def __init__(self):
        # Playwright driver handle; started lazily on the first launch.
        self._playwright = None
        # The shared browser, or None when nothing has been launched yet.
        self._browser = None
        # Serializes launch/relaunch so concurrent callers do not race.
        self._lock = asyncio.Lock()
        self._launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
        # Human-readable origin of the running browser, used in log lines.
        self._last_launch_source = "unknown"

    async def _launch_browser(self):
        """Launch Chromium, trying every local candidate before the bundled one.

        Candidates from ``_collect_browser_candidates()`` are tried in order;
        a failing candidate is logged and skipped. If all fail (or there are
        none), Playwright's default browser is launched, and any error from
        that final attempt propagates to the caller.

        Returns:
            The launched browser object.
        """
        if self._playwright is None:
            self._playwright = await async_playwright().start()

        for source, browser_path in _collect_browser_candidates():
            try:
                logger.debug(f"Launch chromium with {source}: {browser_path}")
                browser = await self._playwright.chromium.launch(
                    executable_path=browser_path,
                    args=self._launch_args,
                    timeout=20000,
                )
                self._last_launch_source = f"{source}:{browser_path}"
                return browser
            except Exception as e:
                # One broken candidate must not make the whole launch fail.
                logger.warning(f"Launch chromium failed with {source}: {browser_path}, error={e}")

        logger.debug("Launch chromium with bundled browser")
        browser = await self._playwright.chromium.launch(args=self._launch_args)
        self._last_launch_source = "bundled"
        return browser

    async def _replace_browser_locked(self):
        """Close the current browser (best effort) and launch a new one.

        Must be called with ``self._lock`` held. If the launch fails,
        ``self._browser`` is left as ``None`` and the error propagates.
        """
        stale, self._browser = self._browser, None
        if stale is not None:
            # Clean up first so a leftover handle cannot interfere with the
            # relaunch; a failure here must not block the relaunch itself.
            try:
                await safe_close_browser(stale)
            except Exception as e:
                logger.debug(f"[md2img] Ignoring error while closing stale browser: {e}")
        self._browser = await self._launch_browser()
        return self._browser

    async def ensure_browser(self):
        """Return a connected browser, launching one if necessary."""
        if self._browser and self._browser.is_connected():
            return self._browser
        async with self._lock:
            # Re-check after acquiring the lock: another task may have
            # finished launching while this one was waiting.
            if self._browser and self._browser.is_connected():
                return self._browser
            browser = await self._replace_browser_locked()
            logger.info(f"[md2img] 常驻浏览器就绪: source={self._last_launch_source}")
            return browser

    async def restart_browser(self, stale_browser=None):
        """Relaunch the shared browser and return the new instance.

        Args:
            stale_browser: Optional browser the caller just failed on. When
                given and the manager already holds a different, connected
                browser (another task rebuilt it in the meantime), that
                browser is returned instead of being torn down again. With
                the default ``None`` the browser is always relaunched.

        Returns:
            The browser to use for the retry.
        """
        async with self._lock:
            current = self._browser
            if (
                stale_browser is not None
                and current is not None
                and current is not stale_browser
                and current.is_connected()
            ):
                # Someone else already replaced the failed browser; closing
                # it again would break their in-flight retry.
                return current
            browser = await self._replace_browser_locked()
            logger.info(f"[md2img] 常驻浏览器已重建: source={self._last_launch_source}")
            return browser

    async def _capture_with_browser(self, active_browser, html_content: str, output_image: str):
        """Render ``html_content`` in a fresh context and save a full-page image.

        Raises:
            RuntimeError: if the screenshot call returns but the output file
                does not exist.
        """
        context = await active_browser.new_context(viewport={"width": 780, "height": 960}, device_scale_factor=1.2)
        try:
            page = await context.new_page()
            logger.debug("Set page content")
            await page.set_content(html_content, wait_until='domcontentloaded', timeout=15000)
            logger.debug("Wait for fonts ready")
            await page.evaluate("document.fonts.ready")
            # Short pause after fonts report ready, before capturing.
            await asyncio.sleep(0.2)
            logger.debug(f"Take screenshot: output={output_image}")
            await page.screenshot(path=output_image, full_page=True, timeout=15000, animations="disabled")
            if not os.path.exists(output_image):
                raise RuntimeError(f"截图失败,输出文件不存在: {output_image}")
        finally:
            # Only the per-request context is closed; the browser stays up.
            try:
                await context.close()
            except Exception as e:
                logger.debug(f"[md2img] Ignoring error while closing context: {e}")

    async def screenshot(self, html_content: str, output_image: str):
        """Save a screenshot of ``html_content`` to ``output_image``.

        On the first failure the browser is rebuilt once and the capture is
        retried; an error from the retry propagates to the caller.
        """
        browser = await self.ensure_browser()
        try:
            await self._capture_with_browser(browser, html_content, output_image)
        except Exception as e:
            # Rebuild once and retry to ride out sporadic browser failures.
            logger.warning(f"[md2img] 常驻浏览器截图失败,准备重建后重试: {e}")
            browser = await self.restart_browser(browser)
            await self._capture_with_browser(browser, html_content, output_image)
# Module-wide manager instance; stays None until the first request needs it.
_BROWSER_MANAGER: Optional[_PersistentBrowser] = None


def _get_browser_manager() -> _PersistentBrowser:
    """Return the shared ``_PersistentBrowser``, creating it on first use."""
    global _BROWSER_MANAGER
    manager = _BROWSER_MANAGER
    if manager is None:
        manager = _PersistentBrowser()
        _BROWSER_MANAGER = manager
    return manager
async def html_to_image(html_content, output_image):
    """Render ``html_content`` to the image file ``output_image``.

    Delegates to the shared browser manager's ``screenshot`` method.
    """
    await _get_browser_manager().screenshot(html_content, output_image)
async def _await_with_progress(coro, timeout_seconds: int, stage_name: str, progress_interval_seconds: int = 10):