实现转图浏览器常驻复用并支持失败自动重建
变更项:
1) 新增常驻浏览器管理器,避免每次截图都冷启动 Chromium。
2) 截图流程改为复用同一 Browser,每次仅创建并关闭 context/page,提升稳定性与性能。
3) 增加浏览器失联检测与自动重建机制,截图失败后重建一次并重试。
4) 保留多候选浏览器启动策略(system/playwright-cache/bundled),并输出准确来源日志。
5) 补充中文注释,明确常驻设计目标与故障恢复逻辑。
This commit is contained in:
@@ -2,6 +2,7 @@ import subprocess
|
|||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import shutil
|
import shutil
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
@@ -477,72 +478,141 @@ def check_chromium_installed(path):
|
|||||||
return os.path.isfile(path) and os.access(path, os.X_OK)
|
return os.path.isfile(path) and os.access(path, os.X_OK)
|
||||||
|
|
||||||
|
|
||||||
async def html_to_image(html_content, output_image):
|
def _collect_browser_candidates() -> list[Tuple[str, str]]:
|
||||||
async with async_playwright() as p:
|
candidates = []
|
||||||
browser_candidates = []
|
if os.name == 'nt':
|
||||||
if os.name == 'nt':
|
possible_chrome_paths = [
|
||||||
# Windows 优先尝试常见系统安装路径。
|
r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
|
||||||
possible_chrome_paths = [
|
r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
|
||||||
r"C:\Users\Liu_WIN10\AppData\Local\Google\Chrome\Application\chrome.exe",
|
r"C:\Program Files\Google\Chrome\Application\chrome.exe",
|
||||||
r"C:\Users\Liu-OPEN\AppData\Local\Google\Chrome\Application\chrome.exe",
|
r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
|
||||||
r"C:\Program Files\Google\Chrome\Application\chrome.exe",
|
]
|
||||||
r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
|
for path in possible_chrome_paths:
|
||||||
]
|
if check_chromium_installed(path):
|
||||||
for path in possible_chrome_paths:
|
candidates.append(("system", path))
|
||||||
if check_chromium_installed(path):
|
else:
|
||||||
browser_candidates.append(("system", path))
|
import glob
|
||||||
else:
|
for bin_name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
|
||||||
import glob
|
bin_path = shutil.which(bin_name)
|
||||||
# Linux 先尝试系统可执行文件,再尝试 Playwright 缓存浏览器。
|
if bin_path and check_chromium_installed(bin_path):
|
||||||
for bin_name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
|
candidates.append(("system", bin_path))
|
||||||
bin_path = shutil.which(bin_name)
|
user_home = os.path.expanduser("~")
|
||||||
if bin_path and check_chromium_installed(bin_path):
|
glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome")
|
||||||
browser_candidates.append(("system", bin_path))
|
chrome_paths = glob.glob(glob_pattern)
|
||||||
user_home = os.path.expanduser("~")
|
for path in sorted(chrome_paths, reverse=True):
|
||||||
glob_pattern = os.path.join(user_home, ".cache", "ms-playwright", "chromium-*", "chrome-linux", "chrome")
|
if check_chromium_installed(path):
|
||||||
chrome_paths = glob.glob(glob_pattern)
|
candidates.append(("playwright-cache", path))
|
||||||
for path in sorted(chrome_paths, reverse=True):
|
return candidates
|
||||||
if check_chromium_installed(path):
|
|
||||||
browser_candidates.append(("playwright-cache", path))
|
|
||||||
|
|
||||||
launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
|
|
||||||
browser = None
|
|
||||||
launch_errors = []
|
|
||||||
|
|
||||||
# 优先按候选路径逐个尝试,失败自动降级,不让单一路径问题导致整体失败。
|
class _PersistentBrowser:
|
||||||
for source, browser_path in browser_candidates:
|
"""常驻浏览器管理器。
|
||||||
|
|
||||||
|
目标:避免每次截图都冷启动 Chromium,降低失败率并提升速度。
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._playwright = None
|
||||||
|
self._browser = None
|
||||||
|
self._lock = asyncio.Lock()
|
||||||
|
self._launch_args = ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
|
||||||
|
self._last_launch_source = "unknown"
|
||||||
|
|
||||||
|
async def _launch_browser(self):
|
||||||
|
if self._playwright is None:
|
||||||
|
self._playwright = await async_playwright().start()
|
||||||
|
|
||||||
|
for source, browser_path in _collect_browser_candidates():
|
||||||
try:
|
try:
|
||||||
logger.debug(f"Launch chromium with {source}: {browser_path}")
|
logger.debug(f"Launch chromium with {source}: {browser_path}")
|
||||||
browser = await p.chromium.launch(
|
browser = await self._playwright.chromium.launch(
|
||||||
executable_path=browser_path,
|
executable_path=browser_path,
|
||||||
args=launch_args,
|
args=self._launch_args,
|
||||||
timeout=20000,
|
timeout=20000,
|
||||||
)
|
)
|
||||||
break
|
self._last_launch_source = f"{source}:{browser_path}"
|
||||||
|
return browser
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
launch_errors.append(f"{source}:{browser_path} -> {e}")
|
|
||||||
logger.warning(f"Launch chromium failed with {source}: {browser_path}, error={e}")
|
logger.warning(f"Launch chromium failed with {source}: {browser_path}, error={e}")
|
||||||
|
|
||||||
# 如果候选都失败,回退到 Playwright bundled 浏览器。
|
logger.debug("Launch chromium with bundled browser")
|
||||||
if not browser:
|
browser = await self._playwright.chromium.launch(args=self._launch_args)
|
||||||
logger.debug("Launch chromium with bundled browser")
|
self._last_launch_source = "bundled"
|
||||||
browser = await p.chromium.launch(args=launch_args)
|
return browser
|
||||||
|
|
||||||
|
async def ensure_browser(self):
|
||||||
|
if self._browser and self._browser.is_connected():
|
||||||
|
return self._browser
|
||||||
|
async with self._lock:
|
||||||
|
if self._browser and self._browser.is_connected():
|
||||||
|
return self._browser
|
||||||
|
# 浏览器失联时先做一次清理,避免残留句柄影响重建。
|
||||||
|
if self._browser:
|
||||||
|
try:
|
||||||
|
await safe_close_browser(self._browser)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._browser = None
|
||||||
|
self._browser = await self._launch_browser()
|
||||||
|
logger.info(f"[md2img] 常驻浏览器就绪: source={self._last_launch_source}")
|
||||||
|
return self._browser
|
||||||
|
|
||||||
|
async def restart_browser(self):
|
||||||
|
async with self._lock:
|
||||||
|
if self._browser:
|
||||||
|
try:
|
||||||
|
await safe_close_browser(self._browser)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._browser = None
|
||||||
|
self._browser = await self._launch_browser()
|
||||||
|
logger.info(f"[md2img] 常驻浏览器已重建: source={self._last_launch_source}")
|
||||||
|
return self._browser
|
||||||
|
|
||||||
|
async def screenshot(self, html_content: str, output_image: str):
|
||||||
|
browser = await self.ensure_browser()
|
||||||
|
|
||||||
|
async def _capture_with_browser(active_browser):
|
||||||
|
context = await active_browser.new_context(viewport={"width": 780, "height": 960}, device_scale_factor=1.2)
|
||||||
|
try:
|
||||||
|
page = await context.new_page()
|
||||||
|
logger.debug("Set page content")
|
||||||
|
await page.set_content(html_content, wait_until='domcontentloaded', timeout=15000)
|
||||||
|
logger.debug("Wait for fonts ready")
|
||||||
|
await page.evaluate("document.fonts.ready")
|
||||||
|
await asyncio.sleep(0.2)
|
||||||
|
logger.debug(f"Take screenshot: output={output_image}")
|
||||||
|
await page.screenshot(path=output_image, full_page=True, timeout=15000, animations="disabled")
|
||||||
|
if not os.path.exists(output_image):
|
||||||
|
raise RuntimeError(f"截图失败,输出文件不存在: {output_image}")
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
await context.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
context = await browser.new_context(viewport={"width": 780, "height": 960}, device_scale_factor=1.2)
|
await _capture_with_browser(browser)
|
||||||
page = await context.new_page()
|
except Exception as e:
|
||||||
logger.debug("Set page content")
|
# 首次失败后重建一次浏览器再重试,提升抗偶发故障能力。
|
||||||
await page.set_content(html_content, wait_until='domcontentloaded', timeout=15000)
|
logger.warning(f"[md2img] 常驻浏览器截图失败,准备重建后重试: {e}")
|
||||||
logger.debug("Wait for fonts ready")
|
browser = await self.restart_browser()
|
||||||
await page.evaluate("document.fonts.ready")
|
await _capture_with_browser(browser)
|
||||||
await asyncio.sleep(0.2)
|
|
||||||
logger.debug(f"Take screenshot: output={output_image}")
|
|
||||||
await page.screenshot(path=output_image, full_page=True, timeout=15000, animations="disabled")
|
_BROWSER_MANAGER: Optional[_PersistentBrowser] = None
|
||||||
if not os.path.exists(output_image):
|
|
||||||
raise RuntimeError(f"截图失败,输出文件不存在: {output_image}")
|
|
||||||
finally:
|
def _get_browser_manager() -> _PersistentBrowser:
|
||||||
logger.debug("Closing browser")
|
global _BROWSER_MANAGER
|
||||||
await safe_close_browser(browser)
|
if _BROWSER_MANAGER is None:
|
||||||
|
_BROWSER_MANAGER = _PersistentBrowser()
|
||||||
|
return _BROWSER_MANAGER
|
||||||
|
|
||||||
|
|
||||||
|
async def html_to_image(html_content, output_image):
|
||||||
|
manager = _get_browser_manager()
|
||||||
|
await manager.screenshot(html_content, output_image)
|
||||||
|
|
||||||
|
|
||||||
async def _await_with_progress(coro, timeout_seconds: int, stage_name: str, progress_interval_seconds: int = 10):
|
async def _await_with_progress(coro, timeout_seconds: int, stage_name: str, progress_interval_seconds: int = 10):
|
||||||
|
|||||||
Reference in New Issue
Block a user