修复转图运行时重复启动与高频误重建问题
变更项:
1. 修复 md2img 运行时并发启动竞争,新增启动中标记,避免同名线程被重复拉起。
2. 优化浏览器心跳探测策略:由高频激进重建改为断连连续判定后再重建,降低误判。
3. 新增截图进行中保护,截图期间心跳跳过探测,避免与业务并发导致误重建。
4. 为浏览器重建增加 reason 日志字段,便于线上追踪重建触发原因。
5. 保留截图后断连自愈能力,但改为更稳健的触发路径,减少无意义重建。
This commit is contained in:
@@ -523,6 +523,10 @@ class _PersistentBrowser:
|
||||
self._owner_loop_id: Optional[int] = None
|
||||
# 保活心跳任务:定期探测浏览器连通性,异常时自动重建。
|
||||
self._heartbeat_task: Optional[asyncio.Task] = None
|
||||
# 心跳断连计数:避免单次抖动就触发重建。
|
||||
self._disconnect_streak = 0
|
||||
# 截图进行中标记:心跳期间若业务在跑,跳过本轮探测以避免误判。
|
||||
self._capture_in_progress = False
|
||||
|
||||
async def _launch_browser(self):
|
||||
if self._playwright is None:
|
||||
@@ -581,7 +585,7 @@ class _PersistentBrowser:
|
||||
self._ensure_heartbeat_task()
|
||||
return self._browser
|
||||
|
||||
async def restart_browser(self):
|
||||
async def restart_browser(self, reason: str = "unknown"):
|
||||
async with self._lock:
|
||||
if self._browser:
|
||||
try:
|
||||
@@ -591,10 +595,11 @@ class _PersistentBrowser:
|
||||
self._browser = None
|
||||
self._browser = await self._launch_browser()
|
||||
self._owner_loop_id = id(asyncio.get_running_loop())
|
||||
self._disconnect_streak = 0
|
||||
browser_pid = getattr(getattr(self._browser, "process", None), "pid", None)
|
||||
logger.info(
|
||||
f"[md2img] 常驻浏览器已重建: source={self._last_launch_source}, "
|
||||
f"loop={self._owner_loop_id}, pid={browser_pid}"
|
||||
f"loop={self._owner_loop_id}, pid={browser_pid}, reason={reason}"
|
||||
)
|
||||
self._ensure_heartbeat_task()
|
||||
return self._browser
|
||||
@@ -613,13 +618,24 @@ class _PersistentBrowser:
|
||||
"""周期性探测浏览器可用性,断连后自动重建。"""
|
||||
while True:
|
||||
try:
|
||||
await asyncio.sleep(10)
|
||||
await asyncio.sleep(20)
|
||||
# 没有浏览器实例时只保持心跳存活,不主动创建,避免空闲时不必要消耗。
|
||||
if not self._browser:
|
||||
self._disconnect_streak = 0
|
||||
continue
|
||||
if not await self._is_browser_alive(self._browser, timeout_seconds=2.0):
|
||||
logger.warning("[md2img] 心跳探测发现浏览器已断连,准备自动重建")
|
||||
await self.restart_browser()
|
||||
if self._capture_in_progress:
|
||||
# 截图期间跳过探测,避免与业务并发导致误判。
|
||||
continue
|
||||
if self._browser and self._browser.is_connected():
|
||||
self._disconnect_streak = 0
|
||||
continue
|
||||
|
||||
self._disconnect_streak += 1
|
||||
if self._disconnect_streak >= 3:
|
||||
logger.warning(
|
||||
f"[md2img] 心跳探测连续{self._disconnect_streak}次发现浏览器断连,准备自动重建"
|
||||
)
|
||||
await self.restart_browser(reason="heartbeat_disconnected")
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as e:
|
||||
@@ -635,8 +651,10 @@ class _PersistentBrowser:
|
||||
browser = await self.ensure_browser()
|
||||
|
||||
async def _capture_with_browser(active_browser):
|
||||
context = await active_browser.new_context(viewport={"width": 780, "height": 960}, device_scale_factor=1.2)
|
||||
self._capture_in_progress = True
|
||||
context = None
|
||||
try:
|
||||
context = await active_browser.new_context(viewport={"width": 780, "height": 960}, device_scale_factor=1.2)
|
||||
page = await context.new_page()
|
||||
logger.debug("Set page content")
|
||||
await page.set_content(html_content, wait_until='domcontentloaded', timeout=15000)
|
||||
@@ -649,9 +667,11 @@ class _PersistentBrowser:
|
||||
raise RuntimeError(f"截图失败,输出文件不存在: {output_image}")
|
||||
finally:
|
||||
try:
|
||||
await context.close()
|
||||
if context:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._capture_in_progress = False
|
||||
|
||||
try:
|
||||
await _capture_with_browser(browser)
|
||||
@@ -659,11 +679,11 @@ class _PersistentBrowser:
|
||||
# 在部分系统环境中,浏览器可能在任务完成后迅速断连,这里主动重建保证“常驻”语义。
|
||||
if not await self._is_browser_alive(browser, timeout_seconds=2.0):
|
||||
logger.warning("[md2img] 截图后浏览器已断连,立即执行自动重建")
|
||||
await self.restart_browser()
|
||||
await self.restart_browser(reason="post_capture_disconnected")
|
||||
except Exception as e:
|
||||
# 首次失败后重建一次浏览器再重试,提升抗偶发故障能力。
|
||||
logger.warning(f"[md2img] 常驻浏览器截图失败,准备重建后重试: {e}")
|
||||
browser = await self.restart_browser()
|
||||
browser = await self.restart_browser(reason="capture_exception_retry")
|
||||
await _capture_with_browser(browser)
|
||||
|
||||
|
||||
@@ -685,6 +705,8 @@ class _Md2ImgRuntime:
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
self._lock = threading.Lock()
|
||||
self._ready = threading.Event()
|
||||
# 启动中标记:避免并发调用 ensure_started 时重复创建线程。
|
||||
self._starting = False
|
||||
|
||||
@property
|
||||
def loop(self) -> Optional[asyncio.AbstractEventLoop]:
|
||||
@@ -706,11 +728,21 @@ class _Md2ImgRuntime:
|
||||
with self._lock:
|
||||
if self._thread and self._thread.is_alive() and self._loop and self._loop.is_running():
|
||||
return
|
||||
self._ready.clear()
|
||||
self._thread = threading.Thread(target=self._thread_main, name="md2img-runtime", daemon=True)
|
||||
self._thread.start()
|
||||
if not self._ready.wait(timeout=10):
|
||||
raise RuntimeError("md2img 专用运行时启动超时")
|
||||
if self._starting:
|
||||
# 已有其他调用在启动中,当前线程等待启动完成即可。
|
||||
pass
|
||||
else:
|
||||
self._starting = True
|
||||
self._ready.clear()
|
||||
self._thread = threading.Thread(target=self._thread_main, name="md2img-runtime", daemon=True)
|
||||
self._thread.start()
|
||||
# 注意:等待动作放到锁外,避免阻塞其他读取逻辑。
|
||||
if not self._ready.wait(timeout=10):
|
||||
with self._lock:
|
||||
self._starting = False
|
||||
raise RuntimeError("md2img 专用运行时启动超时")
|
||||
with self._lock:
|
||||
self._starting = False
|
||||
|
||||
def submit(self, coro) -> ConcurrentFuture:
|
||||
"""向专用运行时提交协程任务。"""
|
||||
|
||||
Reference in New Issue
Block a user