修复群总结转图假死:增加阶段进度日志与超时保护

变更项:1) 移除图片渲染前截断,保持完整内容转图。2) 群总结转图增加总超时,超时后快速降级文本,避免任务长期阻塞。3) markdown_to_image 增加每10秒进度心跳日志,定位卡在 markdown_to_html 或 html_to_image。4) 分阶段超时与阶段开始日志完善,解决无错误无进度的问题。
This commit is contained in:
liuwei
2026-04-17 09:16:26 +08:00
parent 6b68de7f4e
commit 55c3b951d5
2 changed files with 78 additions and 6 deletions

View File

@@ -89,6 +89,11 @@ class MessageSummaryPlugin(MessagePluginInterface):
self._connect_timeout_seconds = int(api_config.get("connect_timeout_seconds", 10))
self._request_timeout_seconds = int(api_config.get("request_timeout_seconds", 180))
self._retry_delays_seconds = api_config.get("retry_delays_seconds", [10, 20])
# 输出阶段超时与体积保护:防止 Markdown 转图在异常环境下长时间卡死。
output_config = self._config.get("output", {})
self._image_render_timeout_seconds = int(output_config.get("image_render_timeout_seconds", 90))
# 默认只尝试 1 次,优先保证任务快速返回;需要更高成功率可在配置里提高。
self._image_render_retries = int(output_config.get("image_render_retries", 1))
self.llm_client = UnifiedLLMClient(api_config)
self._api_mode = self.llm_client.mode or self._api_mode
self._response_mode = self.llm_client.response_mode or self._response_mode
@@ -539,7 +544,18 @@ class MessageSummaryPlugin(MessagePluginInterface):
timestamp = int(time.time())
output_path = f"summary_{timestamp}.png"
self.LOG.info(f"开始生成图片: {output_path}")
spath = await convert_md_str_to_image(answer, output_path)
# 额外包一层总超时,确保就算底层依赖异常也不会把整个任务拖住。
total_timeout = max(30, self._image_render_timeout_seconds * self._image_render_retries + 10)
spath = await asyncio.wait_for(
convert_md_str_to_image(
answer,
output_path,
max_retries=self._image_render_retries,
render_timeout_seconds=self._image_render_timeout_seconds,
html_timeout_seconds=min(30, self._image_render_timeout_seconds),
),
timeout=total_timeout,
)
self.LOG.info(f"成功生成图片: {spath}")
except Exception as e:
self.LOG.error(f"生成图片失败: {e}", exc_info=True)

View File

@@ -526,7 +526,35 @@ async def html_to_image(html_content, output_image):
await safe_close_browser(browser)
async def convert_md_str_to_image(md_content: str, output_image: str, max_retries: int = 2) -> str:
async def _await_with_progress(
    coro,
    timeout_seconds: int,
    stage_name: str,
    progress_interval_seconds: int = 10,
    cancel_grace_seconds: float = 5.0,
):
    """Await *coro* under a hard timeout while emitting periodic heartbeat logs.

    The coroutine is wrapped in a task and polled at most once per second so
    that a long-running stage keeps producing log lines instead of looking
    hung.

    Args:
        coro: The coroutine to run.
        timeout_seconds: Maximum time the stage may take before it is
            cancelled and ``asyncio.TimeoutError`` is raised.
        stage_name: Label used in heartbeat and timeout messages.
        progress_interval_seconds: Seconds between heartbeat log lines.
            Values below 1 are treated as 1 so the log is not flooded.
        cancel_grace_seconds: Upper bound on how long to wait for the
            cancelled task to finish its own cleanup before returning
            control to the caller.

    Returns:
        Whatever *coro* returns.

    Raises:
        asyncio.TimeoutError: The stage did not finish within
            ``timeout_seconds``.
        Exception: Any exception raised by *coro* is propagated unchanged.
    """
    task = asyncio.create_task(coro)
    start_ts = time.monotonic()
    deadline = start_ts + timeout_seconds
    # A non-positive interval would never advance past `elapsed` and would
    # log a heartbeat on every poll.
    interval = max(1, progress_interval_seconds)
    next_progress_at = interval
    try:
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise asyncio.TimeoutError(f"[md2img] 阶段超时: {stage_name}, timeout={timeout_seconds}s")
            # Never sleep past the deadline, so short timeouts are honoured
            # instead of being rounded up to the 1s poll period.
            done, _ = await asyncio.wait({task}, timeout=min(1.0, remaining))
            if done:
                return task.result()
            elapsed = int(time.monotonic() - start_ts)
            if elapsed >= next_progress_at:
                logger.info(f"[md2img] 阶段进行中: {stage_name}, elapsed={elapsed}s/{timeout_seconds}s")
                next_progress_at += interval
    finally:
        if not task.done():
            task.cancel()
            # Give the cancelled task a bounded window to unwind. Without
            # this its cleanup would still be running when the caller starts
            # the next attempt. The bound keeps a task that ignores
            # cancellation from blocking the caller forever.
            await asyncio.wait({task}, timeout=cancel_grace_seconds)
        if task.done() and not task.cancelled():
            # Mark any exception as retrieved so the event loop does not
            # report "Task exception was never retrieved" later.
            task.exception()
async def convert_md_str_to_image(
md_content: str,
output_image: str,
max_retries: int = 2,
render_timeout_seconds: int = 90,
html_timeout_seconds: int = 30,
) -> str:
if not md_content:
raise ValueError("Markdown content cannot be empty")
@@ -538,19 +566,47 @@ async def convert_md_str_to_image(md_content: str, output_image: str, max_retrie
last_error = None
for attempt in range(max_retries):
try:
logger.debug(f"尝试第 {attempt + 1}/{max_retries} 次生成图片")
attempt_no = attempt + 1
logger.debug(
f"尝试第 {attempt_no}/{max_retries} 次生成图片 "
f"(html_timeout={html_timeout_seconds}s, render_timeout={render_timeout_seconds}s)"
)
if output_image_path.exists():
os.remove(str(output_image_path))
full_html = await md_str_to_html_content(md_content)
await html_to_image(full_html, str(output_image_path))
stage_start = time.monotonic()
# 阶段一Markdown -> HTML。加超时可避免极端文本导致长期阻塞。
logger.info(f"[md2img] 开始阶段: markdown_to_html, attempt={attempt_no}/{max_retries}")
full_html = await _await_with_progress(
md_str_to_html_content(md_content),
timeout_seconds=max(5, int(html_timeout_seconds)),
stage_name="markdown_to_html",
)
logger.debug(f"{attempt_no} 次 HTML 生成耗时: {time.monotonic() - stage_start:.2f}s")
# 阶段二Playwright 渲染截图。加超时防止浏览器进程异常卡死。
stage_start = time.monotonic()
logger.info(f"[md2img] 开始阶段: html_to_image, attempt={attempt_no}/{max_retries}")
await _await_with_progress(
html_to_image(full_html, str(output_image_path)),
timeout_seconds=max(10, int(render_timeout_seconds)),
stage_name="html_to_image",
)
logger.debug(f"{attempt_no} 次截图耗时: {time.monotonic() - stage_start:.2f}s")
image_size = os.path.getsize(str(output_image_path))
if image_size < 1024:
raise RuntimeError(f"图片生成异常,大小仅为: {image_size} bytes")
logger.info(f"图片成功生成:{output_image_path}")
return str(output_image_path.resolve())
except asyncio.TimeoutError as e:
last_error = RuntimeError(
f"图片生成超时(attempt={attempt_no}/{max_retries}, "
f"html_timeout={html_timeout_seconds}s, render_timeout={render_timeout_seconds}s)"
)
logger.warning(str(last_error))
except Exception as e:
last_error = e
logger.warning(f"{attempt + 1} 次尝试失败: {e}")
logger.warning(f"{attempt_no} 次尝试失败: {e}")
if attempt < max_retries - 1:
await asyncio.sleep(1.5)