import subprocess import time from pathlib import Path import shutil from typing import Optional, Tuple import threading from concurrent.futures import Future as ConcurrentFuture import psutil from playwright.async_api import async_playwright import os import asyncio import re from loguru import logger try: import markdown except ImportError: markdown = None META_KEYWORDS = ["群", "群名", "时间", "日期", "成员", "消息", "统计", "总结", "来源", "生成", "记录"] STAT_PILL_CLASSES = { "总": "total", "人数": "people", "文本": "text", "图片": "image", "视频": "video", "链接": "link", "表情": "emoji", } def _extract_stats_pills_from_markdown(md_content: str) -> str: text = str(md_content or "") pattern = re.compile( r"(^##\s+群概览\s*\n)([^\n]+)(?=\n(?:\n|##\s|###\s|$))", re.M, ) def replace(match): stats_line = match.group(2).strip() parts = [part.strip() for part in stats_line.split("·") if part.strip()] pills = [] for part in parts: item_match = re.match(r"(?:\*\*)?([^*\s]+)(?:\*\*)?\s+(\d+)", part) if not item_match: continue label = item_match.group(1).strip() value = item_match.group(2).strip() kind = STAT_PILL_CLASSES.get(label, "default") pills.append( f'{label}{value}' ) if not pills: return match.group(0) return match.group(1) + f'
{text}
") paragraph_lines = [] def close_ul(): nonlocal in_ul if in_ul: html_parts.append("") in_ul = False for raw_line in lines: line = raw_line.rstrip() stripped = line.strip() if not stripped: flush_paragraph() close_ul() continue if stripped.startswith("# "): flush_paragraph() close_ul() html_parts.append(f"