message_summary模板模式改为JSON优先解析并同步Dify提示词
This commit is contained in:
@@ -579,6 +579,180 @@ class MessageSummaryPlugin(MessagePluginInterface):
|
||||
|
||||
return text
|
||||
|
||||
@classmethod
def _extract_json_object_from_text(cls, raw_text: str) -> Optional[Dict[str, Any]]:
    """Extract a JSON object from LLM output text (primarily for template mode).

    Three shapes of reply are tried, in order:

    1. the whole text is a JSON object;
    2. the JSON is wrapped in a fenced code block (with or without a
       ``json`` language tag);
    3. the JSON object is embedded in surrounding explanatory text.

    Args:
        raw_text: Raw model output. ``None`` and other falsy values are
            treated as empty text.

    Returns:
        The decoded ``dict``, or ``None`` when no JSON object can be decoded.
        Only dicts are returned so arrays/strings never reach template
        rendering. Failures never raise, so the caller can fall back to
        Markdown structure extraction.
    """
    text = str(raw_text or "").strip()
    if not text:
        return None

    def _loads_dict(candidate: str) -> Optional[Dict[str, Any]]:
        """Decode *candidate* strictly; return it only when it is a dict."""
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input. Both mean "not usable JSON".
            return None
        return parsed if isinstance(parsed, dict) else None

    # Case 1: the entire text is a JSON object.
    if text.startswith("{") and text.endswith("}"):
        whole = _loads_dict(text)
        if whole is not None:
            return whole

    # Case 2: the JSON is wrapped in a fenced code block.
    fenced_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
    if fenced_match:
        fenced = _loads_dict(str(fenced_match.group(1) or "").strip())
        if fenced is not None:
            return fenced

    # Case 3: JSON embedded in prose. Decode exactly one JSON value starting
    # at the first "{" instead of slicing up to the LAST "}": trailing text
    # that happens to contain braces no longer invalidates an otherwise
    # valid object. Truncated output still fails to decode and yields None,
    # so a partial inner object is never mistaken for the payload.
    left = text.find("{")
    if left < 0:
        return None
    try:
        embedded, _end = json.JSONDecoder().raw_decode(text[left:])
    except (ValueError, RecursionError):
        return None
    return embedded if isinstance(embedded, dict) else None
|
||||
|
||||
@classmethod
def _normalize_json_text_list(cls, value: Any, limit: int = 6, item_max_len: int = 120) -> List[str]:
    """Normalize a loosely-typed JSON field into a bounded list of strings.

    Accepts a plain string, or a list whose items may be strings, dicts
    (the first truthy value among the ``text`` / ``title`` / ``value`` keys
    is used) or any other value (stringified). Every item is passed through
    ``_strip_markdown_inline`` and stripped; empty results are dropped.

    Args:
        value: The raw JSON field value.
        limit: Maximum number of entries to return.
        item_max_len: Maximum length of each entry; longer text is cut.

    Returns:
        The cleaned strings. Any input that is neither ``str`` nor ``list``
        yields an empty list.
    """
    # A bare string counts as a one-item list.
    if isinstance(value, str):
        single = cls._strip_markdown_inline(value).strip()
        return ([single[:item_max_len]] if single else [])[:limit]

    if not isinstance(value, list):
        return []

    collected: List[str] = []
    for entry in value:
        # Reduce each supported item shape to one raw string first, so the
        # cleaning and truncation below run through a single code path.
        if isinstance(entry, str):
            raw = entry
        elif isinstance(entry, dict):
            raw = str(entry.get("text") or entry.get("title") or entry.get("value") or "")
        else:
            raw = str(entry or "")

        cleaned = cls._strip_markdown_inline(raw).strip()
        if cleaned:
            collected.append(cleaned[:item_max_len])
            # Stop as soon as the cap is reached to keep the card compact.
            if len(collected) >= limit:
                break
    return collected
|
||||
|
||||
@classmethod
def _extract_template_json_data(cls, summary_text: str) -> Optional[Dict[str, Any]]:
    """Build template-mode layout data from a JSON-formatted summary.

    The JSON object is obtained via ``_extract_json_object_from_text`` and
    read with lenient field aliases (for example ``title`` /
    ``document_title`` / ``doc_title``), so small prompt changes do not
    break parsing.

    Args:
        summary_text: Raw summary text returned by the LLM.

    Returns:
        A dict with the keys ``document_title``, ``lead``,
        ``fallback_text``, ``sections``, ``topic_cards`` and
        ``named_modules``; or ``None`` when no JSON object is found or it
        carries no title, lead, topic or named-module content.
    """
    payload = cls._extract_json_object_from_text(summary_text)
    if not payload:
        return None

    def _inline_text(source: Dict[str, Any], *keys: str) -> str:
        """Return the cleaned text of the first truthy value among *keys*."""
        picked: Any = ""
        for key in keys:
            picked = source.get(key)
            if picked:
                break
        return cls._strip_markdown_inline(str(picked or "")).strip()

    title = _inline_text(payload, "title", "document_title", "doc_title")
    lead = _inline_text(payload, "lead", "summary_lead", "overview")
    fallback_text = _inline_text(payload, "fallback_text", "raw_summary")

    # Topic cards: at most five, non-dict entries are ignored.
    topic_cards: List[Dict[str, Any]] = []
    raw_topics = payload.get("topics")
    for entry in raw_topics if isinstance(raw_topics, list) else []:
        if not isinstance(entry, dict):
            continue
        heading = cls._clean_topic_title(str(entry.get("title") or entry.get("name") or "")) or "未命名话题"
        overview = cls._normalize_json_text_list(
            entry.get("overview_points") or entry.get("key_points") or entry.get("highlights"),
            limit=3,
            item_max_len=120,
        )
        analysis = cls._normalize_json_text_list(
            entry.get("analysis_points") or entry.get("analysis"),
            limit=2,
            item_max_len=120,
        )
        quote = _inline_text(entry, "quote_text", "quote")
        period = _inline_text(entry, "time_range", "time")
        people = _inline_text(entry, "participants", "participant_count")
        topic_cards.append(
            {
                "title": heading[:42],
                "time_range": period[:58],
                "participants": people[:42],
                "overview_points": overview,
                "analysis_points": analysis,
                "quote_text": quote[:120],
            }
        )
        if len(topic_cards) >= 5:
            break

    # Named modules: (output key, raw value, max items, max item length).
    module_specs = (
        ("shared_resources", payload.get("shared_resources"), 6, 110),
        ("marketplace", payload.get("marketplace"), 6, 110),
        ("unresolved_pool", payload.get("unresolved_pool"), 4, 110),
        ("core_points", payload.get("core_knowledge_points") or payload.get("core_points"), 4, 110),
        ("top_contributors", payload.get("top_contributors"), 3, 18),
    )
    named_modules = {
        key: cls._normalize_json_text_list(raw, limit=cap, item_max_len=max_len)
        for key, raw, cap, max_len in module_specs
    }

    # Nothing usable was parsed: report "no JSON layout" to the caller.
    if not (topic_cards or any(named_modules.values()) or lead or title):
        return None

    # Mirror the topic cards as generic sections (bullets, then paragraphs,
    # then an optional quote) alongside the card data.
    sections: List[Dict[str, Any]] = [
        {
            "title": card.get("title", "未命名话题"),
            "items": (
                [{"kind": "bullet", "text": point} for point in card.get("overview_points", [])]
                + [{"kind": "paragraph", "text": point} for point in card.get("analysis_points", [])]
                + ([{"kind": "quote", "text": card["quote_text"]}] if card.get("quote_text") else [])
            ),
        }
        for card in topic_cards
    ]

    # Without an explicit lead, borrow the first overview point of the
    # first topic (empty string if that topic has none).
    if not lead and topic_cards:
        lead = (topic_cards[0].get("overview_points") or [""])[0]
    if not fallback_text:
        fallback_text = lead or "暂无总结内容。"

    return {
        "document_title": title,
        "lead": lead or "暂无总结内容。",
        "fallback_text": fallback_text,
        "sections": sections,
        "topic_cards": topic_cards,
        "named_modules": named_modules,
    }
|
||||
|
||||
@classmethod
|
||||
def _build_summary_layout_data(cls, summary_text: str) -> Dict[str, Any]:
|
||||
"""把 LLM 总结文本重排为模板可直接消费的结构化数据。
|
||||
@@ -1186,17 +1360,27 @@ class MessageSummaryPlugin(MessagePluginInterface):
|
||||
# 1. 不再把 LLM 原文直接转 HTML 内嵌到模板;
|
||||
# 2. 先结构化解析文本,再由模板按组件渲染,稳定控制最终排版。
|
||||
renderer = HtmlTemplateRenderer()
|
||||
layout_data = self._build_summary_layout_data(summary_text)
|
||||
# 解析策略:
|
||||
# 1. template 模式优先吃 JSON(稳定、可控、低漂移);
|
||||
# 2. JSON 不可用时再回退 Markdown 结构解析,保持兼容。
|
||||
json_layout_data = self._extract_template_json_data(summary_text)
|
||||
layout_data = json_layout_data or self._build_summary_layout_data(summary_text)
|
||||
metrics_data = self._build_summary_template_metrics(
|
||||
message_stats=message_stats,
|
||||
layout_data=layout_data,
|
||||
metadata=metadata,
|
||||
)
|
||||
sections = layout_data.get("sections", []) or []
|
||||
topic_cards = self._build_topic_cards_from_sections(sections, limit=5)
|
||||
if json_layout_data and json_layout_data.get("topic_cards"):
|
||||
topic_cards = json_layout_data.get("topic_cards", [])[:5]
|
||||
else:
|
||||
topic_cards = self._build_topic_cards_from_sections(sections, limit=5)
|
||||
topic_titles = [card.get("title", "") for card in topic_cards]
|
||||
auxiliary_sections = self._build_auxiliary_sections(sections, topic_titles)
|
||||
named_modules = self._build_template_named_modules(sections)
|
||||
if json_layout_data and isinstance(json_layout_data.get("named_modules"), dict):
|
||||
named_modules = json_layout_data.get("named_modules", {})
|
||||
else:
|
||||
named_modules = self._build_template_named_modules(sections)
|
||||
resource_hub_items = self._build_resource_hub_items(named_modules.get("shared_resources", []))
|
||||
# 说明:
|
||||
# 1. 这里注入“本地字体 CSS”到模板,避免依赖 Google Fonts 等外网资源;
|
||||
|
||||
@@ -322,87 +322,41 @@ workflow:
|
||||
- id: template_system_prompt
|
||||
role: system
|
||||
text: |
|
||||
你是一名「微信群总结结构化编辑官」,你的输出将用于 Gemini 风格总结卡片渲染。
|
||||
你是一名「微信群总结结构化编辑官」,输出将直接用于总结卡片模板渲染。
|
||||
|
||||
目标:
|
||||
1. 让内容结构尽量贴合 gemini-code 模板模块;
|
||||
2. 信息密度高,但保持短句、可扫描;
|
||||
3. 不做空泛抒情,不要写冗长大段落。
|
||||
核心要求:
|
||||
1. 只输出 JSON 对象,不要输出 Markdown,不要输出解释文本;
|
||||
2. 不要使用 ```json 代码块包裹;
|
||||
3. 必须覆盖 5 个话题(topics 数组长度固定为 5,不足时归并补齐);
|
||||
4. 不要翻译昵称,不要改写 @昵称;
|
||||
5. 字段缺失时用空字符串或空数组,禁止省略关键字段。
|
||||
|
||||
必须遵守:
|
||||
1. 必须覆盖至少 5 个话题(缺少时也要从聊天里归并凑足 5 个主题);
|
||||
2. 不要翻译用户昵称,不要改写 @昵称;
|
||||
3. 输出必须是纯 Markdown,不要 JSON,不要 ``` 代码块;
|
||||
4. 每个话题都要包含:
|
||||
- 时段
|
||||
- 参与人数
|
||||
- 核心观点回顾(2-3条)
|
||||
- 客观分析(1-2条)
|
||||
- 亮点瞬间(1条)
|
||||
5. 每条 bullet 尽量不超过 40 字;
|
||||
6. 结论用“可执行建议”表达,不要空话。
|
||||
输出 JSON Schema(字段名必须一致):
|
||||
{
|
||||
"title": "字符串,整篇标题",
|
||||
"lead": "字符串,2-3句导语",
|
||||
"topics": [
|
||||
{
|
||||
"title": "话题标题",
|
||||
"time_range": "时段,如 09:20-10:10",
|
||||
"participants": "参与人数,如 18人",
|
||||
"overview_points": ["核心观点1", "核心观点2", "核心观点3"],
|
||||
"analysis_points": ["客观分析1", "客观分析2"],
|
||||
"quote_text": "亮点金句或高光总结"
|
||||
}
|
||||
],
|
||||
"shared_resources": ["资源项1", "资源项2"],
|
||||
"marketplace": ["交易项1", "交易项2"],
|
||||
"unresolved_pool": ["待解问题1", "待解问题2"],
|
||||
"core_knowledge_points": ["知识点1", "知识点2"],
|
||||
"top_contributors": ["昵称A", "昵称B", "昵称C"]
|
||||
}
|
||||
|
||||
输出格式请严格按以下骨架:
|
||||
|
||||
# 🌟「[群名] - [最新日期] 总结」🌟
|
||||
|
||||
## ⚡ 一分钟速览
|
||||
- 今日消息数:[总数]
|
||||
- 最热时段:[时段]
|
||||
|
||||
### 🏆 核心话题与结论
|
||||
1. [话题1简述]:[一句结论]
|
||||
2. [话题2简述]:[一句结论]
|
||||
3. [话题3简述]:[一句结论]
|
||||
4. [话题4简述]:[一句结论]
|
||||
5. [话题5简述]:[一句结论]
|
||||
|
||||
### 💰 交易/资源快报
|
||||
- [@用户A] [卖货/资源]
|
||||
- [@用户B] [卖货/资源]
|
||||
|
||||
### 📌 今日总结
|
||||
[1-2句结论 + 后续建议]
|
||||
|
||||
## 🌌 话题详情
|
||||
### 1️⃣ 【[话题1]】
|
||||
- **时段**:[开始]-[结束]
|
||||
- **参与人数**:[人数]
|
||||
#### 🔍 核心观点回顾
|
||||
- [观点1]
|
||||
- [观点2]
|
||||
#### 🧩 客观分析
|
||||
- [共识/分歧/价值]
|
||||
#### 🔥 亮点瞬间
|
||||
> [金句或高光总结]
|
||||
|
||||
(继续话题2~5,保持同结构)
|
||||
|
||||
## 🔗 Shared Resources
|
||||
- [仓库/文档/工具链接 + 一句说明]
|
||||
- [仓库/文档/工具链接 + 一句说明]
|
||||
|
||||
## 🛒 Marketplace
|
||||
- [出/求] [标的]:[价格或状态]
|
||||
- [出/求] [标的]:[价格或状态]
|
||||
|
||||
## ❓ Unresolved Pool
|
||||
- [待解问题1]
|
||||
- [待解问题2]
|
||||
|
||||
## 🧠 Core Knowledge Points
|
||||
- [关键配置/经验1]
|
||||
- [关键配置/经验2]
|
||||
|
||||
## 🎖️ 今日荣誉榜
|
||||
### 🏆 群聊 MVP:[@用户N]
|
||||
- 理由1
|
||||
- 理由2
|
||||
|
||||
## 👥 Top Contributors
|
||||
- [昵称A]
|
||||
- [昵称B]
|
||||
- [昵称C]
|
||||
生成约束:
|
||||
1. topics 数组长度必须为 5(不足请归并补齐);
|
||||
2. overview_points 每个话题 2-3 条,analysis_points 每个话题 1-2 条;
|
||||
3. 每条文本尽量 <= 40 字,短句可扫描;
|
||||
4. 结论要具体,不要空泛。
|
||||
- id: template_user_prompt
|
||||
role: user
|
||||
text: '{{#1775526517808.query#}}'
|
||||
|
||||
BIN
temp/md2image/summary_demo_render_latest.png
Normal file
BIN
temp/md2image/summary_demo_render_latest.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 266 KiB |
Reference in New Issue
Block a user