我已经把群总结这块从“前 120 字硬砍”改成了“关键句提炼 + 更高上限”。

已改文件:

context_builder.py
具体优化:

原来:
group_memory_summary 超过 120 字就直接 summary[:117] + "...",很容易把后面的关键结论截掉。
现在:
新增 _compact_group_summary(...)。
先按句拆分,再给句子打分,优先保留包含“结论/风险/报错/配置/策略/优化”等关键词的句子。
同时保留首句和末句,避免上下文断裂。
最终摘要上限提高到 420 字、最多 6 句,再做兜底截断。
prompt 字段名也改成 群长期摘要关键句,让模型更明确这是提炼后的重点。
This commit is contained in:
liuwei
2026-04-15 10:37:27 +08:00
parent 265f3835b4
commit 5ce60cbd95

View File

@@ -276,9 +276,7 @@ class ContextBuilder:
return "当前群没有特殊知识域限制。"
focus = ", ".join(group_profile.get("knowledge_focus", [])[:6])
boundaries = ", ".join(group_profile.get("topic_boundaries", [])[:6])
summary = str(group_profile.get("group_memory_summary", "") or "").replace("\n", " ").strip()
if len(summary) > 120:
summary = summary[:117] + "..."
summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""))
lines = [
f"群模式:{group_profile.get('mode', 'social')}",
f"知识域偏向:{group_profile.get('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
@@ -291,7 +289,7 @@ class ContextBuilder:
f"表达松弛度:{group_profile.get('expressiveness_style', '克制')}",
f"称呼强度:{group_profile.get('address_style', '低频称呼,默认直接接话')}",
f"可能相关的话题背景:{focus}" if focus else "",
f"群长期摘要:{summary}" if summary else "",
f"群长期摘要关键句{summary}" if summary else "",
f"历史推断社交风格:{ContextBuilder._build_style_summary(group_profile.get('group_memory_style', {}))}"
if group_profile.get("group_memory_style")
else "",
@@ -313,6 +311,50 @@ class ContextBuilder:
]
).strip(" /")
@staticmethod
def _compact_group_summary(summary_text: str, max_chars: int = 420, max_sentences: int = 6) -> str:
    """Condense a long group-memory summary into its most informative sentences.

    Whitespace is collapsed first, and text that already fits in ``max_chars``
    is returned as is. Longer text is split into sentences; each sentence is
    scored (keyword hits, digits, moderate length, first/last position) and
    the first sentence, the last sentence and the best-scoring remaining ones
    are kept in their original order, each followed by its own terminator.

    Args:
        summary_text: Raw summary text; ``None`` or blank input yields ``""``.
        max_chars: Upper bound on the length of the returned string.
        max_sentences: Upper bound on the number of sentences kept. The first
            and last sentences are always kept, so values below 2 behave
            like 2 when the text contains several sentences.

    Returns:
        The compacted summary, never longer than ``max_chars`` characters.
    """
    text = re.sub(r"\s+", " ", str(summary_text or "")).strip()
    if not text:
        return ""
    if len(text) <= max_chars:
        return text

    # Sentence terminators: ASCII forms plus the full-width forms that are
    # common in Chinese text (U+FF01 '!', U+FF1F '?', U+FF1B ';').
    stops = "。!?;\uff01\uff1f\uff1b"
    # Punctuation trimmed from sentence edges (adds U+FF0C ',' and U+FF1A ':').
    edge_chars = " ,:." + stops + "\uff0c\uff1a"

    def hard_cut(value: str) -> str:
        # Last-resort truncation that never exceeds max_chars, even when
        # max_chars is too small to leave room for the "..." marker.
        if max_chars <= 3:
            return value[: max(max_chars, 0)]
        return value[: max_chars - 3].rstrip(edge_chars) + "..."

    # With a capture group, re.split alternates body, terminator, body, ...
    # so every sentence can keep the terminator it originally ended with.
    pieces = re.split("([" + stops + "]+)", text)
    sentences: list[tuple[str, str]] = []
    for body, stop in zip(pieces[0::2], pieces[1::2] + [""]):
        body = body.strip(edge_chars)
        if body:  # drop fragments that were punctuation only
            sentences.append((body, stop[:1]))
    if not sentences:
        return hard_cut(text)

    key_patterns = [
        r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞",
        r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容",
    ]
    last = len(sentences) - 1
    ranked = []
    for idx, (body, _) in enumerate(sentences):
        score = sum(3 for pattern in key_patterns if re.search(pattern, body))
        if re.search(r"\d", body):
            score += 1
        if 8 <= len(body) <= 80:
            score += 1
        if idx in (0, last):
            score += 1
        ranked.append((-score, idx))
    ranked.sort()  # highest score first, earlier sentence wins ties

    # The first and last sentences are always kept to preserve context.
    chosen = {0, last}
    for _, idx in ranked:
        # Check the limit before adding so the cap is never overshot.
        if len(chosen) >= max_sentences:
            break
        chosen.add(idx)

    merged = "".join(body + stop for body, stop in (sentences[i] for i in sorted(chosen)))
    if len(merged) <= max_chars:
        return merged
    return hard_cut(merged)
@staticmethod
def _build_quote_prompt(quote_context: Dict) -> str:
if not quote_context: