我已经把群总结这块从“前 120 字硬砍”改成了“关键句提炼 + 更高上限”。
已改文件:context_builder.py。具体优化如下。原来:group_memory_summary 超过 120 字就直接截成 summary[:117] + "...",很容易把后面的关键结论截掉。现在:新增 _compact_group_summary(...),先按句拆分,再给句子打分,优先保留包含“结论/风险/报错/配置/策略/优化”等关键词的句子;同时保留首句和末句,避免上下文断裂;最终摘要上限提高到 420 字、最多 6 句,超出时再做兜底截断。prompt 字段名也从“群长期摘要”改成“群长期摘要关键句”,让模型更明确这是提炼后的重点。
This commit is contained in:
@@ -276,9 +276,7 @@ class ContextBuilder:
|
||||
return "当前群没有特殊知识域限制。"
|
||||
focus = ", ".join(group_profile.get("knowledge_focus", [])[:6])
|
||||
boundaries = ", ".join(group_profile.get("topic_boundaries", [])[:6])
|
||||
summary = str(group_profile.get("group_memory_summary", "") or "").replace("\n", " ").strip()
|
||||
if len(summary) > 120:
|
||||
summary = summary[:117] + "..."
|
||||
summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""))
|
||||
lines = [
|
||||
f"群模式:{group_profile.get('mode', 'social')}",
|
||||
f"知识域偏向:{group_profile.get('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
|
||||
@@ -291,7 +289,7 @@ class ContextBuilder:
|
||||
f"表达松弛度:{group_profile.get('expressiveness_style', '克制')}",
|
||||
f"称呼强度:{group_profile.get('address_style', '低频称呼,默认直接接话')}",
|
||||
f"可能相关的话题背景:{focus}" if focus else "",
|
||||
f"群长期摘要:{summary}" if summary else "",
|
||||
f"群长期摘要关键句:{summary}" if summary else "",
|
||||
f"历史推断社交风格:{ContextBuilder._build_style_summary(group_profile.get('group_memory_style', {}))}"
|
||||
if group_profile.get("group_memory_style")
|
||||
else "",
|
||||
@@ -313,6 +311,50 @@ class ContextBuilder:
|
||||
]
|
||||
).strip(" /")
|
||||
|
||||
@staticmethod
|
||||
def _compact_group_summary(summary_text: str, max_chars: int = 420, max_sentences: int = 6) -> str:
|
||||
text = str(summary_text or "").strip()
|
||||
if not text:
|
||||
return ""
|
||||
text = re.sub(r"\s+", " ", text.replace("\n", " ").replace("\r", " ")).strip()
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
|
||||
# 长摘要按句提炼,优先保留“结论/风险/动作/配置”等关键词句,避免简单截断丢重点。
|
||||
sentences = [part.strip(" ,,;;。.!?!?::") for part in re.split(r"[。!?!?;;\n]+", text) if part.strip()]
|
||||
if not sentences:
|
||||
return text[: max_chars - 3] + "..."
|
||||
|
||||
key_patterns = [
|
||||
r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞",
|
||||
r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容",
|
||||
]
|
||||
scored: List[tuple[int, int, str]] = []
|
||||
for idx, sentence in enumerate(sentences):
|
||||
score = 0
|
||||
for pattern in key_patterns:
|
||||
if re.search(pattern, sentence, flags=re.IGNORECASE):
|
||||
score += 3
|
||||
if re.search(r"\d", sentence):
|
||||
score += 1
|
||||
if 8 <= len(sentence) <= 80:
|
||||
score += 1
|
||||
if idx == 0 or idx == len(sentences) - 1:
|
||||
score += 1
|
||||
scored.append((score, idx, sentence))
|
||||
|
||||
chosen_indexes = {0, len(sentences) - 1}
|
||||
for _, idx, _ in sorted(scored, key=lambda x: (-x[0], x[1])):
|
||||
chosen_indexes.add(idx)
|
||||
if len(chosen_indexes) >= max_sentences:
|
||||
break
|
||||
|
||||
chosen = [sentences[idx] for idx in sorted(chosen_indexes)]
|
||||
merged = ";".join([item for item in chosen if item]).strip(";")
|
||||
if len(merged) <= max_chars:
|
||||
return merged
|
||||
return merged[: max_chars - 3].rstrip(" ,,;;。.!?!?::") + "..."
|
||||
|
||||
@staticmethod
|
||||
def _build_quote_prompt(quote_context: Dict) -> str:
|
||||
if not quote_context:
|
||||
|
||||
Reference in New Issue
Block a user