优化自动回复对群摘要的结构化利用
This commit is contained in:
@@ -190,6 +190,14 @@ repeat_min_length = 4
|
||||
[logging]
|
||||
debug = true
|
||||
|
||||
[group_profiles]
|
||||
# 群长期记忆不再只读“最新一天那篇总结”:
|
||||
# 1. 这里读取最近 5 份群摘要,再聚合成稳定主题/近期重点/未决问题;
|
||||
# 2. 自动回复消费时优先走这些结构字段,减少 markdown 大段文本的理解损耗;
|
||||
# 3. item_limit 控制每类字段带给模型的条数,避免群背景过重。
|
||||
summary_history_limit = 5
|
||||
summary_item_limit = 4
|
||||
|
||||
[group_profiles.default]
|
||||
mode = "social"
|
||||
persona_id = "xiaoniu"
|
||||
|
||||
@@ -359,11 +359,24 @@ class ContextBuilder:
|
||||
# 3. 更细的群事实、群关系仍走相关性增强链路。
|
||||
if not group_profile:
|
||||
return ""
|
||||
structured = group_profile.get("group_memory_structured", {}) or {}
|
||||
summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""), max_chars=220, max_sentences=4)
|
||||
focus = ", ".join(group_profile.get("knowledge_focus", [])[:4])
|
||||
memory_style = ContextBuilder._build_style_summary(group_profile.get("group_memory_style", {}))
|
||||
stable_topics = ContextBuilder._stringify_items(structured.get("stable_topics", []) or [], 4)
|
||||
recent_points = ContextBuilder._stringify_items(structured.get("recent_key_points", []) or [], 3)
|
||||
unresolved_points = ContextBuilder._stringify_items(structured.get("unresolved_points", []) or [], 3)
|
||||
resource_clues = ContextBuilder._stringify_items(structured.get("resource_clues", []) or [], 3)
|
||||
role_hints = ContextBuilder._stringify_items(structured.get("role_hints", []) or [], 3)
|
||||
summary_days = int(group_profile.get("group_memory_summary_days", 0) or 0)
|
||||
lines = [
|
||||
"群长期背景:",
|
||||
f"摘要观察窗口:最近 {summary_days} 份群总结" if summary_days > 0 else "",
|
||||
f"稳定主题:{stable_topics}" if stable_topics else "",
|
||||
f"近期重点:{recent_points}" if recent_points else "",
|
||||
f"未决问题:{unresolved_points}" if unresolved_points else "",
|
||||
f"共享资源/线索:{resource_clues}" if resource_clues else "",
|
||||
f"角色线索:{role_hints}" if role_hints else "",
|
||||
f"长期摘要:{summary}" if summary else "",
|
||||
f"常聊方向:{focus}" if focus else "",
|
||||
f"历史社交风格:{memory_style}" if memory_style else "",
|
||||
@@ -374,9 +387,15 @@ class ContextBuilder:
|
||||
def _build_group_profile_prompt(group_profile: Dict) -> str:
|
||||
if not group_profile:
|
||||
return "当前群没有特殊知识域限制。"
|
||||
structured = group_profile.get("group_memory_structured", {}) or {}
|
||||
focus = ", ".join(group_profile.get("knowledge_focus", [])[:6])
|
||||
boundaries = ", ".join(group_profile.get("topic_boundaries", [])[:6])
|
||||
summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""))
|
||||
stable_topics = ContextBuilder._stringify_items(structured.get("stable_topics", []) or [], 4)
|
||||
recent_points = ContextBuilder._stringify_items(structured.get("recent_key_points", []) or [], 3)
|
||||
unresolved_points = ContextBuilder._stringify_items(structured.get("unresolved_points", []) or [], 3)
|
||||
resource_clues = ContextBuilder._stringify_items(structured.get("resource_clues", []) or [], 3)
|
||||
role_hints = ContextBuilder._stringify_items(structured.get("role_hints", []) or [], 3)
|
||||
lines = [
|
||||
f"群模式:{group_profile.get('mode', 'social')}",
|
||||
f"知识域偏向:{group_profile.get('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
|
||||
@@ -389,6 +408,15 @@ class ContextBuilder:
|
||||
f"表达松弛度:{group_profile.get('expressiveness_style', '克制')}",
|
||||
f"称呼强度:{group_profile.get('address_style', '低频称呼,默认直接接话')}",
|
||||
f"可能相关的话题背景:{focus}" if focus else "",
|
||||
# 这里显式把群摘要结构字段展开给模型:
|
||||
# 1. LLM 更擅长消费清晰字段,而不是再从 markdown 文案里二次猜测;
|
||||
# 2. “稳定主题/近期重点/未决问题”分别承载不同决策用途,混成一段反而不好用;
|
||||
# 3. 仍然保留原摘要关键句,作为字段缺失时的人类可读兜底。
|
||||
f"群摘要稳定主题:{stable_topics}" if stable_topics else "",
|
||||
f"群摘要近期重点:{recent_points}" if recent_points else "",
|
||||
f"群摘要未决问题:{unresolved_points}" if unresolved_points else "",
|
||||
f"群摘要资源线索:{resource_clues}" if resource_clues else "",
|
||||
f"群摘要角色线索:{role_hints}" if role_hints else "",
|
||||
f"群长期摘要关键句:{summary}" if summary else "",
|
||||
f"历史推断社交风格:{ContextBuilder._build_style_summary(group_profile.get('group_memory_style', {}))}"
|
||||
if group_profile.get("group_memory_style")
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections import Counter
|
||||
from typing import Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from db.message_storage import MessageStorageDB
|
||||
from db.message_summary_db import MessageSummaryDBOperator
|
||||
@@ -19,17 +21,30 @@ class GroupMemoryService:
|
||||
SHARPNESS_KEYWORDS = ["菜", "蠢", "逆天", "离谱", "抽象", "别搞", "别整", "你这", "搁这", "典"]
|
||||
RELAXED_KEYWORDS = ["随便", "行吧", "都行", "慢慢来", "不急", "摸鱼", "唠", "水群", "先这样"]
|
||||
SERIOUS_KEYWORDS = ["报错", "排查", "日志", "配置", "部署", "接口", "重现", "修复", "方案", "联调"]
|
||||
UNRESOLVED_HINTS = ["未解决", "待解决", "待确认", "阻塞", "卡在", "异常", "报错", "问题", "风险", "todo", "TODO"]
|
||||
RESOURCE_HINTS = ["文档", "链接", "地址", "仓库", "repo", "资料", "教程", "命令", "配置", "脚本", "接口"]
|
||||
ROLE_HINTS = ["贡献", "活跃", "答疑", "负责", "推进", "owner", "结论", "方案位", "排查"]
|
||||
|
||||
def __init__(self, db_manager, config: Dict):
    """Wire up the storage helpers and read the summary aggregation limits.

    Args:
        db_manager: Handle passed straight through to ``MessageStorageDB``
            and ``MessageSummaryDBOperator``.
        config: Settings mapping; a falsy value is replaced by ``{}``.
    """
    self.config = config if config else {}
    self.message_db = MessageStorageDB(db_manager)
    self.summary_db = MessageSummaryDBOperator(db_manager)

    # Group auto-reply should not look at only the most recent summary:
    # 1. a single daily summary can mistake a short-lived blip for
    #    long-term background;
    # 2. reading the last few summaries and aggregating them lightly keeps
    #    the long-term group profile stable;
    # 3. the caps below stop the summary itself from crowding the prompt.
    # Missing or falsy settings fall back to 5 / 4; both are floored at 1.
    history_setting = self.config.get("summary_history_limit", 5)
    item_setting = self.config.get("summary_item_limit", 4)
    self.summary_history_limit = max(int(history_setting or 5), 1)
    self.summary_item_limit = max(int(item_setting or 4), 1)
|
||||
|
||||
def build_group_memory_profile(self, room_id: str, group_name: str = "") -> Dict:
|
||||
recent_messages = self.message_db.get_messages_for_summary(
|
||||
room_id, hours_ago=48, min_messages=20, max_hours=168, max_results=300
|
||||
) or []
|
||||
summary_text = self._load_recent_summary_text(room_id)
|
||||
summary_records = self._load_recent_summary_records(room_id)
|
||||
structured_summary = self._build_structured_summary_digest(summary_records)
|
||||
summary_text = str(structured_summary.get("summary_text", "") or "").strip()
|
||||
summary_corpus = self._build_summary_corpus(summary_records, structured_summary)
|
||||
|
||||
topic_counter = Counter()
|
||||
domain_counter = Counter()
|
||||
humor_hits = 0
|
||||
@@ -58,7 +73,9 @@ class GroupMemoryService:
|
||||
relaxed_hits += self._count_hits(content, self.RELAXED_KEYWORDS)
|
||||
serious_hits += self._count_hits(content, self.SERIOUS_KEYWORDS)
|
||||
|
||||
summary_lower = summary_text.lower()
|
||||
# 群摘要这层不再只吃一整段 markdown 文案,而是优先吃已经抽好的结构字段。
|
||||
# 这样领域判断和主题判断会更稳定,模型后续也更容易利用这些结论。
|
||||
summary_lower = summary_corpus.lower()
|
||||
for domain, keywords in self.DOMAIN_KEYWORDS.items():
|
||||
hits = sum(1 for keyword in keywords if keyword and keyword.lower() in summary_lower)
|
||||
if hits:
|
||||
@@ -72,7 +89,10 @@ class GroupMemoryService:
|
||||
serious_hits += self._count_hits(summary_lower, self.SERIOUS_KEYWORDS) * 2
|
||||
|
||||
inferred_domain = domain_counter.most_common(1)[0][0] if domain_counter else "general"
|
||||
focus_topics = [item for item, _ in topic_counter.most_common(6)]
|
||||
focus_topics = self._merge_unique(
|
||||
[item for item, _ in topic_counter.most_common(6)],
|
||||
structured_summary.get("stable_topics", []) or [],
|
||||
)[:6]
|
||||
style_profile = self._infer_style_profile(
|
||||
humor_hits=humor_hits,
|
||||
sharpness_hits=sharpness_hits,
|
||||
@@ -88,6 +108,9 @@ class GroupMemoryService:
|
||||
"message_sample_count": len(recent_messages),
|
||||
"summary_text": summary_text,
|
||||
"style_profile": style_profile,
|
||||
"structured_summary": structured_summary,
|
||||
"summary_source_count": len(summary_records),
|
||||
"summary_timeline": structured_summary.get("timeline", []) or [],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -134,22 +157,412 @@ class GroupMemoryService:
|
||||
"expressiveness_style": expressiveness_style,
|
||||
}
|
||||
|
||||
def _load_recent_summary_text(self, room_id: str) -> str:
|
||||
candidates: List[Dict] = []
|
||||
for summary_type in ("daily", "manual"):
|
||||
sql = """
|
||||
SELECT *
|
||||
FROM t_message_summary
|
||||
WHERE chatroom_id = %s AND summary_type = %s
|
||||
ORDER BY period_end DESC, update_time DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
rows = self.summary_db.execute_query(sql, (room_id, summary_type)) or []
|
||||
candidates.extend(rows)
|
||||
if not candidates:
|
||||
return ""
|
||||
candidates.sort(
|
||||
key=lambda item: (str(item.get("period_end", "")), str(item.get("update_time", ""))),
|
||||
reverse=True,
|
||||
def _load_recent_summary_records(self, room_id: str) -> List[Dict]:
    """Load the newest daily/manual summaries of a room with parsed fields.

    Rows are requested newest-first (``period_end`` then ``update_time``)
    and capped at ``self.summary_history_limit``. Rows lacking a
    ``period_key`` and repeated ``summary_type``/``period_key`` pairs are
    dropped. Every kept row gains a ``"structured_summary"`` entry parsed
    from its ``summary_text``.

    Args:
        room_id: Value bound to the ``chatroom_id`` filter.

    Returns:
        The kept rows, in the order the query returned them.
    """
    query = """
        SELECT *
        FROM t_message_summary
        WHERE chatroom_id = %s AND summary_type IN ('daily', 'manual')
        ORDER BY period_end DESC, update_time DESC
        LIMIT %s
    """
    fetched = self.summary_db.execute_query(query, (room_id, self.summary_history_limit)) or []

    # Insertion-ordered dict doubles as the "seen" set and the result list.
    unique_rows: Dict[str, Dict] = {}
    for raw_row in fetched:
        row = self.summary_db._deserialize_row(dict(raw_row)) or {}
        kind = str(row.get("summary_type", "") or "")
        period = str(row.get("period_key", "") or "")
        identity = f"{kind}:{period}"
        if period and identity not in unique_rows:
            row["structured_summary"] = self._extract_structured_summary(row.get("summary_text", ""))
            unique_rows[identity] = row
    return list(unique_rows.values())
|
||||
|
||||
def _build_structured_summary_digest(self, records: List[Dict]) -> Dict:
|
||||
if not records:
|
||||
return {
|
||||
"stable_topics": [],
|
||||
"recent_key_points": [],
|
||||
"unresolved_points": [],
|
||||
"resource_clues": [],
|
||||
"role_hints": [],
|
||||
"timeline": [],
|
||||
"summary_text": "",
|
||||
}
|
||||
|
||||
stable_topic_scores: Counter[str] = Counter()
|
||||
key_point_scores: Counter[str] = Counter()
|
||||
unresolved_scores: Counter[str] = Counter()
|
||||
resource_scores: Counter[str] = Counter()
|
||||
role_scores: Counter[str] = Counter()
|
||||
timeline: List[str] = []
|
||||
|
||||
for index, record in enumerate(records):
|
||||
weight = max(self.summary_history_limit - index, 1)
|
||||
period_key = str(record.get("period_key", "") or "")
|
||||
structured = record.get("structured_summary", {}) or {}
|
||||
|
||||
for item in structured.get("topics", []) or []:
|
||||
stable_topic_scores[item] += weight * 2
|
||||
for item in structured.get("key_points", []) or []:
|
||||
key_point_scores[item] += weight
|
||||
for item in structured.get("unresolved_points", []) or []:
|
||||
unresolved_scores[item] += weight * 2
|
||||
for item in structured.get("resource_clues", []) or []:
|
||||
resource_scores[item] += weight
|
||||
for item in structured.get("role_hints", []) or []:
|
||||
role_scores[item] += weight
|
||||
|
||||
lead = str(structured.get("lead", "") or "").strip()
|
||||
if not lead:
|
||||
key_points = structured.get("key_points", []) or []
|
||||
lead = str(key_points[0] if key_points else "").strip()
|
||||
if period_key and lead:
|
||||
timeline.append(f"{period_key}: {lead}")
|
||||
|
||||
stable_topics = [item for item, _ in stable_topic_scores.most_common(self.summary_item_limit)]
|
||||
recent_key_points = [item for item, _ in key_point_scores.most_common(self.summary_item_limit)]
|
||||
unresolved_points = [item for item, _ in unresolved_scores.most_common(self.summary_item_limit)]
|
||||
resource_clues = [item for item, _ in resource_scores.most_common(self.summary_item_limit)]
|
||||
role_hints = [item for item, _ in role_scores.most_common(self.summary_item_limit)]
|
||||
summary_text = self._compose_structured_summary_text(
|
||||
stable_topics=stable_topics,
|
||||
recent_key_points=recent_key_points,
|
||||
unresolved_points=unresolved_points,
|
||||
resource_clues=resource_clues,
|
||||
role_hints=role_hints,
|
||||
)
|
||||
return str(candidates[0].get("summary_text", "") or "").strip()
|
||||
return {
|
||||
"stable_topics": stable_topics,
|
||||
"recent_key_points": recent_key_points,
|
||||
"unresolved_points": unresolved_points,
|
||||
"resource_clues": resource_clues,
|
||||
"role_hints": role_hints,
|
||||
"timeline": timeline[: self.summary_item_limit],
|
||||
"summary_text": summary_text,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _compose_structured_summary_text(
|
||||
*,
|
||||
stable_topics: List[str],
|
||||
recent_key_points: List[str],
|
||||
unresolved_points: List[str],
|
||||
resource_clues: List[str],
|
||||
role_hints: List[str],
|
||||
) -> str:
|
||||
parts = []
|
||||
if stable_topics:
|
||||
parts.append(f"稳定主题:{'、'.join(stable_topics[:4])}")
|
||||
if recent_key_points:
|
||||
parts.append(f"近期重点:{'、'.join(recent_key_points[:3])}")
|
||||
if unresolved_points:
|
||||
parts.append(f"未决问题:{'、'.join(unresolved_points[:3])}")
|
||||
if resource_clues:
|
||||
parts.append(f"常见资源:{'、'.join(resource_clues[:3])}")
|
||||
if role_hints:
|
||||
parts.append(f"群内角色线索:{'、'.join(role_hints[:3])}")
|
||||
return ";".join(parts)
|
||||
|
||||
def _build_summary_corpus(self, records: List[Dict], structured_summary: Dict) -> str:
|
||||
texts: List[str] = []
|
||||
for record in records:
|
||||
structured = record.get("structured_summary", {}) or {}
|
||||
texts.extend(structured.get("topics", []) or [])
|
||||
texts.extend(structured.get("key_points", []) or [])
|
||||
texts.extend(structured.get("unresolved_points", []) or [])
|
||||
texts.extend(structured.get("resource_clues", []) or [])
|
||||
texts.extend(structured.get("role_hints", []) or [])
|
||||
texts.extend(structured_summary.get("stable_topics", []) or [])
|
||||
texts.extend(structured_summary.get("recent_key_points", []) or [])
|
||||
texts.extend(structured_summary.get("unresolved_points", []) or [])
|
||||
texts.extend(structured_summary.get("resource_clues", []) or [])
|
||||
texts.extend(structured_summary.get("role_hints", []) or [])
|
||||
return " ".join([str(item).strip() for item in texts if str(item).strip()])
|
||||
|
||||
def _extract_structured_summary(self, raw_summary: str) -> Dict:
    """Parse one stored summary into the structured-field dict.

    A non-empty JSON object found in the text is normalised as JSON;
    anything else (including an empty object) is parsed as Markdown.
    """
    json_payload = self._extract_json_object_from_text(raw_summary)
    if not json_payload:
        return self._parse_markdown_summary(raw_summary)
    return self._normalize_json_summary_payload(json_payload)
|
||||
|
||||
def _normalize_json_summary_payload(self, payload: Dict[str, Any]) -> Dict:
    """Map a JSON summary payload onto the structured-field dict.

    Reads several alternative key names per field, cleans every text, and
    returns ``lead`` plus de-duplicated ``topics``, ``key_points``,
    ``unresolved_points``, ``resource_clues`` and ``role_hints`` lists,
    each capped at ``self.summary_item_limit``.
    """
    # NOTE(review): the nesting of the per-topic key-point extraction was
    # reconstructed from a view without indentation; it is assumed to run
    # for every dict topic, whether or not the topic has a title. Confirm.
    cap = self.summary_item_limit
    to_list = self._normalize_text_list
    tidy = self._clean_text

    headline = tidy(
        str(payload.get("lead") or payload.get("summary_lead") or payload.get("overview") or "")
    )
    open_issues = to_list(
        payload.get("unresolved_pool") or payload.get("unresolved_points"),
        limit=cap,
    )
    # Resource fields may be a string in one workflow version and a list in
    # another, so each is normalised on its own before being concatenated.
    resources = to_list(payload.get("shared_resources"), limit=cap)
    resources += to_list(payload.get("marketplace"), limit=cap)
    contributors = to_list(payload.get("top_contributors"), limit=cap)

    topic_titles: List[str] = []
    highlights: List[str] = []
    topic_entries = payload.get("topics")
    for entry in topic_entries if isinstance(topic_entries, list) else []:
        if not isinstance(entry, dict):
            plain = tidy(str(entry or ""))
            if plain:
                topic_titles.append(plain)
            continue
        heading = tidy(str(entry.get("title") or entry.get("name") or ""))
        if heading:
            topic_titles.append(heading)
        highlights += to_list(
            entry.get("overview_points") or entry.get("key_points") or entry.get("highlights"),
            limit=2,
        )
        highlights += to_list(entry.get("analysis_points") or entry.get("analysis"), limit=1)

    highlights += to_list(
        payload.get("core_knowledge_points") or payload.get("core_points"),
        limit=cap,
    )

    # Lead fallbacks: explicit fallback text first, then the first key point.
    if not headline:
        headline = tidy(str(payload.get("fallback_text") or payload.get("raw_summary") or ""))
    if not headline and highlights:
        headline = highlights[0]

    unique = self._dedup_items
    return {
        "lead": headline,
        "topics": unique(topic_titles, cap),
        "key_points": unique(highlights, cap),
        "unresolved_points": unique(open_issues, cap),
        "resource_clues": unique(resources, cap),
        "role_hints": unique(contributors, cap),
    }
|
||||
|
||||
def _parse_markdown_summary(self, raw_summary: str) -> Dict:
    """Split a Markdown summary into the structured-field dict.

    Headings set the current section (and may become the lead or a topic);
    every other non-empty line is classified into one bucket by
    ``_classify_summary_line``. Returns ``lead`` plus de-duplicated lists
    capped at ``self.summary_item_limit``.
    """
    # NOTE(review): reconstructed from a view without indentation; the
    # "first key point of six or more characters becomes the lead" rule is
    # assumed to apply only to lines that land in the key-point bucket.
    body = self._extract_llm_payload_text(raw_summary)
    heading_re = re.compile(r"^#{1,6}\s*(.+)$")
    bullet_re = re.compile(r"^([-*+]|\d+[.)、])\s*(.+)$")

    collected: Dict[str, List[str]] = {
        "topic": [],
        "key_point": [],
        "unresolved": [],
        "resource": [],
        "role": [],
    }
    section = ""
    headline = ""

    for source_line in body.splitlines():
        candidate = str(source_line or "").strip()
        content = self._clean_text(candidate)
        if not content:
            continue

        heading = heading_re.match(candidate)
        if heading is not None:
            section = self._clean_text(heading.group(1))
            if not headline and not self._looks_like_generic_title(section):
                headline = section
            if section and self._looks_like_topic_title(section):
                collected["topic"].append(section)
            continue

        bullet = bullet_re.match(candidate)
        if bullet is not None:
            content = self._clean_text(bullet.group(2))

        label = self._classify_summary_line(section, content)
        if label in ("unresolved", "resource", "role", "topic"):
            collected[label].append(content)
            continue
        collected["key_point"].append(content)
        if not headline and len(content) >= 6:
            headline = content

    if not headline and collected["key_point"]:
        headline = collected["key_point"][0]

    cap = self.summary_item_limit
    return {
        "lead": headline,
        "topics": self._dedup_items(collected["topic"], cap),
        "key_points": self._dedup_items(collected["key_point"], cap),
        "unresolved_points": self._dedup_items(collected["unresolved"], cap),
        "resource_clues": self._dedup_items(collected["resource"], cap),
        "role_hints": self._dedup_items(collected["role"], cap),
    }
|
||||
|
||||
@staticmethod
|
||||
def _extract_llm_payload_text(summary_text: str) -> str:
|
||||
text = str(summary_text or "").strip()
|
||||
if not text:
|
||||
return ""
|
||||
try:
|
||||
if (text.startswith("{") and text.endswith("}")) or (text.startswith("[") and text.endswith("]")):
|
||||
payload = json.loads(text)
|
||||
if isinstance(payload, dict):
|
||||
for key in ("text", "summary", "answer", "content", "result"):
|
||||
value = payload.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip()
|
||||
if isinstance(payload, str) and payload.strip():
|
||||
return payload.strip()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if text.startswith('"') and text.endswith('"'):
|
||||
decoded = json.loads(text)
|
||||
if isinstance(decoded, str) and decoded.strip():
|
||||
return decoded.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
def _extract_json_object_from_text(raw_text: str) -> Optional[Dict[str, Any]]:
|
||||
text = str(raw_text or "").strip()
|
||||
if not text:
|
||||
return None
|
||||
try:
|
||||
if text.startswith("{") and text.endswith("}"):
|
||||
parsed = json.loads(text)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
fenced_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
|
||||
if fenced_match:
|
||||
candidate = str(fenced_match.group(1) or "").strip()
|
||||
try:
|
||||
parsed = json.loads(candidate)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
left = text.find("{")
|
||||
right = text.rfind("}")
|
||||
if left >= 0 and right > left:
|
||||
candidate = text[left:right + 1].strip()
|
||||
try:
|
||||
parsed = json.loads(candidate)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
def _classify_summary_line(self, section_title: str, line: str) -> str:
|
||||
title = str(section_title or "").lower()
|
||||
text = str(line or "").lower()
|
||||
if any(keyword.lower() in title or keyword.lower() in text for keyword in self.UNRESOLVED_HINTS):
|
||||
return "unresolved"
|
||||
if any(keyword.lower() in title or keyword.lower() in text for keyword in self.RESOURCE_HINTS):
|
||||
return "resource"
|
||||
if any(keyword.lower() in title or keyword.lower() in text for keyword in self.ROLE_HINTS):
|
||||
return "role"
|
||||
if self._looks_like_topic_title(section_title) or any(word in title for word in ["话题", "主题", "讨论", "进展"]):
|
||||
return "topic"
|
||||
return "key_point"
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_generic_title(text: str) -> bool:
|
||||
value = str(text or "").strip().lower()
|
||||
return value in {"今日总结", "群聊总结", "summary", "每日总结", "日报"}
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_topic_title(text: str) -> bool:
|
||||
value = str(text or "").strip()
|
||||
if not value:
|
||||
return False
|
||||
if len(value) > 22:
|
||||
return False
|
||||
return any(keyword in value for keyword in ["话题", "主题", "讨论", "进展", "问题", "项目", "模块"])
|
||||
|
||||
@staticmethod
|
||||
def _normalize_text_list(value: Any, limit: int = 4) -> List[str]:
|
||||
items: List[str] = []
|
||||
if isinstance(value, str):
|
||||
cleaned = GroupMemoryService._clean_text(value)
|
||||
return [cleaned] if cleaned else []
|
||||
if not isinstance(value, list):
|
||||
return items
|
||||
for item in value:
|
||||
if isinstance(item, dict):
|
||||
text = GroupMemoryService._clean_text(
|
||||
str(item.get("text") or item.get("title") or item.get("value") or item.get("name") or "")
|
||||
)
|
||||
else:
|
||||
text = GroupMemoryService._clean_text(str(item or ""))
|
||||
if not text:
|
||||
continue
|
||||
items.append(text)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
return items
|
||||
|
||||
@staticmethod
|
||||
def _clean_text(text: str) -> str:
|
||||
cleaned = str(text or "").strip()
|
||||
if not cleaned:
|
||||
return ""
|
||||
cleaned = re.sub(r"!\[([^\]]*)\]\([^)]+\)", r"\1", cleaned)
|
||||
cleaned = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", cleaned)
|
||||
cleaned = re.sub(r"`([^`]+)`", r"\1", cleaned)
|
||||
cleaned = re.sub(r"(\*\*|__)(.*?)\1", r"\2", cleaned)
|
||||
cleaned = re.sub(r"(\*|_)(.*?)\1", r"\2", cleaned)
|
||||
cleaned = re.sub(r"^[>\-\*\+\d\.\)\(、\s]+", "", cleaned)
|
||||
cleaned = re.sub(r"\s+", " ", cleaned)
|
||||
return cleaned.strip(" ::;;,,。")
|
||||
|
||||
@staticmethod
|
||||
def _dedup_items(items: List[str], limit: int) -> List[str]:
|
||||
result: List[str] = []
|
||||
seen = set()
|
||||
for item in items:
|
||||
value = str(item or "").strip()
|
||||
if not value:
|
||||
continue
|
||||
normalized = re.sub(r"\s+", "", value.lower())
|
||||
if normalized in seen:
|
||||
continue
|
||||
seen.add(normalized)
|
||||
result.append(value)
|
||||
if len(result) >= limit:
|
||||
break
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _merge_unique(primary: List[str], secondary: List[str]) -> List[str]:
|
||||
merged: List[str] = []
|
||||
seen = set()
|
||||
for source in (primary or []) + (secondary or []):
|
||||
item = str(source or "").strip()
|
||||
if not item:
|
||||
continue
|
||||
normalized = re.sub(r"\s+", "", item.lower())
|
||||
if normalized in seen:
|
||||
continue
|
||||
seen.add(normalized)
|
||||
merged.append(item)
|
||||
return merged
|
||||
|
||||
@@ -26,10 +26,16 @@ class GroupProfileResolver:
|
||||
configured_domain = str(profile.get("knowledge_domain", "general") or "general")
|
||||
inferred_domain = str(group_memory_profile.get("inferred_domain", "general") or "general")
|
||||
inferred_style = group_memory_profile.get("style_profile", {}) or {}
|
||||
structured_summary = group_memory_profile.get("structured_summary", {}) or {}
|
||||
effective_domain = configured_domain
|
||||
if configured_domain in {"", "general", "casual"} and inferred_domain not in {"", "general"}:
|
||||
effective_domain = inferred_domain
|
||||
# 群摘要不再只是“一段文案”:
|
||||
# 1. 这里把群长期聚合后的稳定主题一起并到 focus 里;
|
||||
# 2. 这样群画像后续传给 prompt 时,LLM 能拿到更干净的字段,而不是自己再拆 markdown;
|
||||
# 3. 同时只保留去重后的短项,避免群摘要字段把配置 focus 全冲掉。
|
||||
inferred_focus = list(group_memory_profile.get("focus_topics", []))
|
||||
inferred_focus.extend(structured_summary.get("stable_topics", []) or [])
|
||||
merged_focus = []
|
||||
for item in focus + inferred_focus:
|
||||
if item and item not in merged_focus:
|
||||
@@ -63,5 +69,8 @@ class GroupProfileResolver:
|
||||
"group_memory_domain": inferred_domain,
|
||||
"group_memory_summary": group_memory_profile.get("summary_text", ""),
|
||||
"group_memory_sample_count": group_memory_profile.get("message_sample_count", 0),
|
||||
"group_memory_summary_days": group_memory_profile.get("summary_source_count", 0),
|
||||
"group_memory_structured": structured_summary,
|
||||
"group_memory_timeline": group_memory_profile.get("summary_timeline", []) or [],
|
||||
"group_memory_style": inferred_style,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user