优化自动回复对群摘要的结构化利用

2026-04-24 16:16:30 +08:00
parent 2b8a5d0ce6
commit 8a813df4a3
4 changed files with 480 additions and 22 deletions
--- a/plugins/ai_auto_response/config.toml
+++ b/plugins/ai_auto_response/config.toml
@@ -190,6 +190,14 @@ repeat_min_length = 4
 [logging]
 debug = true

+[group_profiles]
+# 群长期记忆不再只读“最新一天那篇总结”：
+# 1. 这里读取最近 5 份群摘要，再聚合成稳定主题/近期重点/未决问题；
+# 2. 自动回复消费时优先走这些结构字段，减少 markdown 大段文本的理解损耗；
+# 3. item_limit 控制每类字段带给模型的条数，避免群背景过重。
+summary_history_limit = 5
+summary_item_limit = 4
+
 [group_profiles.default]
 mode = "social"
 persona_id = "xiaoniu"
--- a/plugins/ai_auto_response/context/context_builder.py
+++ b/plugins/ai_auto_response/context/context_builder.py
@@ -359,11 +359,24 @@ class ContextBuilder:
        # 3. 更细的群事实、群关系仍走相关性增强链路。
        if not group_profile:
            return ""
+        structured = group_profile.get("group_memory_structured", {}) or {}
        summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""), max_chars=220, max_sentences=4)
        focus = ", ".join(group_profile.get("knowledge_focus", [])[:4])
        memory_style = ContextBuilder._build_style_summary(group_profile.get("group_memory_style", {}))
+        stable_topics = ContextBuilder._stringify_items(structured.get("stable_topics", []) or [], 4)
+        recent_points = ContextBuilder._stringify_items(structured.get("recent_key_points", []) or [], 3)
+        unresolved_points = ContextBuilder._stringify_items(structured.get("unresolved_points", []) or [], 3)
+        resource_clues = ContextBuilder._stringify_items(structured.get("resource_clues", []) or [], 3)
+        role_hints = ContextBuilder._stringify_items(structured.get("role_hints", []) or [], 3)
+        summary_days = int(group_profile.get("group_memory_summary_days", 0) or 0)
        lines = [
            "群长期背景：",
+            f"摘要观察窗口：最近 {summary_days} 份群总结" if summary_days > 0 else "",
+            f"稳定主题：{stable_topics}" if stable_topics else "",
+            f"近期重点：{recent_points}" if recent_points else "",
+            f"未决问题：{unresolved_points}" if unresolved_points else "",
+            f"共享资源/线索：{resource_clues}" if resource_clues else "",
+            f"角色线索：{role_hints}" if role_hints else "",
            f"长期摘要：{summary}" if summary else "",
            f"常聊方向：{focus}" if focus else "",
            f"历史社交风格：{memory_style}" if memory_style else "",
@@ -374,9 +387,15 @@ class ContextBuilder:
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        if not group_profile:
            return "当前群没有特殊知识域限制。"
+        structured = group_profile.get("group_memory_structured", {}) or {}
        focus = ", ".join(group_profile.get("knowledge_focus", [])[:6])
        boundaries = ", ".join(group_profile.get("topic_boundaries", [])[:6])
        summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""))
+        stable_topics = ContextBuilder._stringify_items(structured.get("stable_topics", []) or [], 4)
+        recent_points = ContextBuilder._stringify_items(structured.get("recent_key_points", []) or [], 3)
+        unresolved_points = ContextBuilder._stringify_items(structured.get("unresolved_points", []) or [], 3)
+        resource_clues = ContextBuilder._stringify_items(structured.get("resource_clues", []) or [], 3)
+        role_hints = ContextBuilder._stringify_items(structured.get("role_hints", []) or [], 3)
        lines = [
            f"群模式：{group_profile.get('mode', 'social')}",
            f"知识域偏向：{group_profile.get('knowledge_domain', 'general')}（仅作理解倾向，不是每次都要显式提到）",
@@ -389,6 +408,15 @@ class ContextBuilder:
            f"表达松弛度：{group_profile.get('expressiveness_style', '克制')}",
            f"称呼强度：{group_profile.get('address_style', '低频称呼，默认直接接话')}",
            f"可能相关的话题背景：{focus}" if focus else "",
+            # 这里显式把群摘要结构字段展开给模型：
+            # 1. LLM 更擅长消费清晰字段，而不是再从 markdown 文案里二次猜测；
+            # 2. “稳定主题/近期重点/未决问题”分别承载不同决策用途，混成一段反而不好用；
+            # 3. 仍然保留原摘要关键句，作为字段缺失时的人类可读兜底。
+            f"群摘要稳定主题：{stable_topics}" if stable_topics else "",
+            f"群摘要近期重点：{recent_points}" if recent_points else "",
+            f"群摘要未决问题：{unresolved_points}" if unresolved_points else "",
+            f"群摘要资源线索：{resource_clues}" if resource_clues else "",
+            f"群摘要角色线索：{role_hints}" if role_hints else "",
            f"群长期摘要关键句：{summary}" if summary else "",
            f"历史推断社交风格：{ContextBuilder._build_style_summary(group_profile.get('group_memory_style', {}))}"
            if group_profile.get("group_memory_style")
--- a/plugins/ai_auto_response/memory/group_memory_profile.py
+++ b/plugins/ai_auto_response/memory/group_memory_profile.py
@@ -1,7 +1,9 @@
 from __future__ import annotations

+import json
+import re
 from collections import Counter
-from typing import Dict, List
+from typing import Any, Dict, List, Optional

 from db.message_storage import MessageStorageDB
 from db.message_summary_db import MessageSummaryDBOperator
@@ -19,17 +21,30 @@ class GroupMemoryService:
    SHARPNESS_KEYWORDS = ["菜", "蠢", "逆天", "离谱", "抽象", "别搞", "别整", "你这", "搁这", "典"]
    RELAXED_KEYWORDS = ["随便", "行吧", "都行", "慢慢来", "不急", "摸鱼", "唠", "水群", "先这样"]
    SERIOUS_KEYWORDS = ["报错", "排查", "日志", "配置", "部署", "接口", "重现", "修复", "方案", "联调"]
+    UNRESOLVED_HINTS = ["未解决", "待解决", "待确认", "阻塞", "卡在", "异常", "报错", "问题", "风险", "todo", "TODO"]
+    RESOURCE_HINTS = ["文档", "链接", "地址", "仓库", "repo", "资料", "教程", "命令", "配置", "脚本", "接口"]
+    ROLE_HINTS = ["贡献", "活跃", "答疑", "负责", "推进", "owner", "结论", "方案位", "排查"]

    def __init__(self, db_manager, config: Dict):
        self.config = config or {}
        self.message_db = MessageStorageDB(db_manager)
        self.summary_db = MessageSummaryDBOperator(db_manager)
+        # Auto-reply should not rely on only the single most recent summary:
+        # 1. Summaries are per-day, so reading just the latest one can mistake a short-lived spike for long-term background;
+        # 2. Reading the last few summaries and aggregating them lightly keeps the long-term group profile more stable;
+        # 3. Both limits are floored at 1 and cap how much summary material can reach the prompt.
+        # NOTE(review): config.toml declares these keys under [group_profiles]; confirm `config` is that section.
+        self.summary_history_limit = max(int(self.config.get("summary_history_limit", 5) or 5), 1)
+        self.summary_item_limit = max(int(self.config.get("summary_item_limit", 4) or 4), 1)

    def build_group_memory_profile(self, room_id: str, group_name: str = "") -> Dict:
        recent_messages = self.message_db.get_messages_for_summary(
            room_id, hours_ago=48, min_messages=20, max_hours=168, max_results=300
        ) or []
-        summary_text = self._load_recent_summary_text(room_id)
+        summary_records = self._load_recent_summary_records(room_id)
+        structured_summary = self._build_structured_summary_digest(summary_records)
+        summary_text = str(structured_summary.get("summary_text", "") or "").strip()
+        summary_corpus = self._build_summary_corpus(summary_records, structured_summary)
+
        topic_counter = Counter()
        domain_counter = Counter()
        humor_hits = 0
@@ -58,7 +73,9 @@ class GroupMemoryService:
            relaxed_hits += self._count_hits(content, self.RELAXED_KEYWORDS)
            serious_hits += self._count_hits(content, self.SERIOUS_KEYWORDS)

-        summary_lower = summary_text.lower()
+        # 群摘要这层不再只吃一整段 markdown 文案，而是优先吃已经抽好的结构字段。
+        # 这样领域判断和主题判断会更稳定，模型后续也更容易利用这些结论。
+        summary_lower = summary_corpus.lower()
        for domain, keywords in self.DOMAIN_KEYWORDS.items():
            hits = sum(1 for keyword in keywords if keyword and keyword.lower() in summary_lower)
            if hits:
@@ -72,7 +89,10 @@ class GroupMemoryService:
        serious_hits += self._count_hits(summary_lower, self.SERIOUS_KEYWORDS) * 2

        inferred_domain = domain_counter.most_common(1)[0][0] if domain_counter else "general"
-        focus_topics = [item for item, _ in topic_counter.most_common(6)]
+        focus_topics = self._merge_unique(
+            [item for item, _ in topic_counter.most_common(6)],
+            structured_summary.get("stable_topics", []) or [],
+        )[:6]
        style_profile = self._infer_style_profile(
            humor_hits=humor_hits,
            sharpness_hits=sharpness_hits,
@@ -88,6 +108,9 @@ class GroupMemoryService:
            "message_sample_count": len(recent_messages),
            "summary_text": summary_text,
            "style_profile": style_profile,
+            "structured_summary": structured_summary,
+            "summary_source_count": len(summary_records),
+            "summary_timeline": structured_summary.get("timeline", []) or [],
        }

    @staticmethod
@@ -134,22 +157,412 @@ class GroupMemoryService:
            "expressiveness_style": expressiveness_style,
        }

-    def _load_recent_summary_text(self, room_id: str) -> str:
-        candidates: List[Dict] = []
-        for summary_type in ("daily", "manual"):
-            sql = """
-            SELECT *
-            FROM t_message_summary
-            WHERE chatroom_id = %s AND summary_type = %s
-            ORDER BY period_end DESC, update_time DESC
-            LIMIT 1
-            """
-            rows = self.summary_db.execute_query(sql, (room_id, summary_type)) or []
-            candidates.extend(rows)
-        if not candidates:
-            return ""
-        candidates.sort(
-            key=lambda item: (str(item.get("period_end", "")), str(item.get("update_time", ""))),
-            reverse=True,
+    def _load_recent_summary_records(self, room_id: str) -> List[Dict]:
+        """Fetch the newest daily/manual summary rows for a room.
+
+        Rows are ordered by ``period_end`` then ``update_time`` (both
+        descending) and limited to ``self.summary_history_limit``. Each row is
+        deserialized, rows without a ``period_key`` or repeating an already
+        seen ``summary_type:period_key`` pair are dropped, and the parsed
+        structure is attached under ``structured_summary``.
+        """
+        sql = """
+        SELECT *
+        FROM t_message_summary
+        WHERE chatroom_id = %s AND summary_type IN ('daily', 'manual')
+        ORDER BY period_end DESC, update_time DESC
+        LIMIT %s
+        """
+        fetched = self.summary_db.execute_query(sql, (room_id, self.summary_history_limit))
+        results: List[Dict] = []
+        visited = set()
+        for raw_row in fetched or []:
+            entry = self.summary_db._deserialize_row(dict(raw_row)) or {}
+            kind = str(entry.get("summary_type", "") or "")
+            period = str(entry.get("period_key", "") or "")
+            if not period:
+                continue
+            marker = f"{kind}:{period}"
+            if marker in visited:
+                continue
+            visited.add(marker)
+            entry["structured_summary"] = self._extract_structured_summary(entry.get("summary_text", ""))
+            results.append(entry)
+        return results
+
+    def _build_structured_summary_digest(self, records: List[Dict]) -> Dict:
+        """Aggregate per-record structured summaries into one weighted digest.
+
+        Args:
+            records: Summary rows that carry a ``structured_summary`` dict.
+                Earlier entries in the list receive a larger weight.
+
+        Returns:
+            A dict with ``stable_topics``, ``recent_key_points``,
+            ``unresolved_points``, ``resource_clues``, ``role_hints``,
+            ``timeline`` and ``summary_text``. Each list holds at most
+            ``self.summary_item_limit`` entries.
+        """
+        if not records:
+            return {
+                "stable_topics": [],
+                "recent_key_points": [],
+                "unresolved_points": [],
+                "resource_clues": [],
+                "role_hints": [],
+                "timeline": [],
+                "summary_text": "",
+            }
+
+        stable_topic_scores: Counter[str] = Counter()
+        key_point_scores: Counter[str] = Counter()
+        unresolved_scores: Counter[str] = Counter()
+        resource_scores: Counter[str] = Counter()
+        role_scores: Counter[str] = Counter()
+        timeline: List[str] = []
+
+        for index, record in enumerate(records):
+            # Weight shrinks with list position; the floor of 1 keeps every record counted.
+            weight = max(self.summary_history_limit - index, 1)
+            period_key = str(record.get("period_key", "") or "")
+            structured = record.get("structured_summary", {}) or {}
+
+            # Topics and unresolved points are scored at double weight.
+            for item in structured.get("topics", []) or []:
+                stable_topic_scores[item] += weight * 2
+            for item in structured.get("key_points", []) or []:
+                key_point_scores[item] += weight
+            for item in structured.get("unresolved_points", []) or []:
+                unresolved_scores[item] += weight * 2
+            for item in structured.get("resource_clues", []) or []:
+                resource_scores[item] += weight
+            for item in structured.get("role_hints", []) or []:
+                role_scores[item] += weight
+
+            # Timeline entry: the record's lead, or its first key point when no lead exists.
+            lead = str(structured.get("lead", "") or "").strip()
+            if not lead:
+                key_points = structured.get("key_points", []) or []
+                lead = str(key_points[0] if key_points else "").strip()
+            if period_key and lead:
+                timeline.append(f"{period_key}: {lead}")
+
+        # Keep only the highest-scoring entries of each category.
+        stable_topics = [item for item, _ in stable_topic_scores.most_common(self.summary_item_limit)]
+        recent_key_points = [item for item, _ in key_point_scores.most_common(self.summary_item_limit)]
+        unresolved_points = [item for item, _ in unresolved_scores.most_common(self.summary_item_limit)]
+        resource_clues = [item for item, _ in resource_scores.most_common(self.summary_item_limit)]
+        role_hints = [item for item, _ in role_scores.most_common(self.summary_item_limit)]
+        summary_text = self._compose_structured_summary_text(
+            stable_topics=stable_topics,
+            recent_key_points=recent_key_points,
+            unresolved_points=unresolved_points,
+            resource_clues=resource_clues,
+            role_hints=role_hints,
        )
-        return str(candidates[0].get("summary_text", "") or "").strip()
+        return {
+            "stable_topics": stable_topics,
+            "recent_key_points": recent_key_points,
+            "unresolved_points": unresolved_points,
+            "resource_clues": resource_clues,
+            "role_hints": role_hints,
+            "timeline": timeline[: self.summary_item_limit],
+            "summary_text": summary_text,
+        }
+
+    @staticmethod
+    def _compose_structured_summary_text(
+        *,
+        stable_topics: List[str],
+        recent_key_points: List[str],
+        unresolved_points: List[str],
+        resource_clues: List[str],
+        role_hints: List[str],
+    ) -> str:
+        """Render the digest lists as one ``；``-separated line of labelled sections.
+
+        Empty lists are omitted. Stable topics contribute up to four entries,
+        every other section up to three; entries are joined with ``、``.
+        """
+        sections = (
+            ("稳定主题", stable_topics, 4),
+            ("近期重点", recent_key_points, 3),
+            ("未决问题", unresolved_points, 3),
+            ("常见资源", resource_clues, 3),
+            ("群内角色线索", role_hints, 3),
+        )
+        rendered = [
+            f"{label}：{'、'.join(values[:cap])}"
+            for label, values, cap in sections
+            if values
+        ]
+        return "；".join(rendered)
+
+    def _build_summary_corpus(self, records: List[Dict], structured_summary: Dict) -> str:
+        """Flatten record-level and digest-level summary fields into one string.
+
+        Items from every record's ``structured_summary`` come first, followed
+        by the aggregated digest lists. Each item is stringified and stripped,
+        blanks are dropped, and the rest are joined with single spaces.
+        """
+        record_fields = ("topics", "key_points", "unresolved_points", "resource_clues", "role_hints")
+        digest_fields = (
+            "stable_topics",
+            "recent_key_points",
+            "unresolved_points",
+            "resource_clues",
+            "role_hints",
+        )
+        fragments: List[str] = []
+        for record in records:
+            parsed = record.get("structured_summary", {}) or {}
+            for field in record_fields:
+                fragments.extend(parsed.get(field, []) or [])
+        for field in digest_fields:
+            fragments.extend(structured_summary.get(field, []) or [])
+        stripped = (str(fragment).strip() for fragment in fragments)
+        return " ".join(piece for piece in stripped if piece)
+
+    def _extract_structured_summary(self, raw_summary: str) -> Dict:
+        """Convert a stored summary string into the structured field dict.
+
+        JSON payloads go through the JSON normalizer and anything else is
+        parsed as markdown. When a JSON object yields no known field at all it
+        may be an envelope such as ``{"text": "<markdown>"}``; in that case the
+        wrapped text is unwrapped and processed again instead of being
+        silently reduced to an empty structure.
+
+        Args:
+            raw_summary: Summary text as stored (JSON, fenced JSON or markdown).
+
+        Returns:
+            Dict with ``lead``, ``topics``, ``key_points``,
+            ``unresolved_points``, ``resource_clues`` and ``role_hints``.
+        """
+        payload = self._extract_json_object_from_text(raw_summary)
+        if not payload:
+            return self._parse_markdown_summary(raw_summary)
+        normalized = self._normalize_json_summary_payload(payload)
+        if any(normalized.values()):
+            return normalized
+        # Nothing recognizable was found. Unwrap a possible envelope and retry;
+        # the inner text is always shorter than the input, so this terminates.
+        inner = self._extract_llm_payload_text(raw_summary)
+        if inner and inner != str(raw_summary or "").strip():
+            return self._extract_structured_summary(inner)
+        # Plain JSON without summary fields: keep the empty structure rather
+        # than feeding raw JSON text to the markdown parser.
+        return normalized
+
+    def _normalize_json_summary_payload(self, payload: Dict[str, Any]) -> Dict:
+        """Map a JSON summary payload onto the common structured field dict.
+
+        Several alternative key names are accepted for each field (see the
+        ``payload.get`` chains below). Every returned list is de-duplicated
+        and capped at ``self.summary_item_limit``.
+
+        Args:
+            payload: Parsed JSON object of a summary.
+
+        Returns:
+            Dict with ``lead``, ``topics``, ``key_points``,
+            ``unresolved_points``, ``resource_clues`` and ``role_hints``.
+        """
+        lead = self._clean_text(
+            str(payload.get("lead") or payload.get("summary_lead") or payload.get("overview") or "")
+        )
+        topics: List[str] = []
+        key_points: List[str] = []
+        unresolved_points = self._normalize_text_list(
+            payload.get("unresolved_pool") or payload.get("unresolved_points"),
+            limit=self.summary_item_limit,
+        )
+        # Resource fields may be a string in one workflow version and a list in another.
+        # Normalize each one separately and merge afterwards, so concatenation never hits a str/list mismatch.
+        resource_clues = self._normalize_text_list(
+            payload.get("shared_resources"),
+            limit=self.summary_item_limit,
+        )
+        resource_clues.extend(
+            self._normalize_text_list(
+                payload.get("marketplace"),
+                limit=self.summary_item_limit,
+            )
+        )
+        role_hints = self._normalize_text_list(payload.get("top_contributors"), limit=self.summary_item_limit)
+
+        raw_topics = payload.get("topics")
+        if isinstance(raw_topics, list):
+            for item in raw_topics:
+                if isinstance(item, dict):
+                    # Dict topics contribute a title plus up to two overview points and one analysis point.
+                    title = self._clean_text(str(item.get("title") or item.get("name") or ""))
+                    if title:
+                        topics.append(title)
+                    key_points.extend(
+                        self._normalize_text_list(
+                            item.get("overview_points") or item.get("key_points") or item.get("highlights"),
+                            limit=2,
+                        )
+                    )
+                    key_points.extend(
+                        self._normalize_text_list(item.get("analysis_points") or item.get("analysis"), limit=1)
+                    )
+                else:
+                    # Non-dict topics are treated as plain titles.
+                    value = self._clean_text(str(item or ""))
+                    if value:
+                        topics.append(value)
+
+        key_points.extend(
+            self._normalize_text_list(
+                payload.get("core_knowledge_points") or payload.get("core_points"),
+                limit=self.summary_item_limit,
+            )
+        )
+        # Lead fallbacks, in order: fallback_text / raw_summary, then the first key point.
+        if not lead:
+            lead = self._clean_text(
+                str(payload.get("fallback_text") or payload.get("raw_summary") or "")
+            )
+        if not lead and key_points:
+            lead = key_points[0]
+
+        return {
+            "lead": lead,
+            "topics": self._dedup_items(topics, self.summary_item_limit),
+            "key_points": self._dedup_items(key_points, self.summary_item_limit),
+            "unresolved_points": self._dedup_items(unresolved_points, self.summary_item_limit),
+            "resource_clues": self._dedup_items(resource_clues, self.summary_item_limit),
+            "role_hints": self._dedup_items(role_hints, self.summary_item_limit),
+        }
+
+    def _parse_markdown_summary(self, raw_summary: str) -> Dict:
+        """Parse a markdown summary line by line into the structured field dict.
+
+        Headings set the current section; every other non-empty line is
+        classified via ``_classify_summary_line`` and stored in the matching
+        bucket.
+
+        Args:
+            raw_summary: Stored summary text; it is first passed through
+                ``_extract_llm_payload_text``.
+
+        Returns:
+            Dict with ``lead``, ``topics``, ``key_points``,
+            ``unresolved_points``, ``resource_clues`` and ``role_hints``; each
+            list is de-duplicated and capped at ``self.summary_item_limit``.
+        """
+        text = self._extract_llm_payload_text(raw_summary)
+        lines = [str(line or "").rstrip() for line in text.splitlines()]
+        current_section = ""
+        lead = ""
+        topics: List[str] = []
+        key_points: List[str] = []
+        unresolved_points: List[str] = []
+        resource_clues: List[str] = []
+        role_hints: List[str] = []
+
+        for raw_line in lines:
+            line = self._clean_text(raw_line)
+            if not line:
+                continue
+
+            heading_match = re.match(r"^#{1,6}\s*(.+)$", raw_line.strip())
+            if heading_match:
+                current_section = self._clean_text(heading_match.group(1))
+                # The first heading that is not a generic title becomes the lead.
+                if not lead and not self._looks_like_generic_title(current_section):
+                    lead = current_section
+                if current_section and self._looks_like_topic_title(current_section):
+                    topics.append(current_section)
+                continue
+
+            # Drop a leading list marker ("-", "*", "+", "1.", "1)", "1、") before classifying.
+            bullet_match = re.match(r"^([-*+]|\d+[.)、])\s*(.+)$", raw_line.strip())
+            if bullet_match:
+                line = self._clean_text(bullet_match.group(2))
+
+            bucket = self._classify_summary_line(current_section, line)
+            if bucket == "unresolved":
+                unresolved_points.append(line)
+            elif bucket == "resource":
+                resource_clues.append(line)
+            elif bucket == "role":
+                role_hints.append(line)
+            elif bucket == "topic":
+                topics.append(line)
+            else:
+                key_points.append(line)
+                # A key point of at least 6 characters can serve as lead when none is set yet.
+                if not lead and len(line) >= 6:
+                    lead = line
+
+        if not lead and key_points:
+            lead = key_points[0]
+
+        return {
+            "lead": lead,
+            "topics": self._dedup_items(topics, self.summary_item_limit),
+            "key_points": self._dedup_items(key_points, self.summary_item_limit),
+            "unresolved_points": self._dedup_items(unresolved_points, self.summary_item_limit),
+            "resource_clues": self._dedup_items(resource_clues, self.summary_item_limit),
+            "role_hints": self._dedup_items(role_hints, self.summary_item_limit),
+        }
+
+    @staticmethod
+    def _extract_llm_payload_text(summary_text: str) -> str:
+        """Unwrap summary text that was stored as a JSON envelope or JSON string.
+
+        A JSON object yields the first non-blank string found under ``text``,
+        ``summary``, ``answer``, ``content`` or ``result``; a JSON-quoted
+        string yields its decoded value. Anything else, including text that
+        fails to decode, is returned stripped but otherwise unchanged.
+        """
+        text = str(summary_text or "").strip()
+        if not text:
+            return ""
+        first, last = text[0], text[-1]
+        looks_like_container = (first, last) in {("{", "}"), ("[", "]")}
+        looks_like_quoted = first == '"' and last == '"'
+        if not (looks_like_container or looks_like_quoted):
+            return text
+        try:
+            decoded = json.loads(text)
+        except Exception:
+            return text
+        if isinstance(decoded, dict):
+            for key in ("text", "summary", "answer", "content", "result"):
+                candidate = decoded.get(key)
+                if isinstance(candidate, str) and candidate.strip():
+                    return candidate.strip()
+        elif isinstance(decoded, str) and decoded.strip():
+            return decoded.strip()
+        return text
+
+    @staticmethod
+    def _extract_json_object_from_text(raw_text: str) -> Optional[Dict[str, Any]]:
+        """Return the first JSON object that can be decoded from ``raw_text``.
+
+        Candidates are tried in order: the whole text when it is wrapped in
+        braces, the body of the first fenced code block, and the span from the
+        first ``{`` to the last ``}``. ``None`` is returned when no candidate
+        decodes to a dict.
+        """
+        text = str(raw_text or "").strip()
+        if not text:
+            return None
+
+        candidates: List[str] = []
+        if text.startswith("{") and text.endswith("}"):
+            candidates.append(text)
+        fenced_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
+        if fenced_match:
+            candidates.append(str(fenced_match.group(1) or "").strip())
+        left, right = text.find("{"), text.rfind("}")
+        if left >= 0 and right > left:
+            candidates.append(text[left:right + 1].strip())
+
+        for candidate in candidates:
+            try:
+                parsed = json.loads(candidate)
+            except Exception:
+                continue
+            if isinstance(parsed, dict):
+                return parsed
+        return None
+
+    def _classify_summary_line(self, section_title: str, line: str) -> str:
+        """Pick the bucket for a summary line.
+
+        Hint lists are checked case-insensitively against both the section
+        title and the line, in the order unresolved, resource, role. If none
+        matches, a topic-like section title yields ``"topic"``; everything
+        else is a ``"key_point"``.
+        """
+        title = str(section_title or "").lower()
+        body = str(line or "").lower()
+
+        def mentions(hints: List[str]) -> bool:
+            for hint in hints:
+                needle = hint.lower()
+                if needle in title or needle in body:
+                    return True
+            return False
+
+        ordered_buckets = (
+            ("unresolved", self.UNRESOLVED_HINTS),
+            ("resource", self.RESOURCE_HINTS),
+            ("role", self.ROLE_HINTS),
+        )
+        for bucket, hints in ordered_buckets:
+            if mentions(hints):
+                return bucket
+
+        if self._looks_like_topic_title(section_title):
+            return "topic"
+        for word in ["话题", "主题", "讨论", "进展"]:
+            if word in title:
+                return "topic"
+        return "key_point"
+
+    @staticmethod
+    def _looks_like_generic_title(text: str) -> bool:
+        """Tell whether a heading is a boilerplate summary title (case-insensitive, trimmed)."""
+        generic_titles = {"今日总结", "群聊总结", "summary", "每日总结", "日报"}
+        candidate = str(text or "").strip().lower()
+        return candidate in generic_titles
+
+    @staticmethod
+    def _looks_like_topic_title(text: str) -> bool:
+        """Tell whether a short heading (at most 22 characters) contains a topic marker word."""
+        candidate = str(text or "").strip()
+        if not candidate or len(candidate) > 22:
+            return False
+        for marker in ("话题", "主题", "讨论", "进展", "问题", "项目", "模块"):
+            if marker in candidate:
+                return True
+        return False
+
+    @staticmethod
+    def _normalize_text_list(value: Any, limit: int = 4) -> List[str]:
+        """Coerce a string or a list of strings/dicts into cleaned text items.
+
+        A string yields at most one item. A list yields one cleaned item per
+        non-blank element; dict elements use the first truthy value among
+        ``text``, ``title``, ``value`` and ``name``. Collection stops once
+        ``limit`` items were gathered. Any other input type yields ``[]``.
+        """
+        if isinstance(value, str):
+            single = GroupMemoryService._clean_text(value)
+            return [single] if single else []
+
+        collected: List[str] = []
+        if isinstance(value, list):
+            for entry in value:
+                if isinstance(entry, dict):
+                    raw = entry.get("text") or entry.get("title") or entry.get("value") or entry.get("name") or ""
+                else:
+                    raw = entry or ""
+                cleaned = GroupMemoryService._clean_text(str(raw))
+                if cleaned:
+                    collected.append(cleaned)
+                    if len(collected) >= limit:
+                        break
+        return collected
+
+    @staticmethod
+    def _clean_text(text: str) -> str:
+        """Strip markdown decoration from one line of summary text.
+
+        Images and links are reduced to their label, inline-code backticks and
+        emphasis markers are removed, leading list or quote markers are
+        dropped, whitespace is collapsed, and surrounding punctuation is
+        trimmed.
+
+        Underscore emphasis is only recognized at word boundaries, so
+        identifiers such as ``summary_item_limit`` keep their underscores.
+        Leading digits are only removed when they form a list marker
+        (``1.``, ``2)``, ``(3)``, ``4、``), so text like ``2024年计划``,
+        ``3D建模`` or ``1.2 版本`` stays intact.
+
+        Args:
+            text: Raw line; ``None`` and blank input yield ``""``.
+
+        Returns:
+            The cleaned single-line text.
+        """
+        cleaned = str(text or "").strip()
+        if not cleaned:
+            return ""
+        cleaned = re.sub(r"!\[([^\]]*)\]\([^)]+\)", r"\1", cleaned)
+        cleaned = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", cleaned)
+        cleaned = re.sub(r"`([^`]+)`", r"\1", cleaned)
+        # Bold before italic, so "**x**" is not half-consumed by the single-marker pass.
+        cleaned = re.sub(r"\*\*(.*?)\*\*", r"\1", cleaned)
+        cleaned = re.sub(r"(?<!\w)__(.+?)__(?!\w)", r"\1", cleaned)
+        cleaned = re.sub(r"\*(.*?)\*", r"\1", cleaned)
+        cleaned = re.sub(r"(?<!\w)_(.+?)_(?!\w)", r"\1", cleaned)
+        # Leading markers only: quote/bullet symbols or an enumerator that is
+        # not immediately followed by another digit (keeps "1.2" and "2024年").
+        cleaned = re.sub(r"^(?:\s*(?:[>\-*+]|\(?\d+[.)、](?!\d)))+\s*", "", cleaned)
+        cleaned = re.sub(r"\s+", " ", cleaned)
+        return cleaned.strip(" ：:；;，,。")
+
+    @staticmethod
+    def _dedup_items(items: List[str], limit: int) -> List[str]:
+        """Return the first ``limit`` distinct non-blank items, in input order.
+
+        Two items count as duplicates when they match after lower-casing and
+        removing all whitespace; the first spelling encountered is kept.
+        """
+        kept: List[str] = []
+        fingerprints = set()
+        for raw in items:
+            text = str(raw or "").strip()
+            fingerprint = re.sub(r"\s+", "", text.lower()) if text else ""
+            if not text or fingerprint in fingerprints:
+                continue
+            fingerprints.add(fingerprint)
+            kept.append(text)
+            if len(kept) >= limit:
+                break
+        return kept
+
+    @staticmethod
+    def _merge_unique(primary: List[str], secondary: List[str]) -> List[str]:
+        """Concatenate two lists and drop blanks and later duplicates.
+
+        Items are compared after lower-casing and removing whitespace; the
+        first occurrence wins, so entries from ``primary`` take precedence.
+        """
+        unique: Dict[str, str] = {}
+        for candidate in (primary or []) + (secondary or []):
+            label = str(candidate or "").strip()
+            if not label:
+                continue
+            fingerprint = re.sub(r"\s+", "", label.lower())
+            # setdefault keeps the first spelling; dicts preserve insertion order.
+            unique.setdefault(fingerprint, label)
+        return list(unique.values())
--- a/plugins/ai_auto_response/profile/group_profile.py
+++ b/plugins/ai_auto_response/profile/group_profile.py
@@ -26,10 +26,16 @@ class GroupProfileResolver:
        configured_domain = str(profile.get("knowledge_domain", "general") or "general")
        inferred_domain = str(group_memory_profile.get("inferred_domain", "general") or "general")
        inferred_style = group_memory_profile.get("style_profile", {}) or {}
+        structured_summary = group_memory_profile.get("structured_summary", {}) or {}
        effective_domain = configured_domain
        if configured_domain in {"", "general", "casual"} and inferred_domain not in {"", "general"}:
            effective_domain = inferred_domain
+        # 群摘要不再只是“一段文案”：
+        # 1. 这里把群长期聚合后的稳定主题一起并到 focus 里；
+        # 2. 这样群画像后续传给 prompt 时，LLM 能拿到更干净的字段，而不是自己再拆 markdown；
+        # 3. 同时只保留去重后的短项，避免群摘要字段把配置 focus 全冲掉。
        inferred_focus = list(group_memory_profile.get("focus_topics", []))
+        inferred_focus.extend(structured_summary.get("stable_topics", []) or [])
        merged_focus = []
        for item in focus + inferred_focus:
            if item and item not in merged_focus:
@@ -63,5 +69,8 @@ class GroupProfileResolver:
            "group_memory_domain": inferred_domain,
            "group_memory_summary": group_memory_profile.get("summary_text", ""),
            "group_memory_sample_count": group_memory_profile.get("message_sample_count", 0),
+            "group_memory_summary_days": group_memory_profile.get("summary_source_count", 0),
+            "group_memory_structured": structured_summary,
+            "group_memory_timeline": group_memory_profile.get("summary_timeline", []) or [],
            "group_memory_style": inferred_style,
        }