from __future__ import annotations

import re
from typing import Dict, List


class ContextBuilder:
    """Assemble the prompt context dictionary for one incoming group message.

    ``build`` is the entry point. The remaining methods are private helpers
    that each render one section of the context (recent messages, member
    profile, group profile, quote, image, ...) as plain text.
    """

    # Placeholder shown when a message carries neither a display name nor an id.
    _UNKNOWN_MEMBER = "未知成员"

    # Phrases hinting that a message is a question or a troubleshooting answer.
    # The first alternative accepts both the ASCII and the full-width question
    # mark at the end of the text.
    _QA_PATTERN = re.compile(
        r"[?？]$|怎么|如何|为啥|为什么|能不能|可以吗|报错|试试|先|然后|配置|日志|接口|原因"
    )

    # Domain keywords that count as topic tokens wherever they occur in a text.
    _TOPIC_KEYWORDS = (
        "openclaw", "qdrant", "ollama", "docker", "python", "api",
        "插件", "机器人", "模型", "日志", "配置", "报错", "部署",
        "联网", "图片", "记忆", "群聊", "dota", "战绩",
    )

    # Sentence boundaries / trimmable punctuation for summary compaction
    # (ASCII and full-width variants).
    _SENTENCE_SPLIT = re.compile(r"[。！？!?；;\n]+")
    _SENTENCE_TRIM = " ，,；;。.！？!?：:"

    # Sentences matching these keyword groups are preferred when compacting.
    _SUMMARY_KEY_PATTERNS = (
        re.compile(r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞"),
        re.compile(r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容"),
    )

    def __init__(self, recent_context_size: int = 30):
        """Store the history window size; falsy values fall back to 30, minimum 1."""
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        member_memory_focus: List[str] | None = None,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        social_memory: Dict | None = None,
        group_facts: Dict | None = None,
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        """Build the context dictionary for the current message.

        All arguments are keyword-only. Optional dict/list arguments may be
        ``None`` or empty; they are normalised to empty containers.

        Returns:
            A dict holding the raw profiles, the selected recent messages
            (both as ``"name: text"`` lines and as structured items), the
            trigger flags and one rendered prompt string per context section.
        """
        # Normalise optional inputs once instead of repeating ``x or {}``.
        trigger = trigger or {}
        quote_context = quote_context or {}
        member_context = member_context or {}
        focus_lines = member_memory_focus or []
        is_at = bool(trigger.get("is_at", False))
        is_directed = bool(trigger.get("is_directed", False))

        selected_messages = self._select_recent_messages(
            recent_messages, sender, content, quote_context
        )
        recent_lines = []
        for item in selected_messages:
            msg_sender = item.get("sender_name") or item.get("sender") or self._UNKNOWN_MEMBER
            msg_content = item.get("content") or item.get("message") or ""
            if msg_content:
                recent_lines.append(f"{msg_sender}: {msg_content}")

        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context,
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "is_at": is_at,
            "is_directed": is_directed,
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            "trigger_type": trigger.get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "memory_prompt": self._build_member_memory_prompt(member_context, focus_lines),
            "at_member_profile_prompt": self._build_at_member_profile_prompt(
                member_context=member_context,
                focus_lines=focus_lines,
                is_at=is_at,
                is_directed=is_directed,
            ),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "social_memory_prompt": self._build_social_memory_prompt(social_memory or {}),
            "group_facts_prompt": self._build_group_facts_prompt(group_facts or {}),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile or {}),
            "quote_prompt": self._build_quote_prompt(quote_context),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "image_safety_prompt": self._build_image_safety_prompt(
                quote_context.get("image_safety") or {}
            ),
            "current_message": f"{sender_name}: {content}",
        }

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        """Convert messages into compact items, skipping ones without text.

        ``idx`` is the 1-based position in ``messages`` (skipped messages leave
        gaps); ``content`` is truncated to 120 characters.
        """
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            content = str(item.get("content") or item.get("message") or "").strip()
            if not content:
                continue
            items.append({
                "idx": idx,
                "sender": item.get("sender_name") or item.get("sender") or ContextBuilder._UNKNOWN_MEMBER,
                "content": content[:120],
                "is_at": bool(item.get("is_at")),
            })
        return items

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        """Pick the slice of history worth showing for the current message.

        Only the last ``recent_context_size`` messages are considered. Windows
        of 8 or fewer messages are returned unchanged. Otherwise the last four
        messages are always kept, up to ten of the highest-scoring messages are
        added, and the result is capped at the 12 most recent of those. If
        fewer than six messages survive, the last six of the window are
        returned instead.
        """
        if not recent_messages:
            return []
        window = recent_messages[-self.recent_context_size:]
        if len(window) <= 8:
            return window

        quote_context = quote_context or {}
        quote_text = f"{quote_context.get('title', '')} {quote_context.get('quote_body', '')}"
        focus_tokens = (
            self._extract_topic_tokens(current_content)
            | self._extract_topic_tokens(quote_text)
        )
        quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower()

        scored: List[tuple[int, int, Dict]] = []
        for idx, item in enumerate(window):
            score = self._message_relevance(
                item,
                current_sender=current_sender,
                focus_tokens=focus_tokens,
                quote_sender_name=quote_sender_name,
            )
            if score > 0:
                scored.append((score, idx, item))

        # Always keep the last few messages so the reply stays grounded in the
        # live conversation, then add the messages most relevant to the topic.
        keep_indexes = set(range(max(len(window) - 4, 0), len(window)))
        # Highest score first; ties go to the more recent message.
        ranked = sorted(scored, key=lambda entry: (-entry[0], -entry[1]))
        keep_indexes.update(idx for _, idx, _ in ranked[:10])

        selected = [window[idx] for idx in sorted(keep_indexes)]
        if len(selected) < 6:
            return window[-6:]
        return selected[-12:]

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        """Score how relevant a past message is to the current one.

        Returns 0 for messages without text. Points are added for: same sender
        as the current message, sender name containing the quoted sender's
        name, being an @-message, topic-token overlap with ``focus_tokens``
        (capped), and looking like a question/answer.
        """
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0

        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        looks_qa = cls._looks_like_question_or_answer(content)

        score = 0
        if sender == current_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1

        # NOTE(review): the same-sender fallback is treated as the else-branch
        # of the focus-token check and the final question/answer bonus as
        # unconditional — confirm this matches the intended nesting.
        if focus_tokens:
            overlap = focus_tokens & cls._extract_topic_tokens(content)
            score += min(len(overlap) * 2, 6)
            if overlap and looks_qa:
                score += 2
        elif sender == current_sender:
            score += 1
        if looks_qa:
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        """Return True when the text ends with a question mark (ASCII or
        full-width) or contains one of the question/troubleshooting phrases."""
        text = str(content or "").strip().lower()
        if not text:
            return False
        return ContextBuilder._QA_PATTERN.search(text) is not None

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        """Return lowercase topic tokens: runs of 3+ characters matched by the
        token regex, plus every known domain keyword contained in the text."""
        text = str(content or "").lower()
        # The character class admits a-z, digits, "_", "-" and, as written,
        # a literal backslash.
        tokens = set(re.findall(r"[a-z0-9_\\-]{3,}", text))
        tokens.update(keyword for keyword in ContextBuilder._TOPIC_KEYWORDS if keyword in text)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        """Strip whitespace and everything except CJK ideographs (U+4E00-U+9FFF),
        ASCII letters, digits and "_" from a name; keep at most 8 characters."""
        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        text = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9_]", "", text)
        return text[:8]

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict, focus_lines: List[str] | None = None) -> str:
        """Render the member profile as newline-separated labelled lines.

        Returns a fixed "no stable profile" sentence when ``member_context`` is
        empty. List-valued fields are emitted only when non-empty; the summary,
        interaction-style, reply-preference, temperament and group-role lines
        are always emitted, even when their value is empty.
        """
        if not member_context:
            return "暂无稳定成员画像。"

        meta = member_context.get("meta", {}) or {}
        topics = member_context.get("topics_of_interest", []) or []
        recent_focus = member_context.get("recent_focus", []) or []

        def meta_items(key: str, limit: int) -> str:
            return ContextBuilder._stringify_items(meta.get(key, []), limit)

        common_scenarios = meta_items("common_scenarios", 4)
        skills = meta_items("skill_profile", 5)
        problem_solving = meta_items("problem_solving_profile", 4)
        stable_traits = meta_items("stable_traits", 4)
        habits = meta_items("habit_patterns", 4)
        expression_profile = meta_items("expression_profile", 4)
        reply_entry = meta_items("reply_entry_profile", 4)
        reply_prefs = meta_items("long_term_reply_preferences", 4)
        recent_state = meta_items("recent_state", 4)
        reply_taboos = meta_items("reply_taboos", 3)
        focus_text = ";".join((focus_lines or [])[:4])

        lines = [
            f"成员摘要:{member_context.get('summary_text', '')}".strip(),
            f"互动风格:{member_context.get('interaction_style', '')}".strip(),
            f"回复偏好:{member_context.get('response_style_hint', '')}".strip(),
            f"本次相关记忆:{focus_text}" if focus_lines else "",
            f"长期主题:{', '.join(topics[:5])}" if topics else "",
            f"近期关注:{', '.join(recent_focus[:4])}" if recent_focus else "",
            f"常见发言场景:{common_scenarios}" if common_scenarios else "",
            f"技能侧重点:{skills}" if skills else "",
            f"处理问题方式:{problem_solving}" if problem_solving else "",
            f"稳定特征:{stable_traits}" if stable_traits else "",
            f"习惯模式:{habits}" if habits else "",
            f"表达标记:{expression_profile}" if expression_profile else "",
            f"有效接话点:{reply_entry}" if reply_entry else "",
            f"长期回复偏好:{reply_prefs}" if reply_prefs else "",
            f"近期状态:{recent_state}" if recent_state else "",
            f"气质倾向:{meta.get('temperament_tendency', '')}".strip(),
            f"群内角色:{meta.get('group_role', '')}".strip(),
            f"回复禁忌:{reply_taboos}" if reply_taboos else "",
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_at_member_profile_prompt(
        member_context: Dict,
        focus_lines: List[str] | None = None,
        is_at: bool = False,
        is_directed: bool = False,
    ) -> str:
        """Render the high-priority member profile used for direct mentions.

        Returns "" unless the message is an @-mention or directed; returns a
        fixed fallback sentence when no member profile is available.
        """
        # Only surface the high-priority profile on an explicit @ or a strongly
        # directed message, so ordinary chatter does not over-apply a persona.
        if not (is_at or is_directed):
            return ""
        if not member_context:
            return "本次是对方点名发起,但暂无稳定画像,按自然群友口吻短回复。"

        meta = member_context.get("meta", {}) or {}
        summary = str(member_context.get("summary_text", "") or "").strip()
        interaction_style = str(member_context.get("interaction_style", "") or "").strip()
        response_hint = str(member_context.get("response_style_hint", "") or "").strip()
        topics = ContextBuilder._stringify_items(member_context.get("topics_of_interest", []) or [], 4)
        focus = ";".join((focus_lines or [])[:3]).strip()
        taboos = (
            ContextBuilder._stringify_items(meta.get("reply_taboos", []), 3)
            if meta.get("reply_taboos")
            else ""
        )

        lines = [
            "本次为点名互动,优先参考该成员画像后再回复:",
            f"成员摘要:{summary}" if summary else "",
            f"互动风格:{interaction_style}" if interaction_style else "",
            f"偏好回复方式:{response_hint}" if response_hint else "",
            f"近期相关记忆:{focus}" if focus else "",
            f"长期兴趣:{topics}" if topics else "",
            f"禁忌提醒:{taboos}" if meta.get("reply_taboos") else "",
            "语气要像熟悉的群友,短句、自然,不要客服腔。",
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        """Join up to ``limit`` items into a ", "-separated string.

        A string is returned stripped. ``None``/empty input yields "". Dict
        items contribute their first truthy ``name``/``label``/``value``/``text``
        entry. Empty and duplicate values are dropped, keeping first-seen order.
        """
        if isinstance(items, str):
            return items.strip()
        if not items:
            return ""
        if not isinstance(items, (list, tuple)):
            # Tolerate a single non-sequence value instead of failing on slicing.
            items = [items]

        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                raw = item.get("name") or item.get("label") or item.get("value") or item.get("text") or ""
            else:
                raw = item or ""
            value = str(raw).strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        """Render at most the first two retrieved memories as "[type] summary" lines."""
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            if summary:
                lines.append(f"[{item.get('memory_type', 'memory')}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_social_memory_prompt(social_memory: Dict) -> str:
        """Return the stripped ``prompt`` field of the social memory, or ""."""
        return str((social_memory or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_facts_prompt(group_facts: Dict) -> str:
        """Return the stripped ``prompt`` field of the group facts, or ""."""
        return str((group_facts or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        """Render the group profile as newline-separated labelled lines.

        Returns a fixed "no special domain" sentence when the profile is empty.
        Missing scalar fields fall back to the defaults written in each line.
        """
        if not group_profile:
            return "当前群没有特殊知识域限制。"

        # ``or []`` also covers keys that are present but hold None.
        focus = ", ".join((group_profile.get("knowledge_focus") or [])[:6])
        boundaries = ", ".join((group_profile.get("topic_boundaries") or [])[:6])
        summary = ContextBuilder._compact_group_summary(
            str(group_profile.get("group_memory_summary", "") or "")
        )
        memory_style = group_profile.get("group_memory_style")
        style_summary = ContextBuilder._build_style_summary(memory_style) if memory_style else ""

        lines = [
            f"群模式:{group_profile.get('mode', 'social')}",
            f"知识域偏向:{group_profile.get('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
            f"配置知识域:{group_profile.get('configured_domain', 'general')}(仅在当前话题相关时参考)",
            f"历史推断知识域:{group_profile.get('group_memory_domain', 'general')}(弱参考)",
            f"回答风格:{group_profile.get('reply_style', '自然短句')}",
            f"互动调性:{group_profile.get('interaction_tone', '自然群友感')}",
            f"幽默强度:{group_profile.get('humor_style', '轻微')}",
            f"嘴硬程度:{group_profile.get('sharpness_style', '轻微嘴硬,不刻薄')}",
            f"表达松弛度:{group_profile.get('expressiveness_style', '克制')}",
            f"称呼强度:{group_profile.get('address_style', '低频称呼,默认直接接话')}",
            f"可能相关的话题背景:{focus}" if focus else "",
            f"群长期摘要关键句:{summary}" if summary else "",
            f"历史推断社交风格:{style_summary}" if memory_style else "",
            f"边界提醒:{boundaries}" if boundaries else "",
            f"人格叠加:{group_profile.get('persona_overlay', '')}".strip(),
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        """Join the non-empty tone/humor/sharpness/expressiveness values with " / "."""
        if not style_profile:
            return ""
        keys = ("interaction_tone", "humor_style", "sharpness_style", "expressiveness_style")
        parts = [str(style_profile.get(key, "") or "").strip() for key in keys]
        # Drop empty fields so no dangling separators appear in the output.
        return " / ".join(part for part in parts if part).strip(" /")

    @staticmethod
    def _compact_group_summary(summary_text: str, max_chars: int = 420, max_sentences: int = 6) -> str:
        """Shrink a long group summary to at most ``max_chars`` characters.

        Whitespace is collapsed first; text that already fits is returned as
        is. Otherwise the text is split into sentences, the first and last
        sentence are always kept, and further sentences are added by score
        until ``max_sentences`` are chosen. The chosen sentences are joined
        with ";" in their original order and hard-truncated with "..." if the
        result is still too long.
        """
        text = re.sub(r"\s+", " ", str(summary_text or "")).strip()
        if not text or len(text) <= max_chars:
            return text

        # Extract key sentences from a long summary (conclusion / risk /
        # action / configuration wording first) rather than blindly truncating.
        trim = ContextBuilder._SENTENCE_TRIM
        sentences = [
            part.strip(trim)
            for part in ContextBuilder._SENTENCE_SPLIT.split(text)
            if part.strip()
        ]
        if not sentences:
            return text[: max_chars - 3] + "..."

        last_index = len(sentences) - 1
        scored: List[tuple[int, int, str]] = []
        for idx, sentence in enumerate(sentences):
            score = 3 * sum(
                1 for pattern in ContextBuilder._SUMMARY_KEY_PATTERNS if pattern.search(sentence)
            )
            if re.search(r"\d", sentence):
                score += 1
            if 8 <= len(sentence) <= 80:
                score += 1
            if idx in (0, last_index):
                score += 1
            scored.append((score, idx, sentence))

        chosen_indexes = {0, last_index}
        # Highest score first; ties go to the earlier sentence.
        for _, idx, _ in sorted(scored, key=lambda entry: (-entry[0], entry[1])):
            chosen_indexes.add(idx)
            if len(chosen_indexes) >= max_sentences:
                break

        chosen = [sentences[idx] for idx in sorted(chosen_indexes)]
        merged = ";".join(item for item in chosen if item).strip(";")
        if len(merged) <= max_chars:
            return merged
        return merged[: max_chars - 3].rstrip(trim) + "..."

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        """Render the quoted-message section, or "" when nothing useful is quoted."""
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label", "引用消息")
        quote_sender = (quote_context.get("quote_sender_name", "") or "").strip()
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""

        lines = [
            "用户这次是在引用消息后发言。",
            f"引用类型:{quote_type}",
            f"被引用发送者:{quote_sender}"
            if quote_sender and quote_sender != ContextBuilder._UNKNOWN_MEMBER
            else "",
            "图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题:{title}" if title else "",
            f"被引用内容:{quote_body}" if quote_body else "",
        ]
        payload = [line for line in lines if line]
        # Fallback: if only the intro and the quote type remain, there is no
        # usable quoted content, so emit nothing.
        if len(payload) <= 2 and not quote_body and not title:
            return ""
        return "\n".join(payload)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        """Render the attached-image section (sender and optional hint), or ""."""
        if not image_context:
            return ""
        hint = image_context.get("hint")
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者:{image_context.get('sender_name', '未知成员')}",
            f"图片说明:{image_context.get('hint', '')}" if hint else "",
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_image_safety_prompt(image_safety: Dict) -> str:
        """Render guidance for messages suspected of commenting on an image.

        Returns "" when nothing is suspected, a short note when the image is
        actually attached, and otherwise instructions not to pretend the image
        was seen.
        """
        if not image_safety or not image_safety.get("suspected"):
            return ""
        if image_safety.get("has_visual_context"):
            return "当前发言疑似是在评论图片,但本次已附带图片上下文,可以基于图片谨慎理解。"
        reason = str(image_safety.get("reason", "") or "").strip()
        lines = [
            "当前发言疑似是在评论图片,但你这次没有看到图片本身。",
            f"原因:{reason}" if reason else "",
            "不要假装看过图,不要直接评价画面细节、人物状态、构图、文字内容或颜色元素。",
            "如果要回,只能轻微承认信息不足,或请对方引用图片/补一句文字说明,再继续。",
        ]
        return "\n".join(line for line in lines if line)