# Source path: abot/plugins/ai_auto_response/context/context_builder.py
#
# NOTE: this file was copied from a web code viewer, which reported it as Python,
# 533 lines, 27 KiB, and showed the following warning:
#   "This file contains ambiguous Unicode characters. This file contains Unicode
#   characters that might be confused with other characters."
# Review any full-width punctuation inside string and regex literals before
# relying on it.
from __future__ import annotations
import re
from typing import Dict, List
class ContextBuilder:
    """Assemble the structured context dictionary used to prompt the reply model.

    ``build`` is the only public entry point. Every ``_build_*`` helper renders
    one prompt section as plain text and returns an empty string (or a fixed
    fallback sentence) when it has nothing useful to say, so callers can drop
    empty sections without extra checks.
    """

    # Full-width punctuation is spelled with escapes so it cannot be silently
    # lost by tools that flag or strip "ambiguous" Unicode characters.
    _ITEM_SEPARATOR = "\uff1b"  # full-width semicolon, joins sentences / memory lines
    # Sentence terminators: ideographic full stop plus ASCII and full-width ! ? ;
    _SENTENCE_SPLIT_RE = re.compile("[\u3002!?;\uff01\uff1f\uff1b]+")
    # Punctuation trimmed from both ends of an extracted sentence.
    _SENTENCE_EDGE_CHARS = " ,;\u3002.!?:\uff0c\uff1b\uff01\uff1f\uff1a"

    def __init__(self, recent_context_size: int = 30):
        """Store how many trailing messages are passed on as recent context.

        Falsy values fall back to 30 and the result is clamped to at least 1.
        """
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        member_memory_focus: List[str] | None = None,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        social_memory: Dict | None = None,
        group_facts: Dict | None = None,
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        """Return the full context dictionary for one incoming message.

        All dictionary/list arguments may be ``None`` or empty; they are
        normalised to empty containers before use. The returned mapping holds
        the raw speaker/group data plus one rendered text block per section.
        """
        # Normalise optional inputs once so the rest of the method never has to
        # guard against None.
        trigger = trigger or {}
        member_context = member_context or {}
        quote_context = quote_context or {}
        profile = group_profile or {}
        focus_lines = member_memory_focus or []
        is_at = bool(trigger.get("is_at", False))
        is_directed = bool(trigger.get("is_directed", False))

        selected_messages = self._select_recent_messages(recent_messages, sender, content, quote_context)

        recent_lines = []
        for idx, item in enumerate(selected_messages, start=1):
            # Same emptiness rule as _build_recent_message_items, so the text
            # lines and the structured items always describe the same messages.
            msg_content = str(item.get("content") or item.get("message") or "").strip()
            if not msg_content:
                continue
            msg_sender = item.get("sender_name") or item.get("sender") or "未知成员"
            # Speaker and body are kept as separate fields so the model does not
            # mistake words in a nickname for the topic under discussion.
            recent_lines.append(
                self._format_recent_message_line(
                    idx=idx,
                    sender_name=str(msg_sender),
                    content=msg_content,
                    is_at=bool(item.get("is_at")),
                )
            )

        return {
            "group_profile": profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context,
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "is_at": is_at,
            "is_directed": is_directed,
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            "trigger_type": trigger.get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "member_profile_brief_prompt": self._build_member_profile_brief_prompt(member_context),
            "memory_prompt": self._build_member_memory_prompt(member_context, focus_lines),
            "at_member_profile_prompt": self._build_at_member_profile_prompt(
                member_context=member_context,
                focus_lines=focus_lines,
                is_at=is_at,
                is_directed=is_directed,
            ),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "social_memory_prompt": self._build_social_memory_prompt(social_memory or {}),
            "group_facts_prompt": self._build_group_facts_prompt(group_facts or {}),
            "group_long_memory_prompt": self._build_group_long_memory_prompt(profile),
            "group_profile_prompt": self._build_group_profile_prompt(profile),
            "quote_prompt": self._build_quote_prompt(quote_context),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "image_safety_prompt": self._build_image_safety_prompt(
                quote_context.get("image_safety") or {}
            ),
            "current_message": self._format_current_message_block(sender_name, content),
        }

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        """Return structured entries for every message with non-blank content.

        ``idx`` is the 1-based position in ``messages`` (blank messages keep
        their slot, they are just not emitted); content is cut to 120 chars.
        """
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            content = str(item.get("content") or item.get("message") or "").strip()
            if not content:
                continue
            items.append({
                "idx": idx,
                "sender": item.get("sender_name") or item.get("sender") or "未知成员",
                "content": content[:120],
                "is_at": bool(item.get("is_at")),
            })
        return items

    @staticmethod
    def _sanitize_inline_field(value: str, max_chars: int = 120) -> str:
        """Flatten ``value`` into a single-line field safe for ``|``-separated rows.

        The ``|`` delimiter is removed first and whitespace is collapsed
        afterwards, so removing a delimiter can never leave double or trailing
        spaces behind. Text longer than ``max_chars`` is cut and suffixed "...".
        """
        text = str(value or "").replace("|", "")
        text = re.sub(r"\s+", " ", text).strip()
        if len(text) > max_chars:
            return text[: max_chars - 3].rstrip() + "..."
        return text

    @classmethod
    def _format_recent_message_line(cls, idx: int, sender_name: str, content: str, is_at: bool = False) -> str:
        """Render one history message as ``[NN] | speaker | body [| @bot=Y]``."""
        sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
        body = cls._sanitize_inline_field(content, max_chars=120)
        parts = [f"[{idx:02d}]", f"发言人={sender}", f"正文={body}"]
        if is_at:
            parts.append("@bot=Y")
        return " | ".join(parts)

    @classmethod
    def _format_current_message_block(cls, sender_name: str, content: str) -> str:
        """Render the current message as a two-line "metadata + body" block.

        Keeping the speaker on its own line lets the model treat only the body
        as the source of topic semantics.
        """
        sender = cls._sanitize_inline_field(sender_name, max_chars=24) or "未知成员"
        body = cls._sanitize_inline_field(content, max_chars=500)
        return f"发言人={sender}\n正文={body}"

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        """Return the last ``recent_context_size`` messages, unfiltered.

        The window is handed to the prompt layer as-is instead of being trimmed
        by relevance: a continuous slice of the conversation helps the model
        work out who is replying to whom, whereas relevance filtering saved
        tokens but broke conversational continuity.

        ``current_sender``, ``current_content`` and ``quote_context`` are unused;
        they stay in the signature for compatibility with existing callers.
        """
        if not recent_messages:
            return []
        return recent_messages[-self.recent_context_size:]

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        """Score how relevant a history message is to the current one.

        Points are awarded for: same sender (+3), sender name containing the
        quoted sender's name (+3, case-insensitive), the message being an @
        mention (+1), topic-token overlap (+2 each, capped at 6, +2 more when
        it also looks like a question/answer) and question/answer wording (+1).
        Messages without content score 0.
        """
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0
        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        # Normalise the quoted name the same way as sender_name; otherwise a
        # mixed-case name could never match the lower-cased sender name.
        quoted_name = str(quote_sender_name or "").strip().lower()

        score = 0
        if sender == current_sender:
            score += 3
        if quoted_name and quoted_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1
        if focus_tokens:
            overlap = focus_tokens & cls._extract_topic_tokens(content)
            score += min(len(overlap) * 2, 6)
            if overlap and cls._looks_like_question_or_answer(content):
                score += 2
        elif sender == current_sender:
            score += 1
        if cls._looks_like_question_or_answer(content):
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        """Return True when ``content`` reads like a question or a troubleshooting reply.

        A message qualifies when it ends with an ASCII or full-width question
        mark, or contains one of the question/diagnosis keywords below.
        """
        text = str(content or "").strip().lower()
        if not text:
            return False
        # "\uff1f" is the full-width question mark. An end-of-string anchor on
        # its own, or an empty pattern, would match every message and must not
        # be used here.
        if text.endswith(("?", "\uff1f")):
            return True
        keywords = (
            "怎么", "如何", "为啥", "为什么", "能不能", "可以吗",
            "报错", "试试", "然后", "配置", "日志", "接口", "原因",
        )
        return any(keyword in text for keyword in keywords)

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        """Return lower-cased topic tokens found in ``content``.

        Tokens are ASCII words of three or more characters (letters, digits,
        ``_`` and ``-``) plus any of the known domain keywords that occur as a
        substring.
        """
        text = str(content or "").lower()
        # "-" sits last in the class so it is a literal hyphen; backslashes are
        # deliberately not part of a token.
        tokens = set(re.findall(r"[a-z0-9_-]{3,}", text))
        keywords = [
            "openclaw", "qdrant", "ollama", "docker", "python", "api", "插件", "机器人", "模型",
            "日志", "配置", "报错", "部署", "联网", "图片", "记忆", "群聊", "dota", "战绩",
        ]
        tokens.update(keyword for keyword in keywords if keyword in text)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        """Reduce a display name to at most 8 CJK/alphanumeric/underscore characters."""
        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        text = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9_]", "", text)
        return text[:8]

    @staticmethod
    def _clean_text(value) -> str:
        """Return ``value`` as stripped text, mapping None/falsy values to ""."""
        return str(value or "").strip()

    @staticmethod
    def _join_focus_lines(focus_lines: List[str] | None, limit: int) -> str:
        """Join up to ``limit`` non-blank focus lines with a full-width semicolon."""
        if not focus_lines:
            return ""
        if isinstance(focus_lines, str):
            return focus_lines.strip()
        cleaned = [str(line or "").strip() for line in list(focus_lines)[:limit]]
        return ContextBuilder._ITEM_SEPARATOR.join(line for line in cleaned if line)

    @staticmethod
    def _build_member_profile_brief_prompt(member_context: Dict) -> str:
        """Render the lightweight, always-present profile of the current speaker.

        This summary is included regardless of @ / directed triggers so every
        reply carries a basic understanding of the person. Only a few stable
        facts are kept so the profile does not outweigh the actual question;
        detailed member memories are rendered by the other member prompts.
        """
        if not member_context:
            return ""
        meta = member_context.get("meta", {}) or {}
        clean = ContextBuilder._clean_text
        stringify = ContextBuilder._stringify_items

        summary = clean(member_context.get("summary_text"))
        interaction_style = clean(member_context.get("interaction_style"))
        response_hint = clean(member_context.get("response_style_hint"))
        topics = stringify(member_context.get("topics_of_interest"), 3)
        recent_focus = stringify(member_context.get("recent_focus"), 2)
        skills = stringify(meta.get("skill_profile"), 2)
        reply_prefs = stringify(meta.get("long_term_reply_preferences"), 2)

        lines = [
            "当前发言人轻画像:",
            f"成员摘要:{summary}" if summary else "",
            f"互动风格:{interaction_style}" if interaction_style else "",
            f"偏好回复方式:{response_hint}" if response_hint else "",
            f"长期兴趣:{topics}" if topics else "",
            f"近期关注:{recent_focus}" if recent_focus else "",
            f"技能侧重点:{skills}" if skills else "",
            f"回复偏好:{reply_prefs}" if reply_prefs else "",
            "这些信息只用于帮助理解提问方式和回答切口,不要像在背档案。",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict, focus_lines: List[str] | None = None) -> str:
        """Render the detailed member memory section.

        Returns a fixed fallback sentence when there is no member context.
        Every line is emitted only when its value is non-empty, so the prompt
        never contains a bare label or the text "None".
        """
        if not member_context:
            return "暂无稳定成员画像。"
        meta = member_context.get("meta", {}) or {}
        clean = ContextBuilder._clean_text
        stringify = ContextBuilder._stringify_items

        summary = clean(member_context.get("summary_text"))
        interaction_style = clean(member_context.get("interaction_style"))
        response_hint = clean(member_context.get("response_style_hint"))
        focus = ContextBuilder._join_focus_lines(focus_lines, 4)
        topics = stringify(member_context.get("topics_of_interest"), 5)
        recent_focus = stringify(member_context.get("recent_focus"), 4)
        common_scenarios = stringify(meta.get("common_scenarios"), 4)
        skills = stringify(meta.get("skill_profile"), 5)
        problem_solving = stringify(meta.get("problem_solving_profile"), 4)
        stable_traits = stringify(meta.get("stable_traits"), 4)
        habits = stringify(meta.get("habit_patterns"), 4)
        expression_profile = stringify(meta.get("expression_profile"), 4)
        reply_entry = stringify(meta.get("reply_entry_profile"), 4)
        reply_prefs = stringify(meta.get("long_term_reply_preferences"), 4)
        recent_state = stringify(meta.get("recent_state"), 4)
        temperament = clean(meta.get("temperament_tendency"))
        group_role = clean(meta.get("group_role"))
        reply_taboos = stringify(meta.get("reply_taboos"), 3)

        lines = [
            f"成员摘要:{summary}" if summary else "",
            f"互动风格:{interaction_style}" if interaction_style else "",
            f"回复偏好:{response_hint}" if response_hint else "",
            f"本次相关记忆:{focus}" if focus else "",
            f"长期主题:{topics}" if topics else "",
            f"近期关注:{recent_focus}" if recent_focus else "",
            f"常见发言场景:{common_scenarios}" if common_scenarios else "",
            f"技能侧重点:{skills}" if skills else "",
            f"处理问题方式:{problem_solving}" if problem_solving else "",
            f"稳定特征:{stable_traits}" if stable_traits else "",
            f"习惯模式:{habits}" if habits else "",
            f"表达标记:{expression_profile}" if expression_profile else "",
            f"有效接话点:{reply_entry}" if reply_entry else "",
            f"长期回复偏好:{reply_prefs}" if reply_prefs else "",
            f"近期状态:{recent_state}" if recent_state else "",
            f"气质倾向:{temperament}" if temperament else "",
            f"群内角色:{group_role}" if group_role else "",
            f"回复禁忌:{reply_taboos}" if reply_taboos else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_at_member_profile_prompt(
        member_context: Dict,
        focus_lines: List[str] | None = None,
        is_at: bool = False,
        is_directed: bool = False,
    ) -> str:
        """Render the high-priority member profile used for @ / directed messages.

        Returns "" unless the message is an explicit @ or strongly directed, so
        ordinary replies are not over-fitted to a persona.
        """
        if not (is_at or is_directed):
            return ""
        if not member_context:
            return "本次是对方点名发起,但暂无稳定画像,按自然群友口吻短回复。"
        meta = member_context.get("meta", {}) or {}
        clean = ContextBuilder._clean_text

        summary = clean(member_context.get("summary_text"))
        interaction_style = clean(member_context.get("interaction_style"))
        response_hint = clean(member_context.get("response_style_hint"))
        topics = ContextBuilder._stringify_items(member_context.get("topics_of_interest"), 4)
        focus = ContextBuilder._join_focus_lines(focus_lines, 3)
        taboos = ContextBuilder._stringify_items(meta.get("reply_taboos"), 3)

        lines = [
            "本次为点名互动,优先参考该成员画像后再回复:",
            f"成员摘要:{summary}" if summary else "",
            f"互动风格:{interaction_style}" if interaction_style else "",
            f"偏好回复方式:{response_hint}" if response_hint else "",
            f"近期相关记忆:{focus}" if focus else "",
            f"长期兴趣:{topics}" if topics else "",
            f"禁忌提醒:{taboos}" if taboos else "",
            "语气要像熟悉的群友,短句、自然,不要客服腔。",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        """Render up to ``limit`` items as a de-duplicated, comma-separated string.

        Accepts a string (returned stripped), a list/tuple of strings or dicts
        (for dicts the first non-empty of ``name``/``label``/``value``/``text``
        is used), ``None``/empty (returns "") or any other single value, which
        is treated as a one-element list.
        """
        if not items:
            return ""
        if isinstance(items, str):
            return items.strip()
        if not isinstance(items, (list, tuple)):
            items = [items]

        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                value = str(
                    item.get("name")
                    or item.get("label")
                    or item.get("value")
                    or item.get("text")
                    or ""
                ).strip()
            else:
                value = str(item or "").strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        """Render at most the first two vector memories as ``[type] summary`` lines."""
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            memory_type = item.get("memory_type", "memory")
            if summary:
                lines.append(f"[{memory_type}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_social_memory_prompt(social_memory: Dict) -> str:
        """Return the pre-rendered ``prompt`` text of the social memory, stripped."""
        return str((social_memory or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_facts_prompt(group_facts: Dict) -> str:
        """Return the pre-rendered ``prompt`` text of the group facts, stripped."""
        return str((group_facts or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_long_memory_prompt(group_profile: Dict) -> str:
        """Render the always-present long-term background of the group.

        A short digest is supplied on every call so the model knows what the
        group usually talks about and which style fits, without pasting the
        whole group profile and spending tokens on generic style descriptions.
        """
        if not group_profile:
            return ""
        structured = group_profile.get("group_memory_structured", {}) or {}
        stringify = ContextBuilder._stringify_items

        summary = ContextBuilder._compact_group_summary(
            str(group_profile.get("group_memory_summary", "") or ""),
            max_chars=220,
            max_sentences=4,
        )
        focus = stringify(group_profile.get("knowledge_focus"), 4)
        memory_style = ContextBuilder._build_style_summary(group_profile.get("group_memory_style", {}))
        stable_topics = stringify(structured.get("stable_topics"), 4)
        recent_points = stringify(structured.get("recent_key_points"), 3)
        unresolved_points = stringify(structured.get("unresolved_points"), 3)
        resource_clues = stringify(structured.get("resource_clues"), 3)
        role_hints = stringify(structured.get("role_hints"), 3)
        summary_days = int(group_profile.get("group_memory_summary_days", 0) or 0)

        lines = [
            "群长期背景:",
            f"摘要观察窗口:最近 {summary_days} 份群总结" if summary_days > 0 else "",
            f"稳定主题:{stable_topics}" if stable_topics else "",
            f"近期重点:{recent_points}" if recent_points else "",
            f"未决问题:{unresolved_points}" if unresolved_points else "",
            f"共享资源/线索:{resource_clues}" if resource_clues else "",
            f"角色线索:{role_hints}" if role_hints else "",
            f"长期摘要:{summary}" if summary else "",
            f"常聊方向:{focus}" if focus else "",
            f"历史社交风格:{memory_style}" if memory_style else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        """Render the group profile section (mode, domains, tone and summary fields).

        Returns a fixed fallback sentence when there is no profile. Settings
        that are missing, ``None`` or empty fall back to their defaults;
        optional lines are emitted only when they have a value.
        """
        if not group_profile:
            return "当前群没有特殊知识域限制。"

        def setting(key: str, default: str) -> str:
            # `or default` also covers keys that are present but None/empty.
            return str(group_profile.get(key) or default)

        structured = group_profile.get("group_memory_structured", {}) or {}
        stringify = ContextBuilder._stringify_items

        focus = stringify(group_profile.get("knowledge_focus"), 6)
        boundaries = stringify(group_profile.get("topic_boundaries"), 6)
        summary = ContextBuilder._compact_group_summary(str(group_profile.get("group_memory_summary", "") or ""))
        stable_topics = stringify(structured.get("stable_topics"), 4)
        recent_points = stringify(structured.get("recent_key_points"), 3)
        unresolved_points = stringify(structured.get("unresolved_points"), 3)
        resource_clues = stringify(structured.get("resource_clues"), 3)
        role_hints = stringify(structured.get("role_hints"), 3)
        memory_style = ContextBuilder._build_style_summary(group_profile.get("group_memory_style", {}))
        persona_overlay = ContextBuilder._clean_text(group_profile.get("persona_overlay"))

        lines = [
            f"群模式:{setting('mode', 'social')}",
            f"知识域偏向:{setting('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
            f"配置知识域:{setting('configured_domain', 'general')}(仅在当前话题相关时参考)",
            f"历史推断知识域:{setting('group_memory_domain', 'general')}(弱参考)",
            f"回答风格:{setting('reply_style', '自然短句')}",
            f"互动调性:{setting('interaction_tone', '自然群友感')}",
            f"幽默强度:{setting('humor_style', '轻微')}",
            f"嘴硬程度:{setting('sharpness_style', '轻微嘴硬,不刻薄')}",
            f"表达松弛度:{setting('expressiveness_style', '克制')}",
            f"称呼强度:{setting('address_style', '低频称呼,默认直接接话')}",
            f"可能相关的话题背景:{focus}" if focus else "",
            # The structured summary fields are spelled out one by one because
            # the model consumes explicit fields better than re-parsing
            # markdown, and stable topics / recent points / unresolved
            # questions serve different decisions. The compacted summary
            # sentence stays as a human-readable fallback when fields are
            # missing.
            f"群摘要稳定主题:{stable_topics}" if stable_topics else "",
            f"群摘要近期重点:{recent_points}" if recent_points else "",
            f"群摘要未决问题:{unresolved_points}" if unresolved_points else "",
            f"群摘要资源线索:{resource_clues}" if resource_clues else "",
            f"群摘要角色线索:{role_hints}" if role_hints else "",
            f"群长期摘要关键句:{summary}" if summary else "",
            f"历史推断社交风格:{memory_style}" if memory_style else "",
            f"边界提醒:{boundaries}" if boundaries else "",
            f"人格叠加:{persona_overlay}" if persona_overlay else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        """Join the non-empty style fields with `` / `` (empty fields are skipped)."""
        if not style_profile:
            return ""
        keys = ("interaction_tone", "humor_style", "sharpness_style", "expressiveness_style")
        parts = [str(style_profile.get(key, "") or "").strip() for key in keys]
        return " / ".join(part for part in parts if part)

    @staticmethod
    def _compact_group_summary(summary_text: str, max_chars: int = 420, max_sentences: int = 6) -> str:
        """Shrink a long group summary to at most ``max_chars`` characters.

        Text that already fits is returned with whitespace collapsed. Longer
        text is split into sentences; the first and last sentence are always
        kept and the remaining slots (up to ``max_sentences`` in total) go to
        the highest-scoring sentences, so conclusions, risks, actions and
        configuration details survive instead of being lost to a plain cut.
        The kept sentences are re-joined in their original order with a
        full-width semicolon.
        """
        text = re.sub(r"\s+", " ", str(summary_text or "")).strip()
        if not text:
            return ""
        if len(text) <= max_chars:
            return text

        edge_chars = ContextBuilder._SENTENCE_EDGE_CHARS
        stripped_parts = (part.strip(edge_chars) for part in ContextBuilder._SENTENCE_SPLIT_RE.split(text))
        sentences = [part for part in stripped_parts if part]
        if not sentences:
            return text[: max_chars - 3] + "..."

        last_index = len(sentences) - 1
        key_patterns = (
            r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞",
            r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容",
        )
        scored: List[tuple[int, int, str]] = []
        for idx, sentence in enumerate(sentences):
            score = sum(3 for pattern in key_patterns if re.search(pattern, sentence, flags=re.IGNORECASE))
            if re.search(r"\d", sentence):
                score += 1
            if 8 <= len(sentence) <= 80:
                score += 1
            if idx in (0, last_index):
                score += 1
            scored.append((score, idx, sentence))

        chosen_indexes = {0, last_index}
        for _, idx, _ in sorted(scored, key=lambda entry: (-entry[0], entry[1])):
            # Check the cap before adding so the selection never overshoots it.
            if len(chosen_indexes) >= max_sentences:
                break
            chosen_indexes.add(idx)

        merged = ContextBuilder._ITEM_SEPARATOR.join(sentences[idx] for idx in sorted(chosen_indexes))
        if len(merged) <= max_chars:
            return merged
        return merged[: max_chars - 3].rstrip(edge_chars) + "..."

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        """Render the quoted-message section, or "" when nothing useful was quoted."""
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label") or "引用消息"
        quote_sender = str(quote_context.get("quote_sender_name", "") or "").strip()
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""
        lines = [
            "用户这次是在引用消息后发言。",
            f"引用类型:{quote_type}",
            f"被引用发送者:{quote_sender}" if quote_sender and quote_sender != "未知成员" else "",
            "图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题:{title}" if title else "",
            f"被引用内容:{quote_body}" if quote_body else "",
        ]
        payload = [line for line in lines if line]
        # Fallback: when only the two fixed header lines remain there is no
        # usable quoted content, so the section is omitted entirely.
        if len(payload) <= 2 and not quote_body and not title:
            return ""
        return "\n".join(payload)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        """Render the note that a recent group image is attached as context."""
        if not image_context:
            return ""
        sender_name = image_context.get("sender_name") or "未知成员"
        hint = image_context.get("hint")
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者:{sender_name}",
            f"图片说明:{hint}" if hint else "",
        ]
        return "\n".join([line for line in lines if line])

    @staticmethod
    def _build_image_safety_prompt(image_safety: Dict) -> str:
        """Render guard-rail instructions for messages that seem to comment on an image.

        Returns "" when no image comment is suspected. When the image itself is
        attached a short permissive note is returned; otherwise the model is
        told not to pretend it has seen the picture.
        """
        if not image_safety or not image_safety.get("suspected"):
            return ""
        if image_safety.get("has_visual_context"):
            return "当前发言疑似是在评论图片,但本次已附带图片上下文,可以基于图片谨慎理解。"
        reason = str(image_safety.get("reason", "") or "").strip()
        lines = [
            "当前发言疑似是在评论图片,但你这次没有看到图片本身。",
            f"原因:{reason}" if reason else "",
            "不要假装看过图,不要直接评价画面细节、人物状态、构图、文字内容或颜色元素。",
            "如果要回,只能轻微承认信息不足,或请对方引用图片/补一句文字说明,再继续。",
        ]
        return "\n".join([line for line in lines if line])