Files
abot/plugins/ai_auto_response/context/context_builder.py
2026-04-10 09:45:39 +08:00

359 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from typing import Dict, List
class ContextBuilder:
    """Assemble the context dictionary used to answer one group-chat message.

    The builder picks a relevant slice of the recent conversation and renders
    the member, group, quote and image information it is given into plain-text
    prompt fragments. It performs no I/O; every method is a pure transformation
    of its arguments (plus ``recent_context_size`` for message selection).
    """

    # Message-selection tuning (same values the selection logic always used).
    _SMALL_WINDOW = 8     # windows this small are returned untouched
    _TAIL_KEEP = 4        # newest messages that are always kept
    _MAX_RELEVANT = 10    # highest-scoring messages added on top of the tail
    _MIN_SELECTED = 6     # below this, fall back to the newest messages
    _MAX_SELECTED = 12    # hard cap on the number of returned messages

    _UNKNOWN_MEMBER = "未知成员"
    _NO_MEMBER_PROFILE = "暂无稳定成员画像。"
    # Delimiter between focus memories (full-width semicolon).
    # NOTE(review): the original joined these lines with an empty string, which
    # glued separate memories together; confirm the preferred delimiter.
    _FOCUS_SEPARATOR = "\uff1b"

    # ASCII words/identifiers of three or more characters.
    _TOKEN_RE = re.compile(r"[a-z0-9_-]{3,}")
    _TOPIC_KEYWORDS = (
        "openclaw", "qdrant", "ollama", "docker", "python", "api", "插件", "机器人", "模型",
        "日志", "配置", "报错", "部署", "联网", "图片", "记忆", "群聊", "dota", "战绩",
    )
    # Markers that suggest a message is a question or part of an answer.
    # NOTE(review): the source previously contained the patterns r"$" and r"",
    # both of which match every string; they are assumed to be a trailing
    # full-width question mark and a character lost in transit. Confirm.
    _QA_RE = re.compile(
        "|".join(
            (
                r"[?\uff1f]$", r"怎么", r"如何", r"为啥", r"为什么", r"能不能", r"可以吗",
                r"报错", r"试试", r"然后", r"配置", r"日志", r"接口", r"原因",
            )
        )
    )
    _DISPLAY_NAME_STRIP_RE = re.compile(r"[^\u4e00-\u9fffA-Za-z0-9_]")

    def __init__(self, recent_context_size: int = 30):
        """Store the size of the recent-message window (at least 1; falsy means 30)."""
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        member_memory_focus: List[str] | None = None,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        social_memory: Dict | None = None,
        group_facts: Dict | None = None,
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        """Return the full context dictionary for the current message.

        All arguments are keyword-only. Optional dictionaries may be ``None``
        or empty; they then contribute an empty (or fallback) prompt fragment.
        ``trigger`` is read only for its ``trigger_type`` key, defaulting to
        ``"none"``.
        """
        quote = quote_context or {}
        selected = self._select_recent_messages(recent_messages, sender, content, quote)

        recent_lines = []
        for message in selected:
            text = message.get("content") or message.get("message") or ""
            if not text:
                continue
            speaker = message.get("sender_name") or message.get("sender") or self._UNKNOWN_MEMBER
            recent_lines.append(f"{speaker}: {text}")

        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context or {},
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "recent_message_items": self._build_recent_message_items(selected),
            "recent_messages": recent_lines,
            "recent_summary": "",
            # Guarded like every other optional dict so a None trigger cannot crash.
            "trigger_type": (trigger or {}).get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "memory_prompt": self._build_member_memory_prompt(member_context, member_memory_focus or []),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "social_memory_prompt": self._build_social_memory_prompt(social_memory or {}),
            "group_facts_prompt": self._build_group_facts_prompt(group_facts or {}),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile or {}),
            "quote_prompt": self._build_quote_prompt(quote),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "image_safety_prompt": self._build_image_safety_prompt(quote.get("image_safety") or {}),
            "current_message": f"{sender_name}: {content}",
        }

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        """Convert messages into compact dicts (idx, sender, content, is_at).

        ``idx`` is the 1-based position in ``messages``; messages without text
        are skipped (so indexes may have gaps) and text is cut to 120 characters.
        """
        items: List[Dict] = []
        for idx, message in enumerate(messages, start=1):
            text = str(message.get("content") or message.get("message") or "").strip()
            if not text:
                continue
            items.append({
                "idx": idx,
                "sender": message.get("sender_name") or message.get("sender") or "未知成员",
                "content": text[:120],
                "is_at": bool(message.get("is_at")),
            })
        return items

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        """Pick the recent messages worth showing, in chronological order.

        Only the newest ``recent_context_size`` messages are considered. A
        small window is returned unchanged; otherwise the newest few messages
        are always kept and the highest-scoring messages (see
        ``_message_relevance``) are added. If too few survive, the newest
        messages are returned instead.
        """
        if not recent_messages:
            return []
        window = recent_messages[-self.recent_context_size:]
        if len(window) <= self._SMALL_WINDOW:
            return window

        quote_text = f"{quote_context.get('title', '')} {quote_context.get('quote_body', '')}"
        focus_tokens = self._extract_topic_tokens(current_content) | self._extract_topic_tokens(quote_text)
        quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower()

        scored: List[tuple[int, int]] = []
        for idx, message in enumerate(window):
            score = self._message_relevance(
                message,
                current_sender=current_sender,
                focus_tokens=focus_tokens,
                quote_sender_name=quote_sender_name,
            )
            if score > 0:
                scored.append((score, idx))

        # Always keep the tail for a sense of "what is happening right now",
        # then add the messages most relevant to the current topic.
        keep_indexes = set(range(max(len(window) - self._TAIL_KEEP, 0), len(window)))
        # Highest score first; among equal scores the newer message wins.
        scored.sort(key=lambda pair: (-pair[0], -pair[1]))
        keep_indexes.update(idx for _, idx in scored[: self._MAX_RELEVANT])

        selected = [window[idx] for idx in sorted(keep_indexes)]
        if len(selected) < self._MIN_SELECTED:
            return window[-self._MIN_SELECTED:]
        return selected[-self._MAX_SELECTED:]

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        """Score how relevant ``item`` is to the current message (0 = irrelevant).

        Points are given for: same sender as the current message, sender name
        containing the quoted sender's name, an @-mention flag, topic-token
        overlap (capped), and looking like a question or answer.
        ``quote_sender_name`` is expected to be lower-cased already.
        """
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0
        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        looks_qa = cls._looks_like_question_or_answer(content)

        score = 0
        if sender == current_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1
        if focus_tokens:
            overlap = focus_tokens & cls._extract_topic_tokens(content)
            score += min(len(overlap) * 2, 6)
            if overlap and looks_qa:
                score += 2
        elif sender == current_sender:
            # No topic to match against: lean on speaker continuity instead.
            score += 1
        # NOTE(review): applied to every message regardless of the branch
        # above; confirm this bonus was not meant to sit inside the elif.
        if looks_qa:
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        """Return True if the text ends with a question mark or contains a Q&A marker."""
        text = str(content or "").strip().lower()
        if not text:
            return False
        return ContextBuilder._QA_RE.search(text) is not None

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        """Return lower-cased topic tokens found in ``content``.

        Tokens are ASCII words of three or more characters (letters, digits,
        ``_`` and ``-``) plus any known keyword that occurs as a substring;
        the keyword list is what lets Chinese terms become tokens.
        """
        text = str(content or "").lower()
        tokens = set(ContextBuilder._TOKEN_RE.findall(text))
        tokens.update(keyword for keyword in ContextBuilder._TOPIC_KEYWORDS if keyword in text)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        """Reduce a display name to at most 8 CJK/ASCII-alphanumeric/underscore characters."""
        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        return ContextBuilder._DISPLAY_NAME_STRIP_RE.sub("", text)[:8]

    @staticmethod
    def _labeled(label: str, value) -> str:
        """Return ``"label:value"``, or ``""`` when the value is empty or None."""
        text = str(value or "").strip()
        return f"{label}:{text}" if text else ""

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict, focus_lines: List[str] | None = None) -> str:
        """Render the member profile as one ``label:value`` line per known field.

        Fields without a value are omitted. A missing profile, or one that
        yields no lines at all, produces the fixed "no stable profile" text.
        """
        if not member_context:
            return ContextBuilder._NO_MEMBER_PROFILE
        labeled = ContextBuilder._labeled
        stringify = ContextBuilder._stringify_items
        meta = member_context.get("meta", {}) or {}

        focus_text = ContextBuilder._FOCUS_SEPARATOR.join(
            text for text in (str(line or "").strip() for line in (focus_lines or [])[:4]) if text
        )
        lines = [
            labeled("成员摘要", member_context.get("summary_text")),
            labeled("互动风格", member_context.get("interaction_style")),
            labeled("回复偏好", member_context.get("response_style_hint")),
            labeled("本次相关记忆", focus_text),
            labeled("长期主题", stringify(member_context.get("topics_of_interest"), 5)),
            labeled("近期关注", stringify(member_context.get("recent_focus"), 4)),
            labeled("常见发言场景", stringify(meta.get("common_scenarios"), 4)),
            labeled("技能侧重点", stringify(meta.get("skill_profile"), 5)),
            labeled("处理问题方式", stringify(meta.get("problem_solving_profile"), 4)),
            labeled("稳定特征", stringify(meta.get("stable_traits"), 4)),
            labeled("习惯模式", stringify(meta.get("habit_patterns"), 4)),
            labeled("表达标记", stringify(meta.get("expression_profile"), 4)),
            labeled("有效接话点", stringify(meta.get("reply_entry_profile"), 4)),
            labeled("长期回复偏好", stringify(meta.get("long_term_reply_preferences"), 4)),
            labeled("近期状态", stringify(meta.get("recent_state"), 4)),
            labeled("气质倾向", meta.get("temperament_tendency")),
            labeled("群内角色", meta.get("group_role")),
            labeled("回复禁忌", stringify(meta.get("reply_taboos"), 3)),
        ]
        rendered = "\n".join(line for line in lines if line)
        return rendered or ContextBuilder._NO_MEMBER_PROFILE

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        """Join up to ``limit`` items into a ``", "``-separated string.

        A string is returned stripped. Dict items contribute the first
        non-empty value among ``name``, ``label``, ``value`` and ``text``.
        Empty and duplicate values are dropped. ``None`` or any other
        non-list input yields ``""`` instead of raising.
        """
        if isinstance(items, str):
            return items.strip()
        if not isinstance(items, (list, tuple)):
            return ""
        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                raw = item.get("name") or item.get("label") or item.get("value") or item.get("text") or ""
            else:
                raw = item or ""
            value = str(raw).strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        """Render the first two vector memories as ``[type] summary`` lines."""
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            if summary:
                lines.append(f"[{item.get('memory_type') or 'memory'}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_social_memory_prompt(social_memory: Dict) -> str:
        """Return the stripped ``prompt`` field of the social memory, or ``""``."""
        return str((social_memory or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_facts_prompt(group_facts: Dict) -> str:
        """Return the stripped ``prompt`` field of the group facts, or ``""``."""
        return str((group_facts or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        """Render the group profile as ``label:value`` lines.

        Style fields always appear (with defaults when absent); topic focus,
        long-term summary, inferred style, boundaries and persona overlay
        appear only when they have content. The summary is flattened to one
        line and shortened to 120 characters.
        """
        if not group_profile:
            return "当前群没有特殊知识域限制。"
        labeled = ContextBuilder._labeled
        focus = ContextBuilder._stringify_items(group_profile.get("knowledge_focus"), 6)
        boundaries = ContextBuilder._stringify_items(group_profile.get("topic_boundaries"), 6)
        summary = str(group_profile.get("group_memory_summary", "") or "").replace("\n", " ").strip()
        if len(summary) > 120:
            summary = summary[:117] + "..."
        style_summary = ContextBuilder._build_style_summary(group_profile.get("group_memory_style") or {})
        lines = [
            f"群模式:{group_profile.get('mode', 'social')}",
            f"知识域偏向:{group_profile.get('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
            f"配置知识域:{group_profile.get('configured_domain', 'general')}(仅在当前话题相关时参考)",
            f"历史推断知识域:{group_profile.get('group_memory_domain', 'general')}(弱参考)",
            f"回答风格:{group_profile.get('reply_style', '自然短句')}",
            f"互动调性:{group_profile.get('interaction_tone', '自然群友感')}",
            f"幽默强度:{group_profile.get('humor_style', '轻微')}",
            f"嘴硬程度:{group_profile.get('sharpness_style', '轻微嘴硬,不刻薄')}",
            f"表达松弛度:{group_profile.get('expressiveness_style', '克制')}",
            f"称呼强度:{group_profile.get('address_style', '低频称呼,默认直接接话')}",
            labeled("可能相关的话题背景", focus),
            labeled("群长期摘要", summary),
            labeled("历史推断社交风格", style_summary),
            labeled("边界提醒", boundaries),
            labeled("人格叠加", group_profile.get("persona_overlay")),
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        """Join the non-empty tone/humor/sharpness/expressiveness fields with ``" / "``."""
        if not style_profile:
            return ""
        keys = ("interaction_tone", "humor_style", "sharpness_style", "expressiveness_style")
        parts = (str(style_profile.get(key, "") or "").strip() for key in keys)
        # Skip blanks so a missing field does not leave a dangling separator.
        return " / ".join(part for part in parts if part)

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        """Describe the quoted message (type, sender, optional image, title, body)."""
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label") or "引用消息"
        quote_sender = quote_context.get("quote_sender_name", "") or "未知成员"
        quote_body = quote_context.get("quote_body", "") or ""
        title = quote_context.get("title", "") or ""
        lines = [
            "用户这次是在引用消息后发言。",
            f"引用类型:{quote_type}",
            f"被引用发送者:{quote_sender}",
            "图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
            f"引用标题:{title}" if title else "",
            f"被引用内容:{quote_body}" if quote_body else "",
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        """Describe the attached group image (sender and optional hint)."""
        if not image_context:
            return ""
        hint = image_context.get("hint")
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者:{image_context.get('sender_name') or '未知成员'}",
            f"图片说明:{hint}" if hint else "",
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_image_safety_prompt(image_safety: Dict) -> str:
        """Return guidance for messages that seem to comment on an image.

        Nothing is returned unless ``suspected`` is set. When visual context
        is available a short "interpret carefully" note is returned; otherwise
        the text tells the model not to pretend it has seen the image.
        """
        if not image_safety or not image_safety.get("suspected"):
            return ""
        if image_safety.get("has_visual_context"):
            return "当前发言疑似是在评论图片,但本次已附带图片上下文,可以基于图片谨慎理解。"
        reason = str(image_safety.get("reason", "") or "").strip()
        lines = [
            "当前发言疑似是在评论图片,但你这次没有看到图片本身。",
            f"原因:{reason}" if reason else "",
            "不要假装看过图,不要直接评价画面细节、人物状态、构图、文字内容或颜色元素。",
            "如果要回,只能轻微承认信息不足,或请对方引用图片/补一句文字说明,再继续。",
        ]
        return "\n".join(line for line in lines if line)