Files
abot/plugins/ai_auto_response/context_builder.py

318 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from typing import Dict, List
class ContextBuilder:
    """Build the prompt-context dictionary for one incoming group message.

    The builder picks a relevant slice of the recent chat history and renders
    the member profile, group profile, vector memories, quoted message and
    image hint into plain-text prompt fragments.
    """

    # Windows with at most this many messages are passed through unfiltered.
    _PASSTHROUGH_WINDOW = 8
    # Number of trailing messages that are always kept.
    _TAIL_KEEP = 4
    # Maximum number of relevance-ranked messages added on top of the tail.
    _MAX_SCORED_KEEP = 10
    # If fewer messages than this survive, fall back to the plain tail.
    _MIN_SELECTED = 6
    # Hard cap on the number of messages returned by the selection.
    _MAX_SELECTED = 12

    _UNKNOWN_MEMBER = "未知成员"
    _NO_MEMBER_PROFILE = "暂无稳定成员画像。"

    # Domain keywords that count as topic tokens when they occur in a message.
    _TOPIC_KEYWORDS = (
        "openclaw", "qdrant", "ollama", "docker", "python", "api", "插件", "机器人", "模型",
        "日志", "配置", "报错", "部署", "联网", "图片", "记忆", "群聊", "dota", "战绩",
    )
    # ASCII word-like tokens of three or more characters (hyphen allowed).
    _TOPIC_TOKEN_RE = re.compile(r"[a-z0-9_-]{3,}")
    # Cues that a message is asking or answering something: a trailing ASCII
    # or full-width question mark, or one of the listed keywords.
    # NOTE(review): the previous pattern list contained `$` and an empty
    # pattern, both of which match every string; the full-width question mark
    # is assumed to be what `$` originally carried — confirm upstream.
    _QA_RE = re.compile(
        r"[?？]$|怎么|如何|为啥|为什么|能不能|可以吗|报错|试试|然后|配置|日志|接口|原因"
    )
    # Everything except CJK ideographs, ASCII letters, digits and underscore.
    _DISPLAY_NAME_JUNK_RE = re.compile(r"[^\u4e00-\u9fffA-Za-z0-9_]")

    def __init__(self, recent_context_size: int = 30):
        """Store the history window size.

        Args:
            recent_context_size: How many trailing messages are considered.
                Falsy values fall back to 30; the result is clamped to >= 1.
        """
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        """Assemble the full context dictionary for the current message.

        Args:
            room_id: Group identifier; used as the fallback group profile.
            group_profile: Group settings dict (may be empty or None).
            sender: Identifier of the member who sent the current message.
            sender_name: Display name of that member.
            content: Text of the current message.
            recent_messages: Chat history, oldest first.
            member_context: Profile dict of the sender (may be empty or None).
            trigger: Trigger info; only ``trigger_type`` is read here.
            flow_state: Passed through unchanged.
            reply_mode: Passed through unchanged.
            vector_memories: Retrieved memory dicts (may be empty or None).
            quote_context: Details of a quoted message, if any.
            image_context: Details of an attached image, if any.

        Returns:
            A dict holding the raw profiles, the selected history (both as
            structured items and as "sender: text" lines) and the rendered
            prompt fragments.
        """
        quote_context = quote_context or {}
        selected_messages = self._select_recent_messages(
            recent_messages, sender, content, quote_context
        )

        recent_lines = []
        for item in selected_messages:
            text = self._message_text(item)
            if text:
                recent_lines.append(f"{self._message_sender(item)}: {text}")

        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context or {},
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            # A missing trigger dict is treated as "no trigger".
            "trigger_type": (trigger or {}).get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "memory_prompt": self._build_member_memory_prompt(member_context),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile or {}),
            "quote_prompt": self._build_quote_prompt(quote_context),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "current_message": f"{sender_name}: {content}",
        }

    @staticmethod
    def _message_text(item: Dict) -> str:
        """Return the stripped text of a history entry (``content`` or ``message``)."""
        return str(item.get("content") or item.get("message") or "").strip()

    @staticmethod
    def _message_sender(item: Dict) -> str:
        """Return the display name of a history entry, falling back to its id."""
        return item.get("sender_name") or item.get("sender") or ContextBuilder._UNKNOWN_MEMBER

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        """Convert history entries into compact structured items.

        Entries without text are skipped. ``idx`` is the 1-based position in
        ``messages`` (so gaps appear where entries were skipped) and the text
        is truncated to 120 characters.
        """
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            text = ContextBuilder._message_text(item)
            if not text:
                continue
            items.append({
                "idx": idx,
                "sender": ContextBuilder._message_sender(item),
                "content": text[:120],
                "is_at": bool(item.get("is_at")),
            })
        return items

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        """Pick the history entries worth showing to the model.

        The last ``recent_context_size`` messages form the window. Small
        windows are returned unchanged; otherwise the trailing messages are
        always kept and the highest-scoring earlier messages are added, in
        chronological order.

        Returns:
            A list of entries from ``recent_messages``, oldest first.
        """
        if not recent_messages:
            return []
        quote_context = quote_context or {}

        window = recent_messages[-self.recent_context_size:]
        if len(window) <= self._PASSTHROUGH_WINDOW:
            return window

        # `or ''` keeps a None title/body from becoming the token "none".
        quote_text = f"{quote_context.get('title') or ''} {quote_context.get('quote_body') or ''}"
        focus_tokens = (
            self._extract_topic_tokens(current_content)
            | self._extract_topic_tokens(quote_text)
        )
        quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower()

        scored: List[tuple[int, int]] = []
        for idx, item in enumerate(window):
            score = self._message_relevance(
                item,
                current_sender=current_sender,
                focus_tokens=focus_tokens,
                quote_sender_name=quote_sender_name,
            )
            if score > 0:
                scored.append((score, idx))

        # Always keep the last few messages to preserve the feel of the live
        # conversation, then add the messages most relevant to the topic.
        keep_indexes = set(range(max(len(window) - self._TAIL_KEEP, 0), len(window)))
        # Highest score first; among equal scores the newer message wins.
        ranked = sorted(scored, key=lambda entry: (-entry[0], -entry[1]))
        keep_indexes.update(idx for _, idx in ranked[: self._MAX_SCORED_KEEP])

        selected = [window[idx] for idx in sorted(keep_indexes)]
        if len(selected) < self._MIN_SELECTED:
            return window[-self._MIN_SELECTED:]
        return selected[-self._MAX_SELECTED:]

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        """Score how relevant a history entry is to the current message.

        Points are awarded for: same sender as the current message (+3), the
        quoted sender's name appearing in the entry's sender name (+3), an
        ``is_at`` flag (+1), topic-token overlap (+2 each, capped at 6, plus
        +2 when the entry also looks like a question/answer), and looking
        like a question/answer at all (+1). Entries without text score 0.
        """
        content = cls._message_text(item)
        if not content:
            return 0

        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        # An empty sender id must not "match" an empty current sender.
        same_sender = bool(sender) and sender == current_sender
        is_question_or_answer = cls._looks_like_question_or_answer(content)

        score = 0
        if same_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1

        # NOTE(review): the same-sender bonus below is read as the alternative
        # to the topic check (applies only when there are no focus tokens) —
        # confirm against the upstream file.
        if focus_tokens:
            overlap = focus_tokens & cls._extract_topic_tokens(content)
            score += min(len(overlap) * 2, 6)
            if overlap and is_question_or_answer:
                score += 2
        elif same_sender:
            score += 1

        if is_question_or_answer:
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        """Return True when the text ends with a question mark or contains a cue word."""
        text = str(content or "").strip().lower()
        if not text:
            return False
        return ContextBuilder._QA_RE.search(text) is not None

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        """Extract lower-cased topic tokens from a message.

        Tokens are ASCII runs of letters, digits, ``_`` or ``-`` with at
        least three characters, plus any known domain keyword found as a
        substring.
        """
        text = str(content or "").lower()
        tokens = set(ContextBuilder._TOPIC_TOKEN_RE.findall(text))
        tokens.update(keyword for keyword in ContextBuilder._TOPIC_KEYWORDS if keyword in text)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        """Reduce a display name to at most 8 CJK/alphanumeric/underscore characters."""
        text = str(sender_name or "").strip()
        if not text:
            return ""
        # Whitespace is outside the allowed set, so one substitution removes
        # both whitespace and decoration characters.
        return ContextBuilder._DISPLAY_NAME_JUNK_RE.sub("", text)[:8]

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict) -> str:
        """Render the sender's profile as "label：value" lines.

        Fields without a value are omitted. When nothing usable remains, the
        same placeholder is returned as for a missing profile.
        """
        if not member_context:
            return ContextBuilder._NO_MEMBER_PROFILE

        meta = member_context.get("meta") or {}
        stringify = ContextBuilder._stringify_items

        def text_of(source: Dict, key: str) -> str:
            return str(source.get(key) or "").strip()

        fields = [
            ("成员摘要", text_of(member_context, "summary_text")),
            ("互动风格", text_of(member_context, "interaction_style")),
            ("回复偏好", text_of(member_context, "response_style_hint")),
            ("长期主题", stringify(member_context.get("topics_of_interest"), 5)),
            ("近期关注", stringify(member_context.get("recent_focus"), 4)),
            ("技能侧重点", stringify(meta.get("skill_profile"), 5)),
            ("稳定特征", stringify(meta.get("stable_traits"), 4)),
            ("习惯模式", stringify(meta.get("habit_patterns"), 4)),
            ("长期回复偏好", stringify(meta.get("long_term_reply_preferences"), 4)),
            ("近期状态", stringify(meta.get("recent_state"), 4)),
            ("气质倾向", text_of(meta, "temperament_tendency")),
            ("群内角色", text_of(meta, "group_role")),
            ("回复禁忌", stringify(meta.get("reply_taboos"), 3)),
        ]
        lines = [f"{label}：{value}" for label, value in fields if value]
        return "\n".join(lines) or ContextBuilder._NO_MEMBER_PROFILE

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        """Join up to ``limit`` items into a comma-separated string.

        A string is returned stripped. Dict items contribute their ``name``,
        ``label``, ``value`` or ``text`` entry (first non-empty). Empty and
        duplicate values are dropped; ``None`` or an empty input yields "".
        """
        if not items:
            return ""
        if isinstance(items, str):
            return items.strip()
        if not isinstance(items, (list, tuple)):
            # A lone dict or scalar is treated as a one-element list.
            items = [items]

        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                raw = (
                    item.get("name")
                    or item.get("label")
                    or item.get("value")
                    or item.get("text")
                    or ""
                )
            else:
                raw = item or ""
            value = str(raw).strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        """Render the first two retrieved memories as "[type] summary" lines."""
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            if summary:
                lines.append(f"[{item.get('memory_type') or 'memory'}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        """Render the group profile as "label：value" lines.

        The ten style/domain fields are always present (falling back to their
        defaults when missing or empty); the remaining fields appear only
        when they have a value. The long-term summary is flattened to one
        line and capped at 120 characters.
        """
        if not group_profile:
            return "当前群没有特殊知识域限制。"

        def field(key: str, default: str):
            # `or` also covers keys that are present but None/empty.
            return group_profile.get(key) or default

        stringify = ContextBuilder._stringify_items
        focus = stringify(group_profile.get("knowledge_focus"), 6)
        boundaries = stringify(group_profile.get("topic_boundaries"), 6)

        summary = str(group_profile.get("group_memory_summary", "") or "").replace("\n", " ").strip()
        if len(summary) > 120:
            summary = summary[:117] + "..."

        style = ContextBuilder._build_style_summary(group_profile.get("group_memory_style") or {})
        persona = str(group_profile.get("persona_overlay") or "").strip()

        lines = [
            f"群模式：{field('mode', 'social')}",
            f"知识域：{field('knowledge_domain', 'general')}",
            f"配置知识域：{field('configured_domain', 'general')}",
            f"历史推断知识域：{field('group_memory_domain', 'general')}",
            f"回答风格：{field('reply_style', '自然短句')}",
            f"互动调性：{field('interaction_tone', '自然群友感')}",
            f"幽默强度：{field('humor_style', '轻微')}",
            f"嘴硬程度：{field('sharpness_style', '轻微嘴硬，不刻薄')}",
            f"表达松弛度：{field('expressiveness_style', '克制')}",
            f"称呼强度：{field('address_style', '低频称呼，默认直接接话')}",
        ]
        optional = [
            ("知识重点", focus),
            ("群长期摘要", summary),
            ("历史推断社交风格", style),
            ("边界提醒", boundaries),
            ("人格叠加", persona),
        ]
        lines.extend(f"{label}：{value}" for label, value in optional if value)
        return "\n".join(lines)

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        """Join the non-empty style traits with " / "."""
        if not style_profile:
            return ""
        keys = ("interaction_tone", "humor_style", "sharpness_style", "expressiveness_style")
        parts = [str(style_profile.get(key) or "").strip() for key in keys]
        # Skipping blanks avoids doubled separators such as "a /  / b".
        return " / ".join(part for part in parts if part)

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        """Describe the quoted message the user is replying to, if any."""
        if not quote_context:
            return ""
        quote_type = quote_context.get("quote_type_label") or "引用消息"
        quote_sender = quote_context.get("quote_sender_name") or ContextBuilder._UNKNOWN_MEMBER
        quote_body = quote_context.get("quote_body") or ""
        title = quote_context.get("title") or ""

        lines = [
            "用户这次是在引用消息后发言。",
            f"引用类型：{quote_type}",
            f"被引用发送者：{quote_sender}",
        ]
        if quote_context.get("has_image_attachment"):
            lines.append("图片附件：已附带原图")
        if title:
            lines.append(f"引用标题：{title}")
        if quote_body:
            lines.append(f"被引用内容：{quote_body}")
        return "\n".join(lines)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        """Describe the attached group image, if any."""
        if not image_context:
            return ""
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者：{image_context.get('sender_name') or ContextBuilder._UNKNOWN_MEMBER}",
        ]
        hint = image_context.get("hint")
        if hint:
            lines.append(f"图片说明：{hint}")
        return "\n".join(lines)