变更项: 1. 收紧回复长度策略:social_short/qa_fast/qa_with_context 全部缩短,减少长句与说明文风格。 2. 强化提示词约束:默认30字内、最多2句且总长不超过55字,禁止大段铺垫。 3. 新增@画像高优先通道:当消息为@或强定向时,构建并注入 at_member_profile_prompt。 4. Dify输入同步注入@画像与 is_at/is_directed 控制字段,保证不同LLM后端行为一致。
442 lines
20 KiB
Python
442 lines
20 KiB
Python
from __future__ import annotations
|
||
|
||
import re
|
||
from typing import Dict, List
|
||
|
||
|
||
class ContextBuilder:
    """Assemble the prompt-context dictionary for one incoming group message.

    ``build`` is the entry point. It selects the recent messages worth
    showing and renders each optional source (member profile, vector
    memories, group profile, quoted message, image hints, ...) into a text
    fragment. Every ``_build_*_prompt`` helper returns ``""`` (or a fixed
    fallback sentence) when it has nothing to say.
    """

    # Fallback sender label used when a message carries no sender info.
    _UNKNOWN_MEMBER = "未知成员"
    # Returned when no usable member profile field is available.
    _NO_MEMBER_PROFILE = "暂无稳定成员画像。"

    # A trailing question mark (ASCII or full-width U+FF1F) or any of the
    # question/answer cue words marks a message as question-or-answer like.
    _QA_PATTERN = re.compile(
        r"[?\uff1f]$|怎么|如何|为啥|为什么|能不能|可以吗"
        r"|报错|试试|先|然后|配置|日志|接口|原因"
    )
    # Latin/digit tokens of three or more characters; '-' is literal here.
    _TOKEN_PATTERN = re.compile(r"[a-z0-9_-]{3,}")
    _TOPIC_KEYWORDS = (
        "openclaw", "qdrant", "ollama", "docker", "python", "api", "插件", "机器人", "模型",
        "日志", "配置", "报错", "部署", "联网", "图片", "记忆", "群聊", "dota", "战绩",
    )

    # Sentence terminators (ASCII and full-width forms) used to split summaries.
    _SENTENCE_SPLIT = re.compile(r"[。!\uff01?\uff1f;\uff1b\n]+")
    # Punctuation trimmed from both ends of an extracted sentence.
    _SENTENCE_EDGE_CHARS = " ,\uff0c;\uff1b。.!\uff01?\uff1f:\uff1a"
    _SUMMARY_KEY_PATTERNS = (
        re.compile(r"结论|核心|重点|关键|建议|方案|步骤|原因|影响|风险|注意|问题|异常|报错|故障|超时|阻塞"),
        re.compile(r"配置|参数|阈值|策略|限制|回退|优化|修复|排查|上线|回滚|依赖|版本|兼容"),
    )

    def __init__(self, recent_context_size: int = 30):
        """Store the size of the recent-message window.

        Falsy values fall back to 30; the stored value is never below 1.
        """
        self.recent_context_size = max(int(recent_context_size or 30), 1)

    def build(
        self,
        *,
        room_id: str,
        group_profile: Dict,
        sender: str,
        sender_name: str,
        content: str,
        recent_messages: List[Dict],
        member_context: Dict,
        member_memory_focus: List[str] | None = None,
        trigger: Dict,
        flow_state: str,
        reply_mode: str,
        vector_memories: List[Dict],
        social_memory: Dict | None = None,
        group_facts: Dict | None = None,
        quote_context: Dict | None = None,
        image_context: Dict | None = None,
    ) -> Dict:
        """Return the context dictionary for the current message.

        All arguments are keyword-only. ``None`` is tolerated for every
        dict/list argument and treated as empty. The returned dict holds the
        raw profiles, the selected recent messages (both as ``"name: text"``
        lines and as structured items), the ``is_at`` / ``is_directed`` flags
        read from ``trigger``, and one rendered prompt fragment per source.
        """
        trigger = trigger or {}
        member_context = member_context or {}
        focus_lines = member_memory_focus or []
        quote_context = quote_context or {}
        is_at = bool(trigger.get("is_at", False))
        is_directed = bool(trigger.get("is_directed", False))

        selected_messages = self._select_recent_messages(
            recent_messages, sender, content, quote_context
        )
        recent_lines = []
        for item in selected_messages:
            msg_content = item.get("content") or item.get("message") or ""
            if not msg_content:
                continue
            msg_sender = item.get("sender_name") or item.get("sender") or self._UNKNOWN_MEMBER
            recent_lines.append(f"{msg_sender}: {msg_content}")

        return {
            "group_profile": group_profile or {"room_id": room_id},
            "speaker_profile": {
                "wxid": sender,
                "display_name": sender_name,
                "member_context": member_context,
            },
            "speaker_name_clean": self._clean_display_name(sender_name),
            "is_at": is_at,
            "is_directed": is_directed,
            "recent_message_items": self._build_recent_message_items(selected_messages),
            "recent_messages": recent_lines,
            "recent_summary": "",
            "trigger_type": trigger.get("trigger_type", "none"),
            "reply_mode": reply_mode,
            "flow_state": flow_state,
            "memory_prompt": self._build_member_memory_prompt(member_context, focus_lines),
            "at_member_profile_prompt": self._build_at_member_profile_prompt(
                member_context=member_context,
                focus_lines=focus_lines,
                is_at=is_at,
                is_directed=is_directed,
            ),
            "vector_memory_prompt": self._build_vector_memory_prompt(vector_memories),
            "social_memory_prompt": self._build_social_memory_prompt(social_memory or {}),
            "group_facts_prompt": self._build_group_facts_prompt(group_facts or {}),
            "group_profile_prompt": self._build_group_profile_prompt(group_profile or {}),
            "quote_prompt": self._build_quote_prompt(quote_context),
            "image_prompt": self._build_image_prompt(image_context or {}),
            "image_safety_prompt": self._build_image_safety_prompt(
                quote_context.get("image_safety") or {}
            ),
            "current_message": f"{sender_name}: {content}",
        }

    @staticmethod
    def _label_line(label: str, value) -> str:
        """Return ``label + value`` or ``""`` when the value is empty/None."""
        text = str(value or "").strip()
        return f"{label}{text}" if text else ""

    @staticmethod
    def _build_recent_message_items(messages: List[Dict]) -> List[Dict]:
        """Convert messages into compact dicts (idx, sender, content, is_at).

        ``idx`` is the 1-based position in ``messages``; messages without
        text are skipped, so indexes may have gaps. Content is cut to 120
        characters.
        """
        items: List[Dict] = []
        for idx, item in enumerate(messages, start=1):
            content = str(item.get("content") or item.get("message") or "").strip()
            if not content:
                continue
            items.append({
                "idx": idx,
                "sender": item.get("sender_name") or item.get("sender") or ContextBuilder._UNKNOWN_MEMBER,
                "content": content[:120],
                "is_at": bool(item.get("is_at")),
            })
        return items

    def _select_recent_messages(
        self,
        recent_messages: List[Dict],
        current_sender: str,
        current_content: str,
        quote_context: Dict,
    ) -> List[Dict]:
        """Pick the recent messages to show, in chronological order.

        Windows of at most 8 messages are returned unchanged. Otherwise the
        last 4 messages are always kept and up to 10 of the highest-scoring
        messages (see ``_message_relevance``) are added. If fewer than 6
        survive, the last 6 of the window are returned instead; the result
        never exceeds 12 messages.
        """
        if not recent_messages:
            return []
        window = recent_messages[-self.recent_context_size:]
        if len(window) <= 8:
            return window

        quote_context = quote_context or {}
        current_tokens = self._extract_topic_tokens(current_content)
        quote_tokens = self._extract_topic_tokens(
            f"{quote_context.get('title', '')} {quote_context.get('quote_body', '')}"
        )
        focus_tokens = current_tokens | quote_tokens
        quote_sender_name = str(quote_context.get("quote_sender_name", "") or "").strip().lower()

        scored: List[tuple[int, int, Dict]] = []
        for idx, item in enumerate(window):
            score = self._message_relevance(
                item,
                current_sender=current_sender,
                focus_tokens=focus_tokens,
                quote_sender_name=quote_sender_name,
            )
            if score > 0:
                scored.append((score, idx, item))

        # Always keep the tail so the reply feels anchored in the live
        # conversation, then add the messages most relevant to the topic.
        keep_indexes = set(range(max(len(window) - 4, 0), len(window)))
        # Highest score first; ties go to the more recent message.
        for _, idx, _ in sorted(scored, key=lambda x: (-x[0], -x[1]))[:10]:
            keep_indexes.add(idx)

        selected = [window[idx] for idx in sorted(keep_indexes)]
        if len(selected) < 6:
            return window[-6:]
        return selected[-12:]

    @classmethod
    def _message_relevance(
        cls,
        item: Dict,
        *,
        current_sender: str,
        focus_tokens: set[str],
        quote_sender_name: str,
    ) -> int:
        """Score how relevant one past message is to the current one.

        Messages without text score 0. Points are added for: same sender
        (+3), sender name containing the quoted sender's name (+3), an
        ``is_at`` flag (+1), topic-token overlap (+2 per token, capped at 6,
        plus 2 more when the message also looks like a question/answer), and
        looking like a question/answer at all (+1). When there are no focus
        tokens, a same-sender message gets one extra point instead.
        """
        content = str(item.get("content") or item.get("message") or "").strip()
        if not content:
            return 0
        sender = str(item.get("sender", "") or "")
        sender_name = str(item.get("sender_name", "") or "").strip().lower()
        looks_like_qa = cls._looks_like_question_or_answer(content)
        score = 0

        if sender == current_sender:
            score += 3
        if quote_sender_name and quote_sender_name in sender_name:
            score += 3
        if item.get("is_at"):
            score += 1

        if focus_tokens:
            overlap = focus_tokens & cls._extract_topic_tokens(content)
            score += min(len(overlap) * 2, 6)
            if overlap and looks_like_qa:
                score += 2
        elif sender == current_sender:
            score += 1

        if looks_like_qa:
            score += 1
        return score

    @staticmethod
    def _looks_like_question_or_answer(content: str) -> bool:
        """Return True when the text ends in a question mark or contains a cue word.

        The previous pattern list contained ``r"?$"``, which is not a valid
        regular expression and raised ``re.error`` for any text that did not
        end in an ASCII ``?``. Both the ASCII and the full-width question
        mark are matched now.
        """
        text = str(content or "").strip()
        if not text:
            return False
        return ContextBuilder._QA_PATTERN.search(text) is not None

    @staticmethod
    def _extract_topic_tokens(content: str) -> set[str]:
        """Return lower-cased topic tokens found in the text.

        Tokens are runs of three or more ``[a-z0-9_-]`` characters plus any
        entry of ``_TOPIC_KEYWORDS`` that occurs as a substring. A backslash
        is a separator, not part of a token.
        """
        text = str(content or "").lower()
        tokens = set(ContextBuilder._TOKEN_PATTERN.findall(text))
        tokens.update(keyword for keyword in ContextBuilder._TOPIC_KEYWORDS if keyword in text)
        return tokens

    @staticmethod
    def _clean_display_name(sender_name: str) -> str:
        """Reduce a display name to at most 8 CJK/alphanumeric/underscore chars."""
        text = str(sender_name or "").strip()
        if not text:
            return ""
        text = re.sub(r"\s+", "", text)
        text = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9_]", "", text)
        return text[:8]

    @staticmethod
    def _build_member_memory_prompt(member_context: Dict, focus_lines: List[str] | None = None) -> str:
        """Render the speaker's stored profile as one ``label + value`` line per field.

        Fields that are missing, ``None`` or blank are omitted entirely. If
        ``member_context`` is empty, or none of its fields is usable, the
        fixed "no stable profile" sentence is returned.
        """
        if not member_context:
            return ContextBuilder._NO_MEMBER_PROFILE
        meta = member_context.get("meta", {}) or {}
        stringify = ContextBuilder._stringify_items
        label_line = ContextBuilder._label_line
        focus = ";".join(str(line) for line in (focus_lines or [])[:4])

        lines = [
            label_line("成员摘要:", member_context.get("summary_text")),
            label_line("互动风格:", member_context.get("interaction_style")),
            label_line("回复偏好:", member_context.get("response_style_hint")),
            label_line("本次相关记忆:", focus),
            label_line("长期主题:", stringify(member_context.get("topics_of_interest"), 5)),
            label_line("近期关注:", stringify(member_context.get("recent_focus"), 4)),
            label_line("常见发言场景:", stringify(meta.get("common_scenarios"), 4)),
            label_line("技能侧重点:", stringify(meta.get("skill_profile"), 5)),
            label_line("处理问题方式:", stringify(meta.get("problem_solving_profile"), 4)),
            label_line("稳定特征:", stringify(meta.get("stable_traits"), 4)),
            label_line("习惯模式:", stringify(meta.get("habit_patterns"), 4)),
            label_line("表达标记:", stringify(meta.get("expression_profile"), 4)),
            label_line("有效接话点:", stringify(meta.get("reply_entry_profile"), 4)),
            label_line("长期回复偏好:", stringify(meta.get("long_term_reply_preferences"), 4)),
            label_line("近期状态:", stringify(meta.get("recent_state"), 4)),
            label_line("气质倾向:", meta.get("temperament_tendency")),
            label_line("群内角色:", meta.get("group_role")),
            label_line("回复禁忌:", stringify(meta.get("reply_taboos"), 3)),
        ]
        rendered = "\n".join(line for line in lines if line)
        return rendered or ContextBuilder._NO_MEMBER_PROFILE

    @staticmethod
    def _build_at_member_profile_prompt(
        member_context: Dict,
        focus_lines: List[str] | None = None,
        is_at: bool = False,
        is_directed: bool = False,
    ) -> str:
        """Render the high-priority profile block for @-mentions / directed messages.

        Returns ``""`` unless ``is_at`` or ``is_directed`` is set, so the
        persona is not over-applied to ordinary chatter. With no profile
        available a short instruction sentence is returned instead.
        """
        if not (is_at or is_directed):
            return ""
        if not member_context:
            return "本次是对方点名发起,但暂无稳定画像,按自然群友口吻短回复。"

        meta = member_context.get("meta", {}) or {}
        stringify = ContextBuilder._stringify_items
        label_line = ContextBuilder._label_line
        focus = ";".join(str(line) for line in (focus_lines or [])[:3])

        lines = [
            "本次为点名互动,优先参考该成员画像后再回复:",
            label_line("成员摘要:", member_context.get("summary_text")),
            label_line("互动风格:", member_context.get("interaction_style")),
            label_line("偏好回复方式:", member_context.get("response_style_hint")),
            label_line("近期相关记忆:", focus),
            label_line("长期兴趣:", stringify(member_context.get("topics_of_interest"), 4)),
            label_line("禁忌提醒:", stringify(meta.get("reply_taboos"), 3)),
            "语气要像熟悉的群友,短句、自然,不要客服腔。",
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _stringify_items(items: List | str, limit: int) -> str:
        """Join up to ``limit`` items into a ``", "``-separated string.

        A string is returned stripped. ``None``/empty input yields ``""``
        and a single non-list value is treated as a one-item list. Dict
        items contribute their first non-empty ``name`` / ``label`` /
        ``value`` / ``text`` entry. Blank and duplicate values are dropped.
        """
        if not items:
            return ""
        if isinstance(items, str):
            return items.strip()
        if not isinstance(items, (list, tuple)):
            items = [items]
        values: List[str] = []
        for item in items[:limit]:
            if isinstance(item, dict):
                raw = (
                    item.get("name")
                    or item.get("label")
                    or item.get("value")
                    or item.get("text")
                    or ""
                )
            else:
                raw = item or ""
            value = str(raw).strip()
            if value and value not in values:
                values.append(value)
        return ", ".join(values)

    @staticmethod
    def _build_vector_memory_prompt(vector_memories: List[Dict]) -> str:
        """Render the first two vector memories as ``[type] summary`` lines."""
        if not vector_memories:
            return ""
        lines = []
        for item in vector_memories[:2]:
            summary = item.get("content_summary") or item.get("summary_text") or item.get("text") or ""
            if summary:
                lines.append(f"[{item.get('memory_type', 'memory')}] {summary}")
        return "\n".join(lines)

    @staticmethod
    def _build_social_memory_prompt(social_memory: Dict) -> str:
        """Return the stripped ``prompt`` entry of the social memory dict."""
        return str((social_memory or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_facts_prompt(group_facts: Dict) -> str:
        """Return the stripped ``prompt`` entry of the group facts dict."""
        return str((group_facts or {}).get("prompt", "") or "").strip()

    @staticmethod
    def _build_group_profile_prompt(group_profile: Dict) -> str:
        """Render the group profile as labelled lines.

        The ten style/domain lines are always present and fall back to
        their defaults when the stored value is missing, ``None`` or empty.
        The remaining lines appear only when they have content.
        """
        if not group_profile:
            return "当前群没有特殊知识域限制。"

        def field(key: str, default: str) -> str:
            return str(group_profile.get(key) or default)

        stringify = ContextBuilder._stringify_items
        label_line = ContextBuilder._label_line
        focus = stringify(group_profile.get("knowledge_focus"), 6)
        boundaries = stringify(group_profile.get("topic_boundaries"), 6)
        summary = ContextBuilder._compact_group_summary(
            str(group_profile.get("group_memory_summary", "") or "")
        )
        style = ContextBuilder._build_style_summary(group_profile.get("group_memory_style") or {})

        lines = [
            f"群模式:{field('mode', 'social')}",
            f"知识域偏向:{field('knowledge_domain', 'general')}(仅作理解倾向,不是每次都要显式提到)",
            f"配置知识域:{field('configured_domain', 'general')}(仅在当前话题相关时参考)",
            f"历史推断知识域:{field('group_memory_domain', 'general')}(弱参考)",
            f"回答风格:{field('reply_style', '自然短句')}",
            f"互动调性:{field('interaction_tone', '自然群友感')}",
            f"幽默强度:{field('humor_style', '轻微')}",
            f"嘴硬程度:{field('sharpness_style', '轻微嘴硬,不刻薄')}",
            f"表达松弛度:{field('expressiveness_style', '克制')}",
            f"称呼强度:{field('address_style', '低频称呼,默认直接接话')}",
            label_line("可能相关的话题背景:", focus),
            label_line("群长期摘要关键句:", summary),
            label_line("历史推断社交风格:", style),
            label_line("边界提醒:", boundaries),
            label_line("人格叠加:", group_profile.get("persona_overlay")),
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_style_summary(style_profile: Dict) -> str:
        """Join the non-empty tone/humor/sharpness/expressiveness values with `` / ``."""
        if not style_profile:
            return ""
        keys = ("interaction_tone", "humor_style", "sharpness_style", "expressiveness_style")
        parts = [str(style_profile.get(key, "") or "").strip() for key in keys]
        return " / ".join(part for part in parts if part)

    @staticmethod
    def _compact_group_summary(summary_text: str, max_chars: int = 420, max_sentences: int = 6) -> str:
        """Shrink a long group summary to at most ``max_chars`` characters.

        Whitespace is collapsed first; text that already fits is returned
        as is. Longer text is split into sentences, each sentence is scored
        (keyword hits, digits, moderate length, first/last position) and up
        to ``max_sentences`` are kept in their original order, with the
        first and last sentence taking priority. If the joined result is
        still too long it is cut and suffixed with ``"..."``.
        """
        text = re.sub(r"\s+", " ", str(summary_text or "")).strip()
        if not text:
            return ""
        if len(text) <= max_chars:
            return text

        edge_chars = ContextBuilder._SENTENCE_EDGE_CHARS
        head_len = max(max_chars - 3, 0)
        max_sentences = max(int(max_sentences), 1)

        # Pick key sentences instead of cutting blindly, so conclusions,
        # risks, actions and configuration notes survive the compaction.
        sentences = [
            part.strip(edge_chars)
            for part in ContextBuilder._SENTENCE_SPLIT.split(text)
            if part.strip()
        ]
        if not sentences:
            return text[:head_len] + "..."

        last_index = len(sentences) - 1
        scored: List[tuple[int, int]] = []
        for idx, sentence in enumerate(sentences):
            score = 3 * sum(
                1 for pattern in ContextBuilder._SUMMARY_KEY_PATTERNS if pattern.search(sentence)
            )
            if re.search(r"\d", sentence):
                score += 1
            if 8 <= len(sentence) <= 80:
                score += 1
            if idx in (0, last_index):
                score += 1
            scored.append((score, idx))

        # First and last sentence are preferred, then the best-scoring ones
        # (earlier sentence wins a tie) until the sentence budget is used up.
        chosen_indexes: set[int] = set()
        ranked = [0, last_index] + [idx for _, idx in sorted(scored, key=lambda x: (-x[0], x[1]))]
        for idx in ranked:
            if len(chosen_indexes) >= max_sentences:
                break
            chosen_indexes.add(idx)

        chosen = [sentences[idx] for idx in sorted(chosen_indexes)]
        merged = ";".join(item for item in chosen if item).strip(";")
        if len(merged) <= max_chars:
            return merged
        return merged[:head_len].rstrip(edge_chars) + "..."

    @staticmethod
    def _build_quote_prompt(quote_context: Dict) -> str:
        """Describe the quoted message (type, sender, title, body) for the prompt."""
        if not quote_context:
            return ""
        label_line = ContextBuilder._label_line
        quote_type = quote_context.get("quote_type_label") or "引用消息"
        quote_sender = quote_context.get("quote_sender_name") or ContextBuilder._UNKNOWN_MEMBER
        lines = [
            "用户这次是在引用消息后发言。",
            f"引用类型:{quote_type}",
            f"被引用发送者:{quote_sender}",
            "图片附件:已附带原图" if quote_context.get("has_image_attachment") else "",
            label_line("引用标题:", quote_context.get("title")),
            label_line("被引用内容:", quote_context.get("quote_body")),
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_image_prompt(image_context: Dict) -> str:
        """Describe the attached group image (sender and optional hint)."""
        if not image_context:
            return ""
        sender_name = image_context.get("sender_name") or ContextBuilder._UNKNOWN_MEMBER
        lines = [
            "已附带最近一张群图片作为上下文。",
            f"图片发送者:{sender_name}",
            ContextBuilder._label_line("图片说明:", image_context.get("hint")),
        ]
        return "\n".join(line for line in lines if line)

    @staticmethod
    def _build_image_safety_prompt(image_safety: Dict) -> str:
        """Return guard instructions when the message seems to comment on an image.

        Nothing is returned unless ``suspected`` is set. With visual context
        available a one-line note is returned; otherwise the instructions
        tell the model not to pretend it saw the picture.
        """
        if not image_safety or not image_safety.get("suspected"):
            return ""
        if image_safety.get("has_visual_context"):
            return "当前发言疑似是在评论图片,但本次已附带图片上下文,可以基于图片谨慎理解。"
        lines = [
            "当前发言疑似是在评论图片,但你这次没有看到图片本身。",
            ContextBuilder._label_line("原因:", image_safety.get("reason")),
            "不要假装看过图,不要直接评价画面细节、人物状态、构图、文字内容或颜色元素。",
            "如果要回,只能轻微承认信息不足,或请对方引用图片/补一句文字说明,再继续。",
        ]
        return "\n".join(line for line in lines if line)