Files
abot/plugins/member_context/service.py
2026-04-02 11:49:20 +08:00

453 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import json
import math
import re
from collections import Counter
from datetime import datetime
from typing import Dict, List, Optional
import requests
from loguru import logger
from db.connection import DBConnectionManager
from db.contacts_db import ContactsDBOperator
from db.member_context_db import MemberContextDBOperator
from db.message_storage import MessageStorageDB
from utils.robot_cmd.robot_command import Feature, GroupBotManager, PermissionStatus
class MemberContextService:
    """Internal service of the member interaction summary plugin."""
    # Key handed to Feature.get_feature() when checking whether a group has
    # this capability switched on (see is_group_enabled).
    FEATURE_KEY = "MEMBER_CONTEXT_CAPABILITY"
    # Common Chinese filler words that _extract_keywords() drops before
    # counting token frequencies.
    STOPWORDS = {
        "这个", "那个", "就是", "然后", "怎么", "什么", "你们", "我们", "他们", "是不是", "可以",
        "一下", "一个", "已经", "还有", "没有", "因为", "所以", "如果", "但是", "还是", "今天",
        "昨天", "现在", "时候", "感觉", "真的", "应该", "知道", "觉得", "问题", "老师", "老板",
        "群里", "大家", "一下子", "自己", "东西", "这里", "那里", "进行", "需要", "关于"
    }
def __init__(self, db_manager: DBConnectionManager, plugin_config: Optional[Dict] = None):
    """Create the DB operators and read the plugin configuration.

    Args:
        db_manager: Connection manager passed to every DB operator.
        plugin_config: Optional settings dict. The ``api``, ``profile`` and
            ``schedule`` sections are read; missing keys use the defaults
            visible below.
    """
    self.db_manager = db_manager
    self.contacts_db = ContactsDBOperator(self.db_manager)
    self.message_db = MessageStorageDB(self.db_manager)
    self.member_context_db = MemberContextDBOperator(self.db_manager)
    self.LOG = logger
    self.plugin_config = plugin_config or {}

    # ``dict.get(key, {})`` only substitutes the default when the key is
    # absent. A section that is present but null (e.g. an empty YAML key)
    # would come back as None and break the ``.get`` calls below, so fall
    # back with ``or`` instead.
    api_config = self.plugin_config.get("api") or {}
    profile_config = self.plugin_config.get("profile") or {}
    schedule_config = self.plugin_config.get("schedule") or {}

    # Remote AI summariser settings.
    self.ai_enabled = bool(api_config.get("enabled", False))
    self.ai_base_url = (api_config.get("base_url") or "").rstrip("/")
    self.ai_api_key = api_config.get("api_key", "")
    self.ai_endpoint = str(api_config.get("endpoint", "completion-messages")).lstrip("/")
    self.ai_timeout = int(api_config.get("request_timeout", 60))

    # Sampling window and per-member limits used when building a profile.
    self.sample_days = int(profile_config.get("sample_days", 30))
    self.ai_sample_limit = int(profile_config.get("sample_message_limit", 80))
    self.refresh_limit_per_member = int(profile_config.get("refresh_limit_per_member", 200))

    # Filters applied by refresh_all_chatrooms().
    self.only_recent_active_groups = bool(schedule_config.get("only_recent_active_groups", False))
    self.active_hours = int(schedule_config.get("active_hours", 72))
    self.min_group_messages = int(schedule_config.get("min_group_messages", 20))
def build_member_context(self, chatroom_id: str, wxid: str, days: Optional[int] = None,
                         limit: Optional[int] = None) -> Dict:
    """Assemble the interaction summary dict for one member of one group.

    Args:
        chatroom_id: Group the member belongs to.
        wxid: Member identifier.
        days: Sampling window; falls back to ``self.sample_days`` when falsy.
        limit: Maximum number of messages to load; falls back to
            ``self.refresh_limit_per_member`` when falsy.

    Returns:
        The summary dict. Locally computed fields are replaced by non-empty
        fields of the AI result when ``_generate_ai_context`` returns one.
        Nothing is persisted here.
    """
    window_days = days or self.sample_days
    fetch_limit = limit or self.refresh_limit_per_member

    member_info = self.contacts_db.get_chatroom_member_info(chatroom_id, wxid) or {}
    history = self.message_db.get_member_recent_messages(
        chatroom_id, wxid, days=window_days, limit=fetch_limit
    )
    # Second, shorter sample: at most 7 days and 100 messages.
    recent_history = self.message_db.get_member_recent_messages(
        chatroom_id, wxid, days=min(window_days, 7), limit=100
    )

    shown_name = member_info.get("display_name") or member_info.get("nick_name") or wxid
    sample_size = len(history)
    level = self._calc_activity_level(sample_size, window_days)
    pattern = self._build_message_pattern(history)
    reply_hint = self._build_response_style_hint(history)
    long_term_topics = self._extract_keywords(history, limit=5)
    short_term_topics = self._extract_keywords(recent_history, limit=4)
    local_confidence = self._calc_confidence(sample_size)

    context = {}
    context["chatroom_id"] = chatroom_id
    context["wxid"] = wxid
    context["display_name"] = shown_name
    context["activity_level"] = level
    context["message_pattern"] = pattern
    context["interaction_style"] = self._build_interaction_style(history)
    context["response_style_hint"] = reply_hint
    context["topics_of_interest"] = long_term_topics
    context["recent_focus"] = short_term_topics
    context["summary_text"] = self._build_summary_text(
        level, pattern, reply_hint, long_term_topics, short_term_topics
    )
    context["confidence"] = local_confidence
    context["source_message_count"] = sample_size
    context["source_days"] = window_days
    context["last_profiled_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    context["meta"] = self._build_meta(history, recent_history)

    ai_result = self._generate_ai_context(chatroom_id, wxid, shown_name, context, history)
    if not ai_result:
        return context

    # An empty AI field keeps the locally computed value.
    overridable = (
        "activity_level",
        "message_pattern",
        "interaction_style",
        "response_style_hint",
        "topics_of_interest",
        "recent_focus",
        "summary_text",
    )
    for field in overridable:
        context[field] = ai_result.get(field) or context[field]
    # Confidence is taken whenever the key exists, even if it is 0.
    context["confidence"] = ai_result.get("confidence", context["confidence"])
    context["meta"].update(ai_result.get("meta", {}))
    return context
def refresh_member_context(self, chatroom_id: str, wxid: str, days: Optional[int] = None,
                           limit: Optional[int] = None) -> Dict:
    """Rebuild one member's summary and store it.

    Args:
        chatroom_id: Group the member belongs to.
        wxid: Member identifier.
        days: Forwarded to ``build_member_context``.
        limit: Forwarded to ``build_member_context``.

    Returns:
        The freshly built summary dict, after it was saved.

    Raises:
        ValueError: The feature is not enabled for ``chatroom_id``.
    """
    if self.is_group_enabled(chatroom_id):
        fresh = self.build_member_context(chatroom_id, wxid, days=days, limit=limit)
        self.member_context_db.save_member_context(fresh)
        return fresh
    raise ValueError(f"{chatroom_id} 未启用成员交互摘要功能")
def refresh_group_contexts(self, chatroom_id: str, days: Optional[int] = None,
                           limit_per_member: Optional[int] = None) -> Dict:
    """Rebuild and store summaries for the members of one group.

    Members whose ``status`` is not 1 or that have no ``wxid`` are ignored
    entirely. Members without any sampled message are counted as skipped and
    not saved.

    Args:
        chatroom_id: Group to refresh.
        days: Sampling window; falls back to ``self.sample_days``.
        limit_per_member: Message cap per member; falls back to
            ``self.refresh_limit_per_member``.

    Returns:
        ``{"refreshed": n, "skipped": m}``, or the same counters at zero plus
        ``"disabled": True`` when the feature is off for this group.
    """
    window_days = days or self.sample_days
    per_member_cap = limit_per_member or self.refresh_limit_per_member

    if not self.is_group_enabled(chatroom_id):
        self.LOG.info(f"{chatroom_id} 未启用成员交互摘要功能,跳过刷新")
        return {"refreshed": 0, "skipped": 0, "disabled": True}

    roster = self.contacts_db.get_chatroom_member_list(chatroom_id) or []
    eligible_ids = [
        entry.get("wxid")
        for entry in roster
        if entry.get("status", 1) == 1 and entry.get("wxid")
    ]

    outcome = {"refreshed": 0, "skipped": 0}
    for member_id in eligible_ids:
        profile = self.build_member_context(
            chatroom_id, member_id, days=window_days, limit=per_member_cap
        )
        if profile["source_message_count"] > 0:
            self.member_context_db.save_member_context(profile)
            outcome["refreshed"] += 1
        else:
            outcome["skipped"] += 1
    return outcome
def refresh_all_chatrooms(self, days: Optional[int] = None, limit_per_member: Optional[int] = None) -> Dict:
    """Refresh member summaries for every known group and report totals.

    When ``self.only_recent_active_groups`` is set, groups that are not in
    the result of ``_get_recent_active_chatrooms`` are counted as inactive
    and left alone.

    Args:
        days: Sampling window; falls back to ``self.sample_days``.
        limit_per_member: Message cap per member; falls back to
            ``self.refresh_limit_per_member``.

    Returns:
        Counters keyed ``groups``, ``members``, ``skipped``,
        ``disabled_groups`` and ``inactive_groups``.
    """
    window_days = days or self.sample_days
    per_member_cap = limit_per_member or self.refresh_limit_per_member

    chatrooms = self.contacts_db.get_chatroom_list() or []
    # None means "no activity filter"; the query only runs when it is needed.
    recently_active = None
    if self.only_recent_active_groups:
        recently_active = self._get_recent_active_chatrooms()

    tally = {"groups": 0, "members": 0, "skipped": 0, "disabled_groups": 0, "inactive_groups": 0}
    for entry in chatrooms:
        room_id = entry.get("chatroom_id")
        if not room_id:
            continue
        if recently_active is not None and room_id not in recently_active:
            tally["inactive_groups"] += 1
            continue
        outcome = self.refresh_group_contexts(
            room_id, days=window_days, limit_per_member=per_member_cap
        )
        if outcome.get("disabled"):
            tally["disabled_groups"] += 1
            continue
        tally["groups"] += 1
        tally["members"] += outcome["refreshed"]
        tally["skipped"] += outcome["skipped"]

    self.LOG.info(
        f"成员交互摘要刷新完成: 启用活跃群={tally['groups']}, 成员={tally['members']}, "
        f"跳过={tally['skipped']}, 未启用群={tally['disabled_groups']}, 非活跃群={tally['inactive_groups']}"
    )
    return tally
def is_group_enabled(self, chatroom_id: str) -> bool:
    """Tell whether the summary feature may run for ``chatroom_id``.

    When ``Feature.get_feature`` does not know ``FEATURE_KEY`` the method
    answers True; otherwise the group's permission must equal
    ``PermissionStatus.ENABLED``.
    """
    registered = Feature.get_feature(self.FEATURE_KEY)
    if registered is not None:
        status = GroupBotManager.get_group_permission(chatroom_id, registered)
        return status == PermissionStatus.ENABLED
    # Unregistered feature: nothing to gate on.
    return True
def _calc_activity_level(self, message_count: int, days: int) -> str:
daily_avg = message_count / max(days, 1)
if message_count >= 80 or daily_avg >= 3:
return "高活跃"
if message_count >= 25 or daily_avg >= 1:
return "中活跃"
if message_count > 0:
return "低活跃"
return "观察中"
def _build_message_pattern(self, messages: List[Dict]) -> str:
if not messages:
return "样本较少,暂不做明显模式判断"
contents = [m.get("content", "") for m in messages if m.get("content")]
if not contents:
return "样本较少,暂不做明显模式判断"
avg_len = sum(len(c) for c in contents) / len(contents)
question_ratio = sum(1 for c in contents if "?" in c or "" in c) / len(contents)
link_ratio = sum(1 for c in contents if "http://" in c or "https://" in c) / len(contents)
traits = []
if avg_len <= 12:
traits.append("短句居多")
elif avg_len >= 35:
traits.append("表达较完整")
else:
traits.append("表达中等长度")
if question_ratio >= 0.35:
traits.append("问题导向明显")
elif question_ratio >= 0.15:
traits.append("偶尔连续追问")
if link_ratio >= 0.15:
traits.append("常分享链接或资料")
if not traits:
traits.append("发言较平稳")
return "".join(traits)
def _build_response_style_hint(self, messages: List[Dict]) -> str:
if not messages:
return "样本不足时保持中性、简洁、避免过度熟络"
contents = [m.get("content", "") for m in messages if m.get("content")]
avg_len = sum(len(c) for c in contents) / max(len(contents), 1)
question_ratio = sum(1 for c in contents if "?" in c or "" in c) / max(len(contents), 1)
if question_ratio >= 0.35:
return "优先给明确结论,再补充步骤或依据,避免空泛回应"
if avg_len <= 12:
return "回复尽量简洁直接,先回答核心点,减少铺垫"
if avg_len >= 35:
return "可以给稍完整的解释,但保持结构清楚,避免冗长"
return "保持自然口语化,结论和解释尽量平衡"
def _build_interaction_style(self, messages: List[Dict]) -> str:
if not messages:
return "互动样本较少"
contents = [m.get("content", "") for m in messages if m.get("content")]
question_ratio = sum(1 for c in contents if "?" in c or "" in c) / max(len(contents), 1)
emoji_ratio = sum(1 for c in contents if re.search(r"[\U0001F300-\U0001FAFF\u2600-\u27BF]", c)) / max(len(contents), 1)
mention_ratio = sum(1 for c in contents if "@" in c) / max(len(contents), 1)
parts = []
if question_ratio >= 0.3:
parts.append("偏提问推进")
if emoji_ratio >= 0.15:
parts.append("表情互动感较强")
if mention_ratio >= 0.1:
parts.append("会主动点名互动")
if not parts:
parts.append("自然跟随式互动")
return "".join(parts)
def _extract_keywords(self, messages: List[Dict], limit: int = 5) -> List[str]:
counter = Counter()
for message in messages:
content = message.get("content", "")
for token in self._tokenize(content):
if token in self.STOPWORDS:
continue
counter[token] += 1
return [word for word, _ in counter.most_common(limit)]
def _tokenize(self, text: str) -> List[str]:
chinese_words = re.findall(r"[\u4e00-\u9fff]{2,6}", text)
english_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", text)
return chinese_words + [word.lower() for word in english_words]
def _calc_confidence(self, message_count: int) -> float:
return round(min(0.95, math.log(message_count + 1, 10)), 2) if message_count > 0 else 0.1
def _build_summary_text(self, activity_level: str, message_pattern: str,
response_style_hint: str, topics: List[str], recent_focus: List[str]) -> str:
parts = [
f"近期互动强度:{activity_level}",
f"表达特征:{message_pattern}",
f"回复建议:{response_style_hint}",
]
if topics:
parts.append(f"长期关注:{''.join(topics)}")
if recent_focus:
parts.append(f"近期话题:{''.join(recent_focus)}")
return "".join(parts)
def _build_meta(self, messages: List[Dict], recent_messages: List[Dict]) -> Dict:
latest_time = None
if recent_messages:
latest = recent_messages[-1].get("timestamp")
if isinstance(latest, datetime):
latest_time = latest.strftime("%Y-%m-%d %H:%M:%S")
elif latest:
latest_time = str(latest)
return {
"message_count_30d": len(messages),
"message_count_7d": len(recent_messages),
"latest_message_time": latest_time,
}
def _get_recent_active_chatrooms(self) -> set:
sql = """
SELECT group_id, COUNT(*) AS msg_count
FROM messages
WHERE group_id LIKE %s
AND timestamp >= DATE_SUB(NOW(), INTERVAL %s HOUR)
GROUP BY group_id
HAVING COUNT(*) >= %s
"""
rows = self.message_db.execute_query(sql, ("%@chatroom", self.active_hours, self.min_group_messages)) or []
return {row.get("group_id") for row in rows if row.get("group_id")}
def _generate_ai_context(self, chatroom_id: str, wxid: str, display_name: str,
base_context: Dict, messages: List[Dict]) -> Optional[Dict]:
if not self.ai_enabled or not self.ai_base_url or not self.ai_api_key:
return None
if len(messages) < 8:
return None
prompt = self._build_ai_prompt(chatroom_id, wxid, display_name, base_context, messages[-self.ai_sample_limit:])
headers = {
"Authorization": f"Bearer {self.ai_api_key}",
"Content-Type": "application/json",
}
payload = {
"inputs": {"query": prompt},
"response_mode": "blocking",
"user": f"member-context:{chatroom_id}:{wxid}",
}
url = f"{self.ai_base_url}/{self.ai_endpoint}"
try:
response = requests.post(url, headers=headers, json=payload, timeout=self.ai_timeout)
response.raise_for_status()
response_data = response.json()
parsed = self._parse_ai_answer(response_data.get("answer", ""))
if not parsed:
return None
usage = (response_data.get("metadata") or {}).get("usage", {}) or {}
parsed["meta"] = {
"ai_provider": "dify",
"ai_mode": "completion",
"ai_tokens": usage.get("total_tokens"),
"ai_latency": usage.get("latency"),
}
return parsed
except Exception as e:
self.LOG.warning(f"成员交互摘要 AI 生成失败,回退到本地摘要: chatroom={chatroom_id}, wxid={wxid}, error={e}")
return None
def _build_ai_prompt(self, chatroom_id: str, wxid: str, display_name: str,
base_context: Dict, messages: List[Dict]) -> str:
message_lines = []
for msg in messages[-40:]:
ts = msg.get("timestamp")
if isinstance(ts, datetime):
ts = ts.strftime("%m-%d %H:%M")
content = (msg.get("content") or "").replace("\n", " ").strip()
content = content[:160]
if content:
message_lines.append(f"[{ts}] {content}")
topics = "".join(base_context.get("topics_of_interest", [])) or "无明显长期话题"
recent_focus = "".join(base_context.get("recent_focus", [])) or "无明显近期话题"
return (
"你是一个微信群运营后台的成员交互摘要提取器。\n"
"你的任务不是做人设分析,也不是性格判断,而是基于公开聊天记录,提取对后续回复策略有帮助的“交互特征摘要”。\n"
"你只能依据给定聊天样本输出保守结论,不能脑补,不能做敏感推断,不能写负面标签,不能输出隐私猜测。\n"
"请根据以下成员近30天公开发言输出一个严格 JSON 对象,不要 markdown不要解释不要代码块。\n"
"JSON schema:\n"
"{"
"\"activity_level\":\"高活跃|中活跃|低活跃|观察中\","
"\"message_pattern\":\"一句中文,描述表达特点\","
"\"interaction_style\":\"一句中文,描述他在群里如何与人互动\","
"\"response_style_hint\":\"一句中文,描述适合怎样回应\","
"\"topics_of_interest\":[\"主题1\",\"主题2\"],"
"\"recent_focus\":[\"近期主题1\",\"近期主题2\"],"
"\"summary_text\":\"一段不超过120字的后台交互摘要\","
"\"confidence\":0.0,"
"\"engagement_traits\":[\"特征1\",\"特征2\"],"
"\"reply_taboos\":[\"避坑1\",\"避坑2\"]"
"}\n"
"要求:\n"
"1. 只总结群内公开行为特征,不要输出性格诊断、负面标签或敏感结论。\n"
"2. topics_of_interest 表示相对稳定的话题偏好最多5个recent_focus 表示近期频繁提及的话题最多4个。\n"
"3. message_pattern 只能描述可观察到的表达方式,例如:短句居多、问题导向、爱发链接、解释较完整、常接梗互动。\n"
"4. interaction_style 要描述他在群里的参与方式,例如:偏围观后插话、喜欢接梗、会连续追问、偏一对一回应。\n"
"5. response_style_hint 只能写对回复策略有帮助的建议,例如:先给结论再补步骤、保持简洁直接、可以适度接梗;不要写成评价语。\n"
"6. engagement_traits 最多4个写成中性的短标签例如节奏快、爱追问细节、接梗自然、偏结果导向。\n"
"7. reply_taboos 最多3个只写回复时应避免的方式例如避免长篇铺垫、避免过度说教、避免太官方。\n"
"8. summary_text 要像后台备注,客观、中性、克制,不要让人一眼看出是在给用户贴标签。\n"
"9. confidence 取值 0 到 1如果样本较少或不稳定必须降低 confidence。\n"
"10. 如果证据不足,宁可输出更弱、更泛化的结论,也不要瞎猜。\n\n"
"下面是正反例参考。\n"
"坏例子:这个人情绪化、爱抬杠、虚荣、玻璃心。\n"
"好例子:常用短句直接表达观点;遇到问题时更适合先给明确结论,再补充解释。\n\n"
f"成员标识: {display_name} ({wxid})\n"
f"群ID: {chatroom_id}\n"
f"样本消息数: {base_context.get('source_message_count', 0)}\n"
f"本地活跃度估计: {base_context.get('activity_level', '')}\n"
f"本地表达特征: {base_context.get('message_pattern', '')}\n"
f"本地互动风格: {base_context.get('interaction_style', '')}\n"
f"本地回复建议: {base_context.get('response_style_hint', '')}\n"
f"本地长期关注: {topics}\n"
f"本地近期话题: {recent_focus}\n"
"最近消息样本:\n"
+ "\n".join(message_lines)
)
def _parse_ai_answer(self, answer: str) -> Optional[Dict]:
if not answer:
return None
text = answer.strip()
match = re.search(r"\{.*\}", text, re.S)
if match:
text = match.group(0)
try:
data = json.loads(text)
except Exception:
return None
topics = data.get("topics_of_interest") or []
recent_focus = data.get("recent_focus") or []
engagement_traits = data.get("engagement_traits") or []
reply_taboos = data.get("reply_taboos") or []
if not isinstance(topics, list):
topics = []
if not isinstance(recent_focus, list):
recent_focus = []
if not isinstance(engagement_traits, list):
engagement_traits = []
if not isinstance(reply_taboos, list):
reply_taboos = []
try:
confidence = float(data.get("confidence", 0))
except Exception:
confidence = 0.0
return {
"activity_level": str(data.get("activity_level", "")).strip(),
"message_pattern": str(data.get("message_pattern", "")).strip(),
"interaction_style": str(data.get("interaction_style", "")).strip(),
"response_style_hint": str(data.get("response_style_hint", "")).strip(),
"topics_of_interest": [str(item).strip() for item in topics[:5] if str(item).strip()],
"recent_focus": [str(item).strip() for item in recent_focus[:4] if str(item).strip()],
"summary_text": str(data.get("summary_text", "")).strip(),
"confidence": max(0.0, min(1.0, confidence)),
"meta": {
"engagement_traits": [str(item).strip() for item in engagement_traits[:4] if str(item).strip()],
"reply_taboos": [str(item).strip() for item in reply_taboos[:3] if str(item).strip()],
}
}