453 lines
22 KiB
Python
453 lines
22 KiB
Python
# -*- coding: utf-8 -*-
|
||
import json
|
||
import math
|
||
import re
|
||
from collections import Counter
|
||
from datetime import datetime
|
||
from typing import Dict, List, Optional
|
||
|
||
import requests
|
||
from loguru import logger
|
||
|
||
from db.connection import DBConnectionManager
|
||
from db.contacts_db import ContactsDBOperator
|
||
from db.member_context_db import MemberContextDBOperator
|
||
from db.message_storage import MessageStorageDB
|
||
from utils.robot_cmd.robot_command import Feature, GroupBotManager, PermissionStatus
|
||
|
||
|
||
class MemberContextService:
    """Internal service of the member interaction summary plugin."""

    # Feature key resolved through Feature.get_feature() in is_group_enabled().
    FEATURE_KEY = "MEMBER_CONTEXT_CAPABILITY"

    # Tokens that _extract_keywords() drops before counting frequencies.
    STOPWORDS = {
        "这个", "那个", "就是", "然后", "怎么", "什么", "你们", "我们", "他们", "是不是", "可以",
        "一下", "一个", "已经", "还有", "没有", "因为", "所以", "如果", "但是", "还是", "今天",
        "昨天", "现在", "时候", "感觉", "真的", "应该", "知道", "觉得", "问题", "老师", "老板",
        "群里", "大家", "一下子", "自己", "东西", "这里", "那里", "进行", "需要", "关于"
    }
|
||
|
||
def __init__(self, db_manager: DBConnectionManager, plugin_config: Optional[Dict] = None):
    """Create the DB operators and read the plugin settings.

    Args:
        db_manager: Connection manager handed to the contacts, message and
            member-context DB operators.
        plugin_config: Optional settings dict with ``api``, ``profile`` and
            ``schedule`` sections. A section or value that is missing *or*
            explicitly null falls back to the default used below.
    """
    self.db_manager = db_manager
    self.contacts_db = ContactsDBOperator(self.db_manager)
    self.message_db = MessageStorageDB(self.db_manager)
    self.member_context_db = MemberContextDBOperator(self.db_manager)
    self.LOG = logger
    self.plugin_config = plugin_config or {}

    # ``or {}`` (instead of a .get default) also covers sections that are
    # present but null, e.g. an empty ``api:`` key in a YAML file.
    api_config = self.plugin_config.get("api") or {}
    profile_config = self.plugin_config.get("profile") or {}
    schedule_config = self.plugin_config.get("schedule") or {}

    def _int_setting(section: Dict, key: str, default: int) -> int:
        # Only None falls back; an explicit 0 is kept as configured.
        value = section.get(key)
        return default if value is None else int(value)

    endpoint = api_config.get("endpoint")
    if endpoint is None:
        endpoint = "completion-messages"

    self.ai_enabled = bool(api_config.get("enabled", False))
    self.ai_base_url = (api_config.get("base_url") or "").rstrip("/")
    self.ai_api_key = api_config.get("api_key") or ""
    self.ai_endpoint = str(endpoint).lstrip("/")
    self.ai_timeout = _int_setting(api_config, "request_timeout", 60)

    self.sample_days = _int_setting(profile_config, "sample_days", 30)
    self.ai_sample_limit = _int_setting(profile_config, "sample_message_limit", 80)
    self.refresh_limit_per_member = _int_setting(profile_config, "refresh_limit_per_member", 200)

    self.only_recent_active_groups = bool(schedule_config.get("only_recent_active_groups", False))
    self.active_hours = _int_setting(schedule_config, "active_hours", 72)
    self.min_group_messages = _int_setting(schedule_config, "min_group_messages", 20)
|
||
|
||
def build_member_context(self, chatroom_id: str, wxid: str, days: Optional[int] = None,
                         limit: Optional[int] = None) -> Dict:
    """Assemble the interaction summary of one member in one chatroom.

    Local heuristics are computed from the member's stored messages first.
    If ``_generate_ai_context`` returns a result, its non-empty fields
    replace the local ones, its ``confidence`` is taken when the key is
    present, and its ``meta`` entries are merged into the local ``meta``.

    Args:
        chatroom_id: Chatroom the member belongs to.
        wxid: Member identifier.
        days: Sampling window; falls back to ``self.sample_days`` when falsy.
        limit: Maximum messages to load; falls back to
            ``self.refresh_limit_per_member`` when falsy.

    Returns:
        The summary dict (not persisted by this method).
    """
    window_days = days or self.sample_days
    fetch_limit = limit or self.refresh_limit_per_member

    member_info = self.contacts_db.get_chatroom_member_info(chatroom_id, wxid) or {}
    history = self.message_db.get_member_recent_messages(
        chatroom_id, wxid, days=window_days, limit=fetch_limit
    )
    # A second, shorter sample (at most 7 days, 100 rows) feeds "recent_focus".
    recent_history = self.message_db.get_member_recent_messages(
        chatroom_id, wxid, days=min(window_days, 7), limit=100
    )

    sample_size = len(history)
    name = member_info.get("display_name") or member_info.get("nick_name") or wxid
    level = self._calc_activity_level(sample_size, window_days)
    pattern = self._build_message_pattern(history)
    reply_hint = self._build_response_style_hint(history)
    long_term_topics = self._extract_keywords(history, limit=5)
    short_term_topics = self._extract_keywords(recent_history, limit=4)
    local_confidence = self._calc_confidence(sample_size)

    context = {
        "chatroom_id": chatroom_id,
        "wxid": wxid,
        "display_name": name,
        "activity_level": level,
        "message_pattern": pattern,
        "interaction_style": self._build_interaction_style(history),
        "response_style_hint": reply_hint,
        "topics_of_interest": long_term_topics,
        "recent_focus": short_term_topics,
        "summary_text": self._build_summary_text(level, pattern, reply_hint, long_term_topics, short_term_topics),
        "confidence": local_confidence,
        "source_message_count": sample_size,
        "source_days": window_days,
        "last_profiled_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "meta": self._build_meta(history, recent_history),
    }

    ai_result = self._generate_ai_context(chatroom_id, wxid, name, context, history)
    if not ai_result:
        return context

    # Empty AI values are falsy, so the local heuristic survives for them.
    overridable_fields = (
        "activity_level",
        "message_pattern",
        "interaction_style",
        "response_style_hint",
        "topics_of_interest",
        "recent_focus",
        "summary_text",
    )
    for field in overridable_fields:
        context[field] = ai_result.get(field) or context[field]
    context["confidence"] = ai_result.get("confidence", context["confidence"])
    context["meta"].update(ai_result.get("meta", {}))
    return context
|
||
|
||
def refresh_member_context(self, chatroom_id: str, wxid: str, days: Optional[int] = None,
                           limit: Optional[int] = None) -> Dict:
    """Rebuild and persist the summary of a single member.

    Args:
        chatroom_id: Chatroom the member belongs to.
        wxid: Member identifier.
        days: Sampling window forwarded to ``build_member_context``.
        limit: Message cap forwarded to ``build_member_context``.

    Returns:
        The freshly built summary, after it has been saved.

    Raises:
        ValueError: If the feature is not enabled for ``chatroom_id``.
    """
    if self.is_group_enabled(chatroom_id):
        fresh_context = self.build_member_context(chatroom_id, wxid, days=days, limit=limit)
        self.member_context_db.save_member_context(fresh_context)
        return fresh_context
    raise ValueError(f"群 {chatroom_id} 未启用成员交互摘要功能")
|
||
|
||
def refresh_group_contexts(self, chatroom_id: str, days: Optional[int] = None,
                           limit_per_member: Optional[int] = None) -> Dict:
    """Rebuild and persist the summaries of every eligible member of a group.

    Members whose ``status`` is not 1 or that have no ``wxid`` are ignored.
    Members without any sampled message are counted as skipped and are not
    saved.

    Args:
        chatroom_id: Chatroom to refresh.
        days: Sampling window; falls back to ``self.sample_days`` when falsy.
        limit_per_member: Message cap per member; falls back to
            ``self.refresh_limit_per_member`` when falsy.

    Returns:
        ``{"refreshed": n, "skipped": m}``, or the same counters at zero plus
        ``"disabled": True`` when the feature is off for the group.
    """
    window_days = days or self.sample_days
    per_member_cap = limit_per_member or self.refresh_limit_per_member

    if not self.is_group_enabled(chatroom_id):
        self.LOG.info(f"群 {chatroom_id} 未启用成员交互摘要功能,跳过刷新")
        return {"refreshed": 0, "skipped": 0, "disabled": True}

    tally = {"refreshed": 0, "skipped": 0}
    roster = self.contacts_db.get_chatroom_member_list(chatroom_id) or []
    for entry in roster:
        member_id = entry.get("wxid")
        if entry.get("status", 1) != 1 or not member_id:
            continue

        profile = self.build_member_context(chatroom_id, member_id, days=window_days, limit=per_member_cap)
        if profile["source_message_count"] > 0:
            self.member_context_db.save_member_context(profile)
            tally["refreshed"] += 1
        else:
            # Nothing was sampled for this member, so there is nothing to store.
            tally["skipped"] += 1

    return tally
|
||
|
||
def refresh_all_chatrooms(self, days: Optional[int] = None, limit_per_member: Optional[int] = None) -> Dict:
    """Refresh member summaries for every known chatroom.

    When ``self.only_recent_active_groups`` is set, chatrooms that are not in
    the set returned by ``_get_recent_active_chatrooms`` are counted as
    inactive and not refreshed.

    Args:
        days: Sampling window; falls back to ``self.sample_days`` when falsy.
        limit_per_member: Message cap per member; falls back to
            ``self.refresh_limit_per_member`` when falsy.

    Returns:
        Counters keyed ``groups``, ``members``, ``skipped``,
        ``disabled_groups`` and ``inactive_groups``.
    """
    window_days = days or self.sample_days
    per_member_cap = limit_per_member or self.refresh_limit_per_member

    chatrooms = self.contacts_db.get_chatroom_list() or []
    # None means "no activity filter"; an empty set filters out everything.
    active_ids = None
    if self.only_recent_active_groups:
        active_ids = self._get_recent_active_chatrooms()

    stats = {"groups": 0, "members": 0, "skipped": 0, "disabled_groups": 0, "inactive_groups": 0}
    for room in chatrooms:
        room_id = room.get("chatroom_id")
        if not room_id:
            continue
        if active_ids is not None and room_id not in active_ids:
            stats["inactive_groups"] += 1
            continue

        outcome = self.refresh_group_contexts(room_id, days=window_days, limit_per_member=per_member_cap)
        if outcome.get("disabled"):
            stats["disabled_groups"] += 1
            continue

        stats["groups"] += 1
        stats["members"] += outcome["refreshed"]
        stats["skipped"] += outcome["skipped"]

    self.LOG.info(
        f"成员交互摘要刷新完成: 启用活跃群={stats['groups']}, 成员={stats['members']}, "
        f"跳过={stats['skipped']}, 未启用群={stats['disabled_groups']}, 非活跃群={stats['inactive_groups']}"
    )
    return stats
|
||
|
||
def is_group_enabled(self, chatroom_id: str) -> bool:
    """Tell whether the member-summary feature is enabled for a chatroom.

    Args:
        chatroom_id: Chatroom to check.

    Returns:
        ``True`` when ``Feature.get_feature`` knows no feature for
        ``FEATURE_KEY``; otherwise whether the group's permission for that
        feature equals ``PermissionStatus.ENABLED``.
    """
    feature = Feature.get_feature(self.FEATURE_KEY)
    if feature is not None:
        permission = GroupBotManager.get_group_permission(chatroom_id, feature)
        return permission == PermissionStatus.ENABLED
    # No such feature registered: every group is treated as enabled.
    return True
|
||
|
||
def _calc_activity_level(self, message_count: int, days: int) -> str:
|
||
daily_avg = message_count / max(days, 1)
|
||
if message_count >= 80 or daily_avg >= 3:
|
||
return "高活跃"
|
||
if message_count >= 25 or daily_avg >= 1:
|
||
return "中活跃"
|
||
if message_count > 0:
|
||
return "低活跃"
|
||
return "观察中"
|
||
|
||
def _build_message_pattern(self, messages: List[Dict]) -> str:
|
||
if not messages:
|
||
return "样本较少,暂不做明显模式判断"
|
||
|
||
contents = [m.get("content", "") for m in messages if m.get("content")]
|
||
if not contents:
|
||
return "样本较少,暂不做明显模式判断"
|
||
|
||
avg_len = sum(len(c) for c in contents) / len(contents)
|
||
question_ratio = sum(1 for c in contents if "?" in c or "?" in c) / len(contents)
|
||
link_ratio = sum(1 for c in contents if "http://" in c or "https://" in c) / len(contents)
|
||
|
||
traits = []
|
||
if avg_len <= 12:
|
||
traits.append("短句居多")
|
||
elif avg_len >= 35:
|
||
traits.append("表达较完整")
|
||
else:
|
||
traits.append("表达中等长度")
|
||
|
||
if question_ratio >= 0.35:
|
||
traits.append("问题导向明显")
|
||
elif question_ratio >= 0.15:
|
||
traits.append("偶尔连续追问")
|
||
|
||
if link_ratio >= 0.15:
|
||
traits.append("常分享链接或资料")
|
||
|
||
if not traits:
|
||
traits.append("发言较平稳")
|
||
return ",".join(traits)
|
||
|
||
def _build_response_style_hint(self, messages: List[Dict]) -> str:
|
||
if not messages:
|
||
return "样本不足时保持中性、简洁、避免过度熟络"
|
||
|
||
contents = [m.get("content", "") for m in messages if m.get("content")]
|
||
avg_len = sum(len(c) for c in contents) / max(len(contents), 1)
|
||
question_ratio = sum(1 for c in contents if "?" in c or "?" in c) / max(len(contents), 1)
|
||
|
||
if question_ratio >= 0.35:
|
||
return "优先给明确结论,再补充步骤或依据,避免空泛回应"
|
||
if avg_len <= 12:
|
||
return "回复尽量简洁直接,先回答核心点,减少铺垫"
|
||
if avg_len >= 35:
|
||
return "可以给稍完整的解释,但保持结构清楚,避免冗长"
|
||
return "保持自然口语化,结论和解释尽量平衡"
|
||
|
||
def _build_interaction_style(self, messages: List[Dict]) -> str:
|
||
if not messages:
|
||
return "互动样本较少"
|
||
contents = [m.get("content", "") for m in messages if m.get("content")]
|
||
question_ratio = sum(1 for c in contents if "?" in c or "?" in c) / max(len(contents), 1)
|
||
emoji_ratio = sum(1 for c in contents if re.search(r"[\U0001F300-\U0001FAFF\u2600-\u27BF]", c)) / max(len(contents), 1)
|
||
mention_ratio = sum(1 for c in contents if "@" in c) / max(len(contents), 1)
|
||
|
||
parts = []
|
||
if question_ratio >= 0.3:
|
||
parts.append("偏提问推进")
|
||
if emoji_ratio >= 0.15:
|
||
parts.append("表情互动感较强")
|
||
if mention_ratio >= 0.1:
|
||
parts.append("会主动点名互动")
|
||
if not parts:
|
||
parts.append("自然跟随式互动")
|
||
return ",".join(parts)
|
||
|
||
def _extract_keywords(self, messages: List[Dict], limit: int = 5) -> List[str]:
|
||
counter = Counter()
|
||
for message in messages:
|
||
content = message.get("content", "")
|
||
for token in self._tokenize(content):
|
||
if token in self.STOPWORDS:
|
||
continue
|
||
counter[token] += 1
|
||
return [word for word, _ in counter.most_common(limit)]
|
||
|
||
def _tokenize(self, text: str) -> List[str]:
|
||
chinese_words = re.findall(r"[\u4e00-\u9fff]{2,6}", text)
|
||
english_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", text)
|
||
return chinese_words + [word.lower() for word in english_words]
|
||
|
||
def _calc_confidence(self, message_count: int) -> float:
|
||
return round(min(0.95, math.log(message_count + 1, 10)), 2) if message_count > 0 else 0.1
|
||
|
||
def _build_summary_text(self, activity_level: str, message_pattern: str,
|
||
response_style_hint: str, topics: List[str], recent_focus: List[str]) -> str:
|
||
parts = [
|
||
f"近期互动强度:{activity_level}",
|
||
f"表达特征:{message_pattern}",
|
||
f"回复建议:{response_style_hint}",
|
||
]
|
||
if topics:
|
||
parts.append(f"长期关注:{'、'.join(topics)}")
|
||
if recent_focus:
|
||
parts.append(f"近期话题:{'、'.join(recent_focus)}")
|
||
return ";".join(parts)
|
||
|
||
def _build_meta(self, messages: List[Dict], recent_messages: List[Dict]) -> Dict:
|
||
latest_time = None
|
||
if recent_messages:
|
||
latest = recent_messages[-1].get("timestamp")
|
||
if isinstance(latest, datetime):
|
||
latest_time = latest.strftime("%Y-%m-%d %H:%M:%S")
|
||
elif latest:
|
||
latest_time = str(latest)
|
||
|
||
return {
|
||
"message_count_30d": len(messages),
|
||
"message_count_7d": len(recent_messages),
|
||
"latest_message_time": latest_time,
|
||
}
|
||
|
||
def _get_recent_active_chatrooms(self) -> set:
|
||
sql = """
|
||
SELECT group_id, COUNT(*) AS msg_count
|
||
FROM messages
|
||
WHERE group_id LIKE %s
|
||
AND timestamp >= DATE_SUB(NOW(), INTERVAL %s HOUR)
|
||
GROUP BY group_id
|
||
HAVING COUNT(*) >= %s
|
||
"""
|
||
rows = self.message_db.execute_query(sql, ("%@chatroom", self.active_hours, self.min_group_messages)) or []
|
||
return {row.get("group_id") for row in rows if row.get("group_id")}
|
||
|
||
def _generate_ai_context(self, chatroom_id: str, wxid: str, display_name: str,
|
||
base_context: Dict, messages: List[Dict]) -> Optional[Dict]:
|
||
if not self.ai_enabled or not self.ai_base_url or not self.ai_api_key:
|
||
return None
|
||
if len(messages) < 8:
|
||
return None
|
||
|
||
prompt = self._build_ai_prompt(chatroom_id, wxid, display_name, base_context, messages[-self.ai_sample_limit:])
|
||
headers = {
|
||
"Authorization": f"Bearer {self.ai_api_key}",
|
||
"Content-Type": "application/json",
|
||
}
|
||
payload = {
|
||
"inputs": {"query": prompt},
|
||
"response_mode": "blocking",
|
||
"user": f"member-context:{chatroom_id}:{wxid}",
|
||
}
|
||
url = f"{self.ai_base_url}/{self.ai_endpoint}"
|
||
try:
|
||
response = requests.post(url, headers=headers, json=payload, timeout=self.ai_timeout)
|
||
response.raise_for_status()
|
||
response_data = response.json()
|
||
parsed = self._parse_ai_answer(response_data.get("answer", ""))
|
||
if not parsed:
|
||
return None
|
||
usage = (response_data.get("metadata") or {}).get("usage", {}) or {}
|
||
parsed["meta"] = {
|
||
"ai_provider": "dify",
|
||
"ai_mode": "completion",
|
||
"ai_tokens": usage.get("total_tokens"),
|
||
"ai_latency": usage.get("latency"),
|
||
}
|
||
return parsed
|
||
except Exception as e:
|
||
self.LOG.warning(f"成员交互摘要 AI 生成失败,回退到本地摘要: chatroom={chatroom_id}, wxid={wxid}, error={e}")
|
||
return None
|
||
|
||
def _build_ai_prompt(self, chatroom_id: str, wxid: str, display_name: str,
|
||
base_context: Dict, messages: List[Dict]) -> str:
|
||
message_lines = []
|
||
for msg in messages[-40:]:
|
||
ts = msg.get("timestamp")
|
||
if isinstance(ts, datetime):
|
||
ts = ts.strftime("%m-%d %H:%M")
|
||
content = (msg.get("content") or "").replace("\n", " ").strip()
|
||
content = content[:160]
|
||
if content:
|
||
message_lines.append(f"[{ts}] {content}")
|
||
|
||
topics = "、".join(base_context.get("topics_of_interest", [])) or "无明显长期话题"
|
||
recent_focus = "、".join(base_context.get("recent_focus", [])) or "无明显近期话题"
|
||
|
||
return (
|
||
"你是一个微信群运营后台的成员交互摘要提取器。\n"
|
||
"你的任务不是做人设分析,也不是性格判断,而是基于公开聊天记录,提取对后续回复策略有帮助的“交互特征摘要”。\n"
|
||
"你只能依据给定聊天样本输出保守结论,不能脑补,不能做敏感推断,不能写负面标签,不能输出隐私猜测。\n"
|
||
"请根据以下成员近30天公开发言,输出一个严格 JSON 对象,不要 markdown,不要解释,不要代码块。\n"
|
||
"JSON schema:\n"
|
||
"{"
|
||
"\"activity_level\":\"高活跃|中活跃|低活跃|观察中\","
|
||
"\"message_pattern\":\"一句中文,描述表达特点\","
|
||
"\"interaction_style\":\"一句中文,描述他在群里如何与人互动\","
|
||
"\"response_style_hint\":\"一句中文,描述适合怎样回应\","
|
||
"\"topics_of_interest\":[\"主题1\",\"主题2\"],"
|
||
"\"recent_focus\":[\"近期主题1\",\"近期主题2\"],"
|
||
"\"summary_text\":\"一段不超过120字的后台交互摘要\","
|
||
"\"confidence\":0.0,"
|
||
"\"engagement_traits\":[\"特征1\",\"特征2\"],"
|
||
"\"reply_taboos\":[\"避坑1\",\"避坑2\"]"
|
||
"}\n"
|
||
"要求:\n"
|
||
"1. 只总结群内公开行为特征,不要输出性格诊断、负面标签或敏感结论。\n"
|
||
"2. topics_of_interest 表示相对稳定的话题偏好,最多5个;recent_focus 表示近期频繁提及的话题,最多4个。\n"
|
||
"3. message_pattern 只能描述可观察到的表达方式,例如:短句居多、问题导向、爱发链接、解释较完整、常接梗互动。\n"
|
||
"4. interaction_style 要描述他在群里的参与方式,例如:偏围观后插话、喜欢接梗、会连续追问、偏一对一回应。\n"
|
||
"5. response_style_hint 只能写对回复策略有帮助的建议,例如:先给结论再补步骤、保持简洁直接、可以适度接梗;不要写成评价语。\n"
|
||
"6. engagement_traits 最多4个,写成中性的短标签,例如:节奏快、爱追问细节、接梗自然、偏结果导向。\n"
|
||
"7. reply_taboos 最多3个,只写回复时应避免的方式,例如:避免长篇铺垫、避免过度说教、避免太官方。\n"
|
||
"8. summary_text 要像后台备注,客观、中性、克制,不要让人一眼看出是在给用户贴标签。\n"
|
||
"9. confidence 取值 0 到 1;如果样本较少或不稳定,必须降低 confidence。\n"
|
||
"10. 如果证据不足,宁可输出更弱、更泛化的结论,也不要瞎猜。\n\n"
|
||
"下面是正反例参考。\n"
|
||
"坏例子:这个人情绪化、爱抬杠、虚荣、玻璃心。\n"
|
||
"好例子:常用短句直接表达观点;遇到问题时更适合先给明确结论,再补充解释。\n\n"
|
||
f"成员标识: {display_name} ({wxid})\n"
|
||
f"群ID: {chatroom_id}\n"
|
||
f"样本消息数: {base_context.get('source_message_count', 0)}\n"
|
||
f"本地活跃度估计: {base_context.get('activity_level', '')}\n"
|
||
f"本地表达特征: {base_context.get('message_pattern', '')}\n"
|
||
f"本地互动风格: {base_context.get('interaction_style', '')}\n"
|
||
f"本地回复建议: {base_context.get('response_style_hint', '')}\n"
|
||
f"本地长期关注: {topics}\n"
|
||
f"本地近期话题: {recent_focus}\n"
|
||
"最近消息样本:\n"
|
||
+ "\n".join(message_lines)
|
||
)
|
||
|
||
def _parse_ai_answer(self, answer: str) -> Optional[Dict]:
|
||
if not answer:
|
||
return None
|
||
text = answer.strip()
|
||
match = re.search(r"\{.*\}", text, re.S)
|
||
if match:
|
||
text = match.group(0)
|
||
try:
|
||
data = json.loads(text)
|
||
except Exception:
|
||
return None
|
||
|
||
topics = data.get("topics_of_interest") or []
|
||
recent_focus = data.get("recent_focus") or []
|
||
engagement_traits = data.get("engagement_traits") or []
|
||
reply_taboos = data.get("reply_taboos") or []
|
||
if not isinstance(topics, list):
|
||
topics = []
|
||
if not isinstance(recent_focus, list):
|
||
recent_focus = []
|
||
if not isinstance(engagement_traits, list):
|
||
engagement_traits = []
|
||
if not isinstance(reply_taboos, list):
|
||
reply_taboos = []
|
||
|
||
try:
|
||
confidence = float(data.get("confidence", 0))
|
||
except Exception:
|
||
confidence = 0.0
|
||
|
||
return {
|
||
"activity_level": str(data.get("activity_level", "")).strip(),
|
||
"message_pattern": str(data.get("message_pattern", "")).strip(),
|
||
"interaction_style": str(data.get("interaction_style", "")).strip(),
|
||
"response_style_hint": str(data.get("response_style_hint", "")).strip(),
|
||
"topics_of_interest": [str(item).strip() for item in topics[:5] if str(item).strip()],
|
||
"recent_focus": [str(item).strip() for item in recent_focus[:4] if str(item).strip()],
|
||
"summary_text": str(data.get("summary_text", "")).strip(),
|
||
"confidence": max(0.0, min(1.0, confidence)),
|
||
"meta": {
|
||
"engagement_traits": [str(item).strip() for item in engagement_traits[:4] if str(item).strip()],
|
||
"reply_taboos": [str(item).strip() for item in reply_taboos[:3] if str(item).strip()],
|
||
}
|
||
}
|