Files
abot/plugins/member_context/service.py
liuwei 60b72874b5 feat: 重构成员画像为日周月分层沉淀链路并增强后台摘要能力
本次提交围绕成员画像插件进行了较大升级,核心目标是把原来偏单次、偏近期的成员交互摘要,升级为可随时间沉淀的分层画像能力。

主要功能变更如下:
1. 新增成员分层摘要存储表 t_member_digest,并提供对应的数据库操作层,支持按成员、按群、按摘要类型(daily/weekly/monthly)持久化周期性摘要结果。
2. 在 member_context 插件内新增 MemberDigestService,把画像生成拆分为日摘要、周摘要、月摘要三级处理流程,再由最终画像服务消费这些分层摘要,减少直接反复处理大量原始消息带来的成本和失真。
3. 新增提示词构建模块,分别为日级观察、周级归纳、月级归纳以及最终画像整理提供独立提示词,强调中性、克制、避免敏感推断,并将长期特征与近期状态明确分层。
4. 重写成员最终画像生成逻辑,优先基于日/周/月摘要融合出长期特征、习惯模式、长期回复偏好、近期状态等信息,再用 AI 对分层摘要做最终整理,避免仅依赖近 30 天消息得出偏短期结论。
5. 保留并增强长期画像融合逻辑,通过打分、衰减和重复证据累积,使长期特征随着时间逐步稳定,而不会被单次刷新完全覆盖。
6. 在消息存储层补充成员按时间增量获取、按活跃日期统计、按天取消息等查询方法,为后续分层摘要生成提供数据支撑。
7. 扩展 member_context 插件配置,增加日级摘要消息上限、日摘要最小消息数、单次回填的日摘要数量上限、最终画像使用的日/周/月摘要数量等参数,便于在准确性和系统负载之间做平衡。
8. 后台成员摘要详情页新增长期沟通倾向、长期特征、习惯模式、长期回复偏好、近期状态、历史样本数、分层摘要数量等展示字段,方便观察画像沉淀程度。
9. 优化后台查看成员摘要接口逻辑:首次打开如果还没有摘要,不再同步阻塞生成,而是返回未就绪状态,配合后台手动异步刷新,降低页面卡顿和接口阻塞风险。
10. 增强刷新日志,单成员和群级刷新会输出当前刷新模式以及日/周/月摘要数量,便于排查画像构建进度。
11. 调整当前日、当前周、当前月摘要的重算逻辑,确保新增日摘要写入后,本周和本月摘要不会长期停留在旧版本。

本次提交后,成员画像能力从“基于近期样本的单层摘要”升级为“基于时间沉淀的分层画像管线”,为后续把画像稳定接入 AI 自动回复上下文打下基础,同时尽量保持现有群权限控制和后台异步刷新方式不变。
2026-04-02 12:42:28 +08:00

712 lines
35 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import json
import re
from collections import Counter
from datetime import datetime
from typing import Dict, List, Optional
import requests
from loguru import logger
from db.connection import DBConnectionManager
from db.contacts_db import ContactsDBOperator
from db.member_context_db import MemberContextDBOperator
from db.member_digest_db import MemberDigestDBOperator
from db.message_storage import MessageStorageDB
from plugins.member_context.digest_service import MemberDigestService
from plugins.member_context.prompt_builder import MemberContextPromptBuilder
from utils.robot_cmd.robot_command import Feature, GroupBotManager, PermissionStatus
class MemberContextService:
    """Internal service of the member interaction digest (member context) plugin."""
    # Feature key resolved through Feature.get_feature() in is_group_enabled()
    # to decide whether a chatroom may use this capability.
    FEATURE_KEY = "MEMBER_CONTEXT_CAPABILITY"
    # Common Chinese filler words. NOTE(review): not referenced by the methods
    # visible in this file section; presumably used for keyword filtering -
    # confirm against the rest of the module before removing.
    STOPWORDS = {
        "这个", "那个", "就是", "然后", "怎么", "什么", "你们", "我们", "他们", "是不是", "可以",
        "一下", "一个", "已经", "还有", "没有", "因为", "所以", "如果", "但是", "还是", "今天",
        "昨天", "现在", "时候", "感觉", "真的", "应该", "知道", "觉得", "问题", "老师", "老板",
        "群里", "大家", "一下子", "自己", "东西", "这里", "那里", "进行", "需要", "关于"
    }
def __init__(self, db_manager: DBConnectionManager, plugin_config: Optional[Dict] = None):
    """Create the DB operators, the digest service and load all tunables.

    Args:
        db_manager: Connection manager handed to every DB operator.
        plugin_config: Optional plugin configuration. The sections ``api``,
            ``profile`` and ``schedule`` are read; a missing section, a
            section that is explicitly null, or a missing key falls back to
            the defaults coded below.
    """
    self.db_manager = db_manager
    self.plugin_config = plugin_config or {}
    self.LOG = logger

    self.contacts_db = ContactsDBOperator(self.db_manager)
    self.message_db = MessageStorageDB(self.db_manager)
    self.member_context_db = MemberContextDBOperator(self.db_manager)
    self.member_digest_db = MemberDigestDBOperator(self.db_manager)
    self.digest_service = MemberDigestService(
        self.contacts_db, self.message_db, self.member_digest_db, self.plugin_config
    )

    # `.get(section, {})` returns None when the key exists with a null value
    # (e.g. an empty YAML section), so coerce with `or {}` instead.
    api_config = self.plugin_config.get("api") or {}
    profile_config = self.plugin_config.get("profile") or {}
    schedule_config = self.plugin_config.get("schedule") or {}

    # AI endpoint settings; both "enable" and "enabled" spellings are accepted.
    self.ai_enabled = bool(api_config.get("enable", api_config.get("enabled", False)))
    self.ai_base_url = (api_config.get("base_url") or "").rstrip("/")
    self.ai_api_key = api_config.get("api_key", "")
    self.ai_endpoint = str(api_config.get("endpoint", "completion-messages")).lstrip("/")
    self.ai_timeout = int(api_config.get("request_timeout", 60))

    # Profile sampling and refresh thresholds.
    self.sample_days = int(profile_config.get("sample_days", 30))
    self.refresh_limit_per_member = int(profile_config.get("refresh_limit_per_member", 200))
    self.active_member_hours = int(profile_config.get("active_member_hours", 72))
    self.min_member_messages = int(profile_config.get("min_member_messages", 3))
    self.max_members_per_group_per_run = int(profile_config.get("max_members_per_group_per_run", 30))
    self.stale_hours = int(profile_config.get("stale_hours", 24))

    # Long-term score fusion parameters (see _merge_scored_items / _top_scored_items).
    self.stable_decay = float(profile_config.get("stable_decay", 0.96))
    self.stable_max_items = int(profile_config.get("stable_max_items", 6))
    self.stable_min_score = float(profile_config.get("stable_min_score", 0.9))
    self.stable_ready_days = int(profile_config.get("stable_ready_days", 180))

    # Batch scheduling filters used by refresh_all_chatrooms().
    self.only_recent_active_groups = bool(schedule_config.get("only_recent_active_groups", False))
    self.active_hours = int(schedule_config.get("active_hours", 72))
    self.min_group_messages = int(schedule_config.get("min_group_messages", 20))
def build_member_context(self, chatroom_id: str, wxid: str, days: Optional[int] = None,
                         limit: Optional[int] = None, force_digest_rebuild: bool = False) -> Dict:
    """Build (but do not persist) the context dict for one chatroom member.

    The context is assembled from the daily/weekly/monthly digests returned
    by the digest service, optionally refined by the AI endpoint, and then
    fused with the previously stored context.

    Args:
        chatroom_id: Chatroom the member belongs to.
        wxid: Member identifier.
        days: Sampling window; defaults to ``self.sample_days``. Only
            ``min(days, 7)`` days of raw messages are loaded.
        limit: Accepted for interface compatibility. NOTE(review): it is not
            applied here - the raw-message sample is capped at a fixed 120.
        force_digest_rebuild: Forwarded to the digest pipeline as ``force``.

    Returns:
        The merged context dict, including a ``meta`` sub-dict.
    """
    days = days or self.sample_days
    existing_context = self.member_context_db.get_member_context(chatroom_id, wxid)
    member = self.contacts_db.get_chatroom_member_info(chatroom_id, wxid) or {}
    display_name = member.get("display_name") or member.get("nick_name") or wxid

    digest_snapshot = self.digest_service.ensure_member_digest_pipeline(
        chatroom_id, wxid, force=force_digest_rebuild
    )
    daily_digests = digest_snapshot.get("daily_digests", [])
    weekly_digests = digest_snapshot.get("weekly_digests", [])
    monthly_digests = digest_snapshot.get("monthly_digests", [])
    recent_messages = self.message_db.get_member_recent_messages(chatroom_id, wxid, days=min(days, 7), limit=120)

    monthly_structured = [item.get("structured", {}) or {} for item in monthly_digests]
    weekly_structured = [item.get("structured", {}) or {} for item in weekly_digests]
    daily_structured = [item.get("structured", {}) or {} for item in daily_digests]

    observation_days = self._calc_observation_days(daily_digests)
    activity_level = self._calc_activity_level(len(recent_messages), max(min(days, 7), 1))
    existing_meta = (existing_context or {}).get("meta", {}) or {}

    context = {
        "chatroom_id": chatroom_id,
        "wxid": wxid,
        "display_name": display_name,
        "activity_level": activity_level,
        "message_pattern": self._best_text(
            daily_structured, ["message_pattern"], default=self._build_message_pattern(recent_messages)
        ),
        "interaction_style": self._best_text(
            daily_structured, ["interaction_style"], default=self._build_interaction_style(recent_messages)
        ),
        "response_style_hint": self._build_response_style_hint_from_digests(
            daily_structured, weekly_structured, monthly_structured
        ),
        "topics_of_interest": self._extract_scored_items(
            monthly_structured + weekly_structured, ["long_term_topics", "stable_topics", "topics"], limit=5
        ),
        "recent_focus": self._extract_scored_items(daily_structured, ["topics"], limit=4),
        "summary_text": "",
        "confidence": self._calc_digest_confidence(monthly_digests, weekly_digests, daily_digests),
        "source_message_count": len(recent_messages),
        "source_days": days,
        "last_profiled_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "meta": {
            "stable_traits": self._extract_scored_items(
                monthly_structured + weekly_structured, ["stable_traits", "engagement_traits"], limit=self.stable_max_items
            ),
            "habit_patterns": self._extract_scored_items(
                monthly_structured + weekly_structured + daily_structured,
                ["habit_patterns", "habit_signals"], limit=self.stable_max_items
            ),
            "long_term_reply_preferences": self._extract_scored_items(
                monthly_structured + weekly_structured, ["long_term_reply_preferences", "reply_preferences"], limit=4
            ),
            "recent_state": self._extract_scored_items(
                weekly_structured + daily_structured, ["recent_state", "phase_state", "topics"], limit=4
            ),
            "temperament_tendency": self._best_text(
                monthly_structured + weekly_structured + daily_structured,
                ["temperament_tendency", "temperament_signal"], default=""
            ),
            "engagement_traits": self._extract_scored_items(
                daily_structured + weekly_structured, ["engagement_traits", "stable_traits"], limit=4
            ),
            "reply_taboos": self._extract_scored_items(daily_structured, ["reply_taboos"], limit=3),
            "observation_days": observation_days,
            "stable_ready": observation_days >= self.stable_ready_days,
            # `or 0` guards against a stored null counter, which int() rejects.
            "profile_iterations": int(existing_meta.get("profile_iterations") or 0) + 1,
            "history_message_count": self._sum_digest_source_count(daily_digests),
            "digest_daily_count": len(daily_digests),
            "digest_weekly_count": len(weekly_digests),
            "digest_monthly_count": len(monthly_digests),
            "last_daily_digest_at": daily_digests[0].get("last_generated_at") if daily_digests else "",
            "last_weekly_digest_at": weekly_digests[0].get("last_generated_at") if weekly_digests else "",
            "last_monthly_digest_at": monthly_digests[0].get("last_generated_at") if monthly_digests else "",
            "refresh_mode": self._build_refresh_mode(existing_context, digest_snapshot),
        },
    }

    ai_context = self._generate_ai_context_from_digests(
        chatroom_id, wxid, display_name, monthly_digests, weekly_digests, daily_digests
    )
    if ai_context:
        # AI values win only when they are non-empty; otherwise the locally
        # derived value is kept.
        for field in ("activity_level", "message_pattern", "interaction_style",
                      "response_style_hint", "topics_of_interest", "recent_focus",
                      "summary_text"):
            context[field] = ai_context.get(field) or context[field]
        # The parser reports 0.0 when the model omitted "confidence"; treat
        # that like every other empty field instead of zeroing the score.
        context["confidence"] = ai_context.get("confidence") or context["confidence"]
        # The parser emits empty lists/strings for meta fields the model left
        # out. Copy a value only when it carries data or introduces a new key
        # (e.g. ai_tokens), so omitted fields cannot wipe local results.
        for key, value in (ai_context.get("meta") or {}).items():
            if value or key not in context["meta"]:
                context["meta"][key] = value

    context = self._merge_with_existing_context(existing_context, context)
    context["summary_text"] = context.get("summary_text") or self._build_summary_text_from_context(context)
    return context
def refresh_member_context(self, chatroom_id: str, wxid: str, days: Optional[int] = None,
                           limit: Optional[int] = None) -> Dict:
    """Rebuild and persist the context of a single member.

    Raises:
        ValueError: If the feature is not enabled for ``chatroom_id``.

    Returns:
        The context dict that was saved.
    """
    enabled = self.is_group_enabled(chatroom_id)
    if not enabled:
        raise ValueError(f"{chatroom_id} 未启用成员交互摘要功能")

    self.LOG.info(f"[成员交互摘要] 开始刷新单个成员: group={chatroom_id}, wxid={wxid}")
    context = self.build_member_context(chatroom_id, wxid, days=days, limit=limit)
    self.member_context_db.save_member_context(context)

    # Pre-compute the log fragments so the message template stays readable.
    meta = context.get("meta", {})
    digest_counts = "/".join(
        str(meta.get(count_key, 0))
        for count_key in ("digest_daily_count", "digest_weekly_count", "digest_monthly_count")
    )
    ai_flag = "yes" if meta.get("ai_provider") else "no"
    self.LOG.info(
        f"[成员交互摘要] 单个成员刷新完成: group={chatroom_id}, wxid={wxid}, "
        f"display_name={context.get('display_name', wxid)}, messages={context.get('source_message_count', 0)}, "
        f"mode={meta.get('refresh_mode', '')}, "
        f"digests={digest_counts}, "
        f"ai={ai_flag}"
    )
    return context
def refresh_group_contexts(self, chatroom_id: str, days: Optional[int] = None,
                           limit_per_member: Optional[int] = None) -> Dict:
    """Refresh the stored contexts of the recently active members of a chatroom.

    Members are taken from ``_get_recent_active_members``; those that are not
    in the current enabled member list are ignored, those whose context is
    still fresh or that have no usable samples are counted as skipped.

    Returns:
        A dict with ``refreshed``, ``skipped`` and ``disabled``; the
        ``active_candidates`` count is added whenever the feature is enabled.
    """
    days = days or self.sample_days
    limit_per_member = limit_per_member or self.refresh_limit_per_member

    if not self.is_group_enabled(chatroom_id):
        self.LOG.info(f"{chatroom_id} 未启用成员交互摘要功能,跳过刷新")
        return {"refreshed": 0, "skipped": 0, "disabled": True}

    active_members = self._get_recent_active_members(chatroom_id)
    if not active_members:
        self.LOG.info(f"{chatroom_id} 最近没有满足条件的活跃成员,跳过刷新")
        return {"refreshed": 0, "skipped": 0, "disabled": False, "active_candidates": 0}

    members = self.contacts_db.get_chatroom_member_list(chatroom_id) or []
    # Only members with status 1 (or no status field) and a wxid are eligible.
    enabled_members = {
        member.get("wxid"): member for member in members
        if member.get("status", 1) == 1 and member.get("wxid")
    }

    refreshed = 0
    skipped = 0
    total = len(active_members)
    self.LOG.info(
        f"[成员交互摘要] 开始刷新群: group={chatroom_id}, active_candidates={total}, "
        f"days={days}, limit_per_member={limit_per_member}"
    )

    for index, active_member in enumerate(active_members, start=1):
        wxid = active_member.get("wxid")
        if wxid not in enabled_members:
            self.LOG.debug(
                f"[成员交互摘要] 跳过成员(不在当前在群名单): group={chatroom_id}, "
                f"index={index}/{total}, wxid={wxid}"
            )
            continue

        existing_context = self.member_context_db.get_member_context(chatroom_id, wxid)
        if not self._should_refresh_context(existing_context, active_member):
            skipped += 1
            self.LOG.debug(
                f"[成员交互摘要] 跳过成员(画像仍新鲜): group={chatroom_id}, "
                f"index={index}/{total}, wxid={wxid}, latest_message_time={active_member.get('latest_message_time')}, "
                f"last_profiled_at={(existing_context or {}).get('last_profiled_at')}"
            )
            continue

        context = self.build_member_context(chatroom_id, wxid, days=days, limit=limit_per_member)
        # Nothing to store when there are neither raw messages nor daily digests.
        if context["source_message_count"] <= 0 and context.get("meta", {}).get("digest_daily_count", 0) <= 0:
            skipped += 1
            self.LOG.debug(
                f"[成员交互摘要] 跳过成员(样本不足): group={chatroom_id}, "
                f"index={index}/{total}, wxid={wxid}"
            )
            continue

        self.member_context_db.save_member_context(context)
        refreshed += 1
        self.LOG.info(
            f"[成员交互摘要] 刷新成员进度: group={chatroom_id}, index={index}/{total}, "
            f"wxid={wxid}, display_name={context.get('display_name', wxid)}, "
            f"messages={context.get('source_message_count', 0)}, "
            f"activity={context.get('activity_level', '')}, "
            f"mode={context.get('meta', {}).get('refresh_mode', '')}, "
            f"digests={context.get('meta', {}).get('digest_daily_count', 0)}/"
            f"{context.get('meta', {}).get('digest_weekly_count', 0)}/"
            f"{context.get('meta', {}).get('digest_monthly_count', 0)}, "
            f"ai={'yes' if context.get('meta', {}).get('ai_provider') else 'no'}"
        )

    self.LOG.info(
        f"[成员交互摘要] 群刷新完成: group={chatroom_id}, refreshed={refreshed}, "
        f"skipped={skipped}, active_candidates={total}"
    )
    # Include "disabled" so every return path exposes the same flag.
    return {"refreshed": refreshed, "skipped": skipped, "disabled": False, "active_candidates": total}
def refresh_all_chatrooms(self, days: Optional[int] = None, limit_per_member: Optional[int] = None) -> Dict:
    """Run refresh_group_contexts() over every known chatroom and total the results.

    When ``only_recent_active_groups`` is set, chatrooms that are not in the
    recently-active set are counted as inactive and not refreshed.

    Returns:
        Totals keyed by ``groups``, ``members``, ``skipped``,
        ``disabled_groups`` and ``inactive_groups``.
    """
    days = days or self.sample_days
    limit_per_member = limit_per_member or self.refresh_limit_per_member
    groups = self.contacts_db.get_chatroom_list() or []
    active_group_ids = self._get_recent_active_chatrooms() if self.only_recent_active_groups else None

    def passes_activity_filter(room_id) -> bool:
        return active_group_ids is None or room_id in active_group_ids

    # Entries without a chatroom id are ignored entirely (not even counted).
    room_ids = [group.get("chatroom_id") for group in groups if group.get("chatroom_id")]
    total_groups = sum(1 for room_id in room_ids if passes_activity_filter(room_id))
    self.LOG.info(
        f"[成员交互摘要] 开始批量刷新: candidate_groups={total_groups}, "
        f"only_recent_active_groups={self.only_recent_active_groups}, active_hours={self.active_hours}, "
        f"min_group_messages={self.min_group_messages}"
    )

    totals = {"groups": 0, "members": 0, "skipped": 0, "disabled_groups": 0, "inactive_groups": 0}
    processed_groups = 0
    for chatroom_id in room_ids:
        if not passes_activity_filter(chatroom_id):
            totals["inactive_groups"] += 1
            continue

        processed_groups += 1
        self.LOG.info(
            f"[成员交互摘要] 批量刷新进度: group_index={processed_groups}/{total_groups}, group={chatroom_id}"
        )
        result = self.refresh_group_contexts(chatroom_id, days=days, limit_per_member=limit_per_member)
        if result.get("disabled"):
            totals["disabled_groups"] += 1
            continue

        totals["groups"] += 1
        totals["members"] += result["refreshed"]
        totals["skipped"] += result["skipped"]
        self.LOG.info(
            f"[成员交互摘要] 批量群结果: group={chatroom_id}, refreshed={result.get('refreshed', 0)}, "
            f"skipped={result.get('skipped', 0)}, active_candidates={result.get('active_candidates', 0)}"
        )

    self.LOG.info(
        f"成员交互摘要刷新完成: 启用活跃群={totals['groups']}, 成员={totals['members']}, 跳过={totals['skipped']}, "
        f"未启用群={totals['disabled_groups']}, 非活跃群={totals['inactive_groups']}"
    )
    return totals
def is_group_enabled(self, chatroom_id: str) -> bool:
    """Tell whether the member-context feature is enabled for a chatroom.

    If the feature key cannot be resolved to a feature, every chatroom is
    treated as enabled.
    """
    feature = Feature.get_feature(self.FEATURE_KEY)
    if feature is not None:
        permission = GroupBotManager.get_group_permission(chatroom_id, feature)
        return permission == PermissionStatus.ENABLED
    return True
def _get_recent_active_members(self, chatroom_id: str) -> List[Dict]:
    """Query the senders that were recently active in a chatroom.

    Rows carry ``wxid``, ``msg_count`` and ``latest_message_time``; a
    datetime ``latest_message_time`` is converted to a
    ``YYYY-MM-DD HH:MM:SS`` string in place.
    """
    sql = """
SELECT
sender AS wxid,
COUNT(*) AS msg_count,
MAX(timestamp) AS latest_message_time
FROM messages
WHERE group_id = %s
AND sender IS NOT NULL
AND sender <> ''
AND timestamp >= DATE_SUB(NOW(), INTERVAL %s HOUR)
AND message_type IN (1, 49)
GROUP BY sender
HAVING COUNT(*) >= %s
ORDER BY latest_message_time DESC, msg_count DESC
LIMIT %s
"""
    # Parameter order follows the placeholders: group, window, threshold, cap.
    params = (
        chatroom_id,
        self.active_member_hours,
        self.min_member_messages,
        self.max_members_per_group_per_run,
    )
    rows = self.message_db.execute_query(sql, params) or []
    for row in rows:
        stamp = row.get("latest_message_time")
        if not isinstance(stamp, datetime):
            continue
        row["latest_message_time"] = stamp.strftime("%Y-%m-%d %H:%M:%S")
    return rows
def _should_refresh_context(self, existing_context: Optional[Dict], active_member: Dict) -> bool:
if not existing_context:
return True
latest_message_time = active_member.get("latest_message_time")
context_time = existing_context.get("last_profiled_at")
latest_dt = self._parse_datetime(latest_message_time)
context_dt = self._parse_datetime(context_time)
if not latest_dt or not context_dt:
return True
if latest_dt > context_dt and (latest_dt - context_dt).total_seconds() >= self.stale_hours * 3600:
return True
if (datetime.now() - context_dt).total_seconds() >= self.stale_hours * 3600 * 2:
return True
return False
@staticmethod
def _parse_datetime(value) -> Optional[datetime]:
if isinstance(value, datetime):
return value
if not value:
return None
try:
return datetime.strptime(str(value), "%Y-%m-%d %H:%M:%S")
except Exception:
try:
return datetime.strptime(str(value)[:10], "%Y-%m-%d")
except Exception:
return None
def _get_recent_active_chatrooms(self) -> set:
    """Return the ids of chatrooms that reached the recent-message threshold.

    Uses ``self.active_hours`` as the look-back window and
    ``self.min_group_messages`` as the minimum message count.
    """
    sql = """
SELECT group_id, COUNT(*) AS msg_count
FROM messages
WHERE group_id LIKE %s
AND timestamp >= DATE_SUB(NOW(), INTERVAL %s HOUR)
GROUP BY group_id
HAVING COUNT(*) >= %s
"""
    params = ("%@chatroom", self.active_hours, self.min_group_messages)
    rows = self.message_db.execute_query(sql, params) or []
    group_ids = set()
    for row in rows:
        group_id = row.get("group_id")
        if group_id:
            group_ids.add(group_id)
    return group_ids
def _generate_ai_context_from_digests(self, chatroom_id: str, wxid: str, display_name: str,
                                      monthly_digests: List[Dict], weekly_digests: List[Dict],
                                      daily_digests: List[Dict]) -> Optional[Dict]:
    """Ask the configured AI endpoint to consolidate the digests into a profile.

    Returns:
        The parsed answer with ``ai_*`` bookkeeping added to its ``meta``, or
        ``None`` when AI is not configured, there are too few digests, the
        answer cannot be parsed, or the request fails (failures are logged,
        not raised).
    """
    ai_configured = self.ai_enabled and self.ai_base_url and self.ai_api_key
    if not ai_configured:
        return None
    # Needs at least two daily digests, or any weekly/monthly digest.
    has_enough_digests = len(daily_digests) >= 2 or len(weekly_digests) >= 1 or len(monthly_digests) >= 1
    if not has_enough_digests:
        return None

    prompt = MemberContextPromptBuilder.build_final_context_prompt(
        chatroom_id, wxid, display_name, monthly_digests, weekly_digests, daily_digests
    )
    url = f"{self.ai_base_url}/{self.ai_endpoint}"
    headers = {
        "Authorization": f"Bearer {self.ai_api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "inputs": {"query": prompt},
        "response_mode": "blocking",
        "user": f"member-context-final:{chatroom_id}:{wxid}",
    }

    try:
        self.LOG.info(
            f"[成员交互摘要][AI] 发起最终画像请求: group={chatroom_id}, wxid={wxid}, "
            f"monthly={len(monthly_digests)}, weekly={len(weekly_digests)}, daily={len(daily_digests)}"
        )
        response = requests.post(url, headers=headers, json=payload, timeout=self.ai_timeout)
        response.raise_for_status()
        data = response.json()
        answer = data.get("answer", "")
        parsed = self._parse_ai_answer(answer)
        if not parsed:
            self.LOG.warning(
                f"[成员交互摘要][AI] 最终画像JSON解析失败: group={chatroom_id}, wxid={wxid}, "
                f"answer_preview={(answer or '')[:200]}"
            )
            return None

        usage = (data.get("metadata") or {}).get("usage", {}) or {}
        ai_meta = parsed.get("meta", {}) or {}
        ai_meta["ai_provider"] = "dify"
        ai_meta["ai_mode"] = "completion"
        ai_meta["ai_tokens"] = usage.get("total_tokens")
        ai_meta["ai_latency"] = usage.get("latency")
        parsed["meta"] = ai_meta
        return parsed
    except Exception as e:
        # Any failure falls back to the local fusion result.
        self.LOG.warning(f"成员交互摘要最终画像 AI 生成失败,回退到本地融合: chatroom={chatroom_id}, wxid={wxid}, error={e}")
        return None
def _parse_ai_answer(self, answer: str) -> Optional[Dict]:
if not answer:
return None
text = answer.strip()
match = re.search(r"\{.*\}", text, re.S)
if match:
text = match.group(0)
try:
data = json.loads(text)
except Exception:
return None
def norm_list(value, limit):
if not isinstance(value, list):
return []
return [str(item).strip() for item in value[:limit] if str(item).strip()]
try:
confidence = float(data.get("confidence", 0))
except Exception:
confidence = 0.0
return {
"activity_level": str(data.get("activity_level", "")).strip(),
"message_pattern": str(data.get("message_pattern", "")).strip(),
"interaction_style": str(data.get("interaction_style", "")).strip(),
"response_style_hint": str(data.get("response_style_hint", "")).strip(),
"topics_of_interest": norm_list(data.get("topics_of_interest"), 5),
"recent_focus": norm_list(data.get("recent_focus"), 4),
"summary_text": str(data.get("summary_text", "")).strip(),
"confidence": max(0.0, min(1.0, confidence)),
"meta": {
"stable_traits": norm_list(data.get("stable_traits"), self.stable_max_items),
"habit_patterns": norm_list(data.get("habit_patterns"), self.stable_max_items),
"long_term_reply_preferences": norm_list(data.get("long_term_reply_preferences"), 4),
"recent_state": norm_list(data.get("recent_state"), 4),
"temperament_tendency": str(data.get("temperament_tendency", "")).strip(),
"engagement_traits": norm_list(data.get("engagement_traits"), 4),
"reply_taboos": norm_list(data.get("reply_taboos"), 3),
}
}
def _merge_with_existing_context(self, existing_context: Optional[Dict], current_context: Dict) -> Dict:
existing_context = existing_context or {}
existing_meta = existing_context.get("meta", {}) or {}
meta = current_context.get("meta", {}) or {}
observation_days = max(
int(meta.get("observation_days", 0)),
int(existing_meta.get("observation_days", 0)),
)
meta["observation_days"] = observation_days
meta["stable_ready"] = observation_days >= self.stable_ready_days
merged_topic_scores = self._merge_scored_items(
existing_meta.get("topic_scores", {}),
current_context.get("topics_of_interest", []),
current_context.get("confidence", 0),
)
merged_trait_scores = self._merge_scored_items(
existing_meta.get("stable_trait_scores", {}),
meta.get("stable_traits", []),
current_context.get("confidence", 0),
)
merged_habit_scores = self._merge_scored_items(
existing_meta.get("habit_pattern_scores", {}),
meta.get("habit_patterns", []),
current_context.get("confidence", 0),
)
merged_reply_pref_scores = self._merge_scored_items(
existing_meta.get("long_term_reply_preference_scores", {}),
meta.get("long_term_reply_preferences", []),
current_context.get("confidence", 0),
)
merged_temperament_scores = self._merge_scored_items(
existing_meta.get("temperament_tendency_scores", {}),
[meta.get("temperament_tendency")] if meta.get("temperament_tendency") else [],
current_context.get("confidence", 0) * 0.9,
)
meta["topic_scores"] = merged_topic_scores
meta["stable_trait_scores"] = merged_trait_scores
meta["habit_pattern_scores"] = merged_habit_scores
meta["long_term_reply_preference_scores"] = merged_reply_pref_scores
meta["temperament_tendency_scores"] = merged_temperament_scores
meta["stable_traits"] = self._top_scored_items(merged_trait_scores, limit=self.stable_max_items)
meta["habit_patterns"] = self._top_scored_items(merged_habit_scores, limit=self.stable_max_items)
meta["long_term_reply_preferences"] = self._top_scored_items(merged_reply_pref_scores, limit=4)
temperament = self._top_scored_items(merged_temperament_scores, limit=1)
meta["temperament_tendency"] = temperament[0] if temperament else meta.get("temperament_tendency", "")
meta["engagement_traits"] = (meta.get("engagement_traits") or existing_meta.get("engagement_traits") or [])[:4]
meta["reply_taboos"] = (meta.get("reply_taboos") or existing_meta.get("reply_taboos") or [])[:3]
meta["recent_state"] = (meta.get("recent_state") or existing_meta.get("recent_state") or [])[:4]
meta["profile_iterations"] = max(
int(meta.get("profile_iterations", 0)),
int(existing_meta.get("profile_iterations", 0)),
)
meta["history_message_count"] = max(
int(meta.get("history_message_count", 0)),
int(existing_meta.get("history_message_count", 0)),
)
current_context["topics_of_interest"] = self._top_scored_items(merged_topic_scores, limit=5) or current_context.get("topics_of_interest", [])
current_context["recent_focus"] = (current_context.get("recent_focus") or existing_context.get("recent_focus") or [])[:4]
current_context["response_style_hint"] = current_context.get("response_style_hint") or existing_context.get("response_style_hint") or ""
current_context["meta"] = meta
return current_context
def _extract_scored_items(self, items: List[Dict], keys: List[str], limit: int) -> List[str]:
scores = {}
for index, item in enumerate(items):
weight = max(0.5, 1.2 - index * 0.08)
for key in keys:
values = item.get(key, [])
if not isinstance(values, list):
continue
for value in values:
normalized = str(value).strip()
if not normalized:
continue
scores[normalized] = scores.get(normalized, 0.0) + weight
return [key for key, _ in sorted(scores.items(), key=lambda pair: pair[1], reverse=True)[:limit]]
def _best_text(self, items: List[Dict], keys: List[str], default: str = "") -> str:
counter = Counter()
for item in items:
for key in keys:
value = str(item.get(key, "")).strip()
if value:
counter[value] += 1
if counter:
return counter.most_common(1)[0][0]
return default
def _build_response_style_hint_from_digests(self, daily_structured: List[Dict],
weekly_structured: List[Dict],
monthly_structured: List[Dict]) -> str:
hint = self._best_text(daily_structured, ["response_style_hint"])
if hint:
return hint
preferences = self._extract_scored_items(
monthly_structured + weekly_structured,
["long_term_reply_preferences", "reply_preferences"],
limit=3,
)
if preferences:
return "更适合:" + "".join(preferences[:3])
return "保持自然口语化,结论和解释尽量平衡"
def _calc_digest_confidence(self, monthly_digests: List[Dict], weekly_digests: List[Dict],
daily_digests: List[Dict]) -> float:
base = 0.25
base += min(0.35, len(monthly_digests) * 0.08)
base += min(0.2, len(weekly_digests) * 0.04)
base += min(0.15, len(daily_digests) * 0.02)
return round(min(0.95, base), 2)
def _calc_observation_days(self, daily_digests: List[Dict]) -> int:
if not daily_digests:
return 0
end_dt = self._parse_datetime(daily_digests[0].get("period_end"))
start_dt = self._parse_datetime(daily_digests[-1].get("period_start"))
if not start_dt or not end_dt:
return 0
return max(0, (end_dt - start_dt).days)
@staticmethod
def _sum_digest_source_count(daily_digests: List[Dict]) -> int:
return sum(int(item.get("source_count", 0)) for item in daily_digests)
def _build_refresh_mode(self, existing_context: Optional[Dict], digest_snapshot: Dict) -> str:
if not existing_context:
return "bootstrap"
if (digest_snapshot.get("stats", {}) or {}).get("built_monthly", 0) > 0:
return "recalibration"
return "incremental"
def _build_summary_text_from_context(self, context: Dict) -> str:
meta = context.get("meta", {}) or {}
parts = []
if meta.get("temperament_tendency"):
label = "长期沟通倾向" if meta.get("stable_ready") else "阶段性沟通倾向"
parts.append(f"{label}{meta.get('temperament_tendency')}")
if meta.get("stable_traits"):
parts.append(f"长期特征:{''.join(meta.get('stable_traits')[:3])}")
if meta.get("habit_patterns"):
parts.append(f"习惯模式:{''.join(meta.get('habit_patterns')[:3])}")
if meta.get("recent_state"):
parts.append(f"近期状态:{''.join(meta.get('recent_state')[:3])}")
if context.get("response_style_hint"):
parts.append(f"回复建议:{context.get('response_style_hint')}")
return "".join(parts[:5])
def _merge_scored_items(self, existing_scores: Dict, current_items: List[str], confidence: float) -> Dict[str, float]:
merged = {}
for key, value in (existing_scores or {}).items():
try:
score = float(value) * self.stable_decay
except Exception:
continue
if score >= 0.2:
merged[str(key).strip()] = round(score, 4)
boost = max(0.6, min(1.8, 0.8 + confidence))
for item in current_items or []:
normalized = str(item).strip()
if not normalized:
continue
merged[normalized] = round(merged.get(normalized, 0.0) + boost, 4)
return merged
def _top_scored_items(self, scores: Dict, limit: int) -> List[str]:
ordered = sorted(
((str(key).strip(), float(value)) for key, value in (scores or {}).items() if str(key).strip()),
key=lambda item: item[1],
reverse=True,
)
return [key for key, value in ordered if value >= self.stable_min_score][:limit]
def _calc_activity_level(self, message_count: int, days: int) -> str:
daily_avg = message_count / max(days, 1)
if message_count >= 80 or daily_avg >= 3:
return "高活跃"
if message_count >= 25 or daily_avg >= 1:
return "中活跃"
if message_count > 0:
return "低活跃"
return "观察中"
def _build_message_pattern(self, messages: List[Dict]) -> str:
if not messages:
return "样本较少,暂不做明显模式判断"
contents = [m.get("content", "") for m in messages if m.get("content")]
if not contents:
return "样本较少,暂不做明显模式判断"
avg_len = sum(len(c) for c in contents) / len(contents)
question_ratio = sum(1 for c in contents if "?" in c or "" in c) / len(contents)
link_ratio = sum(1 for c in contents if "http://" in c or "https://" in c) / len(contents)
traits = []
if avg_len <= 12:
traits.append("短句居多")
elif avg_len >= 35:
traits.append("表达较完整")
else:
traits.append("表达中等长度")
if question_ratio >= 0.35:
traits.append("问题导向明显")
elif question_ratio >= 0.15:
traits.append("偶尔连续追问")
if link_ratio >= 0.15:
traits.append("常分享链接或资料")
return "".join(traits or ["发言较平稳"])
def _build_interaction_style(self, messages: List[Dict]) -> str:
if not messages:
return "互动样本较少"
contents = [m.get("content", "") for m in messages if m.get("content")]
question_ratio = sum(1 for c in contents if "?" in c or "" in c) / max(len(contents), 1)
emoji_ratio = sum(1 for c in contents if re.search(r"[\U0001F300-\U0001FAFF\u2600-\u27BF]", c)) / max(len(contents), 1)
mention_ratio = sum(1 for c in contents if "@" in c) / max(len(contents), 1)
parts = []
if question_ratio >= 0.3:
parts.append("偏提问推进")
if emoji_ratio >= 0.15:
parts.append("表情互动感较强")
if mention_ratio >= 0.1:
parts.append("会主动点名互动")
return "".join(parts or ["自然跟随式互动"])