完善成员画像插件的日/周/月分层提取与已结束日期处理逻辑
- 将成员画像能力进一步收敛到插件内部,强化按群启用、后台异步刷新、后台查看的完整链路
- 新增群维度按日批量提取能力:以群为单位按天处理一次,统一提取当天活跃成员的日级画像摘要
- 日级画像输出扩展为更适合长期累计的结构化信号,补充身份线索、技能信号、家庭线索、阶段线索、价值偏好、群内角色、决策风格等字段
- 优化提示词设计,明确要求优先提取可复用、可累计、可验证的行为线索,减少一次性情绪和短期噪声对长期画像的干扰
- 打通 日 -> 周 -> 月 -> 最终画像 的分层汇总链路,让后续月度画像直接消费日/周级结构化摘要,而不是重复回扫长窗口原始消息
- 新增/完善画像融合策略:identity_traits、skill_profile、family_profile、life_stage_profile、value_profile 也纳入长期分数累计,不再仅依赖最近一次结果覆盖旧结果
- 将活跃群、活跃成员、辅助消息样本等口径统一调整为只处理已结束日期,避免当天未完结数据进入画像计算
- 调整日级批处理逻辑,默认只处理昨天及更早日期,确保不会处理当天消息
- 修复重复执行时仍然先调用 AI 再跳过的问题,改为先检查当天候选成员是否已完成生成,全部已存在时直接跳过,减少无效 AI 请求和耗时
- 增加群日批处理、周摘要、月摘要、群刷新进度等日志,方便后台定位当前刷新到哪些群、哪些成员、进度如何
- 丰富后台画像展示字段,支持查看更完整的长期画像维度与摘要统计
- 更新插件配置默认值,收敛为近 60 天启动窗口、每日滚动处理与群级日摘要模式
- 补充 message_storage 读取能力,支持按群按日提取消息,为群日批量画像与后续周期汇总提供底层数据支撑
This commit is contained in:
@@ -67,7 +67,8 @@ class MemberContextService:
|
||||
self.min_group_messages = int(schedule_config.get("min_group_messages", 20))
|
||||
|
||||
def build_member_context(self, chatroom_id: str, wxid: str, days: Optional[int] = None,
|
||||
limit: Optional[int] = None, force_digest_rebuild: bool = False) -> Dict:
|
||||
limit: Optional[int] = None, force_digest_rebuild: bool = False,
|
||||
ensure_group_daily: bool = True) -> Dict:
|
||||
days = days or self.sample_days
|
||||
limit = limit or self.refresh_limit_per_member
|
||||
|
||||
@@ -75,6 +76,11 @@ class MemberContextService:
|
||||
member = self.contacts_db.get_chatroom_member_info(chatroom_id, wxid) or {}
|
||||
display_name = member.get("display_name") or member.get("nick_name") or wxid
|
||||
|
||||
group_digest_stats = {"built_daily": 0, "touched_members": []}
|
||||
if ensure_group_daily:
|
||||
group_digest_stats = self.digest_service.ensure_recent_group_daily_digests(
|
||||
chatroom_id, force=force_digest_rebuild
|
||||
)
|
||||
digest_snapshot = self.digest_service.ensure_member_digest_pipeline(
|
||||
chatroom_id, wxid, force=force_digest_rebuild
|
||||
)
|
||||
@@ -82,7 +88,13 @@ class MemberContextService:
|
||||
weekly_digests = digest_snapshot.get("weekly_digests", [])
|
||||
monthly_digests = digest_snapshot.get("monthly_digests", [])
|
||||
|
||||
recent_messages = self.message_db.get_member_recent_messages(chatroom_id, wxid, days=min(days, 7), limit=120)
|
||||
recent_messages = self.message_db.get_member_recent_messages(
|
||||
chatroom_id,
|
||||
wxid,
|
||||
days=min(days, 7),
|
||||
limit=120,
|
||||
include_today=False,
|
||||
)
|
||||
monthly_structured = [item.get("structured", {}) or {} for item in monthly_digests]
|
||||
weekly_structured = [item.get("structured", {}) or {} for item in weekly_digests]
|
||||
daily_structured = [item.get("structured", {}) or {} for item in daily_digests]
|
||||
@@ -113,6 +125,26 @@ class MemberContextService:
|
||||
"source_days": days,
|
||||
"last_profiled_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"meta": {
|
||||
"identity_traits": self._extract_scored_items(
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["identity_traits", "identity_clues"], limit=5
|
||||
),
|
||||
"skill_profile": self._extract_scored_items(
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["skill_profile", "skill_signals"], limit=6
|
||||
),
|
||||
"family_profile": self._extract_scored_items(
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["family_profile", "family_signals"], limit=4
|
||||
),
|
||||
"life_stage_profile": self._extract_scored_items(
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["life_stage_profile", "life_stage_signals"], limit=4
|
||||
),
|
||||
"value_profile": self._extract_scored_items(
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["value_profile", "value_preferences"], limit=5
|
||||
),
|
||||
"stable_traits": self._extract_scored_items(
|
||||
monthly_structured + weekly_structured, ["stable_traits", "engagement_traits"], limit=self.stable_max_items
|
||||
),
|
||||
@@ -130,6 +162,14 @@ class MemberContextService:
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["temperament_tendency", "temperament_signal"], default=""
|
||||
),
|
||||
"group_role": self._best_text(
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["group_role", "social_role"], default=""
|
||||
),
|
||||
"decision_profile": self._best_text(
|
||||
monthly_structured + weekly_structured + daily_structured,
|
||||
["decision_profile", "decision_style"], default=""
|
||||
),
|
||||
"engagement_traits": self._extract_scored_items(
|
||||
daily_structured + weekly_structured, ["engagement_traits", "stable_traits"], limit=4
|
||||
),
|
||||
@@ -144,7 +184,7 @@ class MemberContextService:
|
||||
"last_daily_digest_at": daily_digests[0].get("last_generated_at") if daily_digests else "",
|
||||
"last_weekly_digest_at": weekly_digests[0].get("last_generated_at") if weekly_digests else "",
|
||||
"last_monthly_digest_at": monthly_digests[0].get("last_generated_at") if monthly_digests else "",
|
||||
"refresh_mode": self._build_refresh_mode(existing_context, digest_snapshot),
|
||||
"refresh_mode": self._build_refresh_mode(existing_context, digest_snapshot, group_digest_stats),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -214,6 +254,13 @@ class MemberContextService:
|
||||
f"days={days}, limit_per_member={limit_per_member}"
|
||||
)
|
||||
|
||||
group_digest_stats = self.digest_service.ensure_recent_group_daily_digests(chatroom_id)
|
||||
self.LOG.info(
|
||||
f"[成员交互摘要] 群日摘要批处理完成: group={chatroom_id}, "
|
||||
f"built_daily={group_digest_stats.get('built_daily', 0)}, "
|
||||
f"touched_members={len(group_digest_stats.get('touched_members', []))}"
|
||||
)
|
||||
|
||||
for index, active_member in enumerate(active_members, start=1):
|
||||
wxid = active_member.get("wxid")
|
||||
if wxid not in enabled_members:
|
||||
@@ -231,7 +278,9 @@ class MemberContextService:
|
||||
f"last_profiled_at={(existing_context or {}).get('last_profiled_at')}"
|
||||
)
|
||||
continue
|
||||
context = self.build_member_context(chatroom_id, wxid, days=days, limit=limit_per_member)
|
||||
context = self.build_member_context(
|
||||
chatroom_id, wxid, days=days, limit=limit_per_member, ensure_group_daily=False
|
||||
)
|
||||
if context["source_message_count"] <= 0 and context.get("meta", {}).get("digest_daily_count", 0) <= 0:
|
||||
skipped += 1
|
||||
self.LOG.debug(
|
||||
@@ -329,7 +378,8 @@ class MemberContextService:
|
||||
WHERE group_id = %s
|
||||
AND sender IS NOT NULL
|
||||
AND sender <> ''
|
||||
AND timestamp >= DATE_SUB(NOW(), INTERVAL %s HOUR)
|
||||
AND timestamp >= DATE_SUB(CURDATE(), INTERVAL %s HOUR)
|
||||
AND timestamp < CURDATE()
|
||||
AND message_type IN (1, 49)
|
||||
GROUP BY sender
|
||||
HAVING COUNT(*) >= %s
|
||||
@@ -384,7 +434,8 @@ class MemberContextService:
|
||||
SELECT group_id, COUNT(*) AS msg_count
|
||||
FROM messages
|
||||
WHERE group_id LIKE %s
|
||||
AND timestamp >= DATE_SUB(NOW(), INTERVAL %s HOUR)
|
||||
AND timestamp >= DATE_SUB(CURDATE(), INTERVAL %s HOUR)
|
||||
AND timestamp < CURDATE()
|
||||
GROUP BY group_id
|
||||
HAVING COUNT(*) >= %s
|
||||
"""
|
||||
@@ -473,9 +524,16 @@ class MemberContextService:
|
||||
"summary_text": str(data.get("summary_text", "")).strip(),
|
||||
"confidence": max(0.0, min(1.0, confidence)),
|
||||
"meta": {
|
||||
"identity_traits": norm_list(data.get("identity_traits"), 5),
|
||||
"skill_profile": norm_list(data.get("skill_profile"), 6),
|
||||
"family_profile": norm_list(data.get("family_profile"), 4),
|
||||
"life_stage_profile": norm_list(data.get("life_stage_profile"), 4),
|
||||
"value_profile": norm_list(data.get("value_profile"), 5),
|
||||
"stable_traits": norm_list(data.get("stable_traits"), self.stable_max_items),
|
||||
"habit_patterns": norm_list(data.get("habit_patterns"), self.stable_max_items),
|
||||
"long_term_reply_preferences": norm_list(data.get("long_term_reply_preferences"), 4),
|
||||
"group_role": str(data.get("group_role", "")).strip(),
|
||||
"decision_profile": str(data.get("decision_profile", "")).strip(),
|
||||
"recent_state": norm_list(data.get("recent_state"), 4),
|
||||
"temperament_tendency": str(data.get("temperament_tendency", "")).strip(),
|
||||
"engagement_traits": norm_list(data.get("engagement_traits"), 4),
|
||||
@@ -510,6 +568,31 @@ class MemberContextService:
|
||||
meta.get("habit_patterns", []),
|
||||
current_context.get("confidence", 0),
|
||||
)
|
||||
merged_identity_scores = self._merge_scored_items(
|
||||
existing_meta.get("identity_trait_scores", {}),
|
||||
meta.get("identity_traits", []),
|
||||
current_context.get("confidence", 0) * 0.75,
|
||||
)
|
||||
merged_skill_scores = self._merge_scored_items(
|
||||
existing_meta.get("skill_profile_scores", {}),
|
||||
meta.get("skill_profile", []),
|
||||
current_context.get("confidence", 0) * 0.85,
|
||||
)
|
||||
merged_family_scores = self._merge_scored_items(
|
||||
existing_meta.get("family_profile_scores", {}),
|
||||
meta.get("family_profile", []),
|
||||
current_context.get("confidence", 0) * 0.55,
|
||||
)
|
||||
merged_life_stage_scores = self._merge_scored_items(
|
||||
existing_meta.get("life_stage_profile_scores", {}),
|
||||
meta.get("life_stage_profile", []),
|
||||
current_context.get("confidence", 0) * 0.65,
|
||||
)
|
||||
merged_value_scores = self._merge_scored_items(
|
||||
existing_meta.get("value_profile_scores", {}),
|
||||
meta.get("value_profile", []),
|
||||
current_context.get("confidence", 0) * 0.75,
|
||||
)
|
||||
merged_reply_pref_scores = self._merge_scored_items(
|
||||
existing_meta.get("long_term_reply_preference_scores", {}),
|
||||
meta.get("long_term_reply_preferences", []),
|
||||
@@ -524,13 +607,35 @@ class MemberContextService:
|
||||
meta["topic_scores"] = merged_topic_scores
|
||||
meta["stable_trait_scores"] = merged_trait_scores
|
||||
meta["habit_pattern_scores"] = merged_habit_scores
|
||||
meta["identity_trait_scores"] = merged_identity_scores
|
||||
meta["skill_profile_scores"] = merged_skill_scores
|
||||
meta["family_profile_scores"] = merged_family_scores
|
||||
meta["life_stage_profile_scores"] = merged_life_stage_scores
|
||||
meta["value_profile_scores"] = merged_value_scores
|
||||
meta["long_term_reply_preference_scores"] = merged_reply_pref_scores
|
||||
meta["temperament_tendency_scores"] = merged_temperament_scores
|
||||
meta["identity_traits"] = self._top_scored_items(merged_identity_scores, limit=5)
|
||||
meta["skill_profile"] = self._top_scored_items(merged_skill_scores, limit=6)
|
||||
meta["family_profile"] = self._top_scored_items(merged_family_scores, limit=4)
|
||||
meta["life_stage_profile"] = self._top_scored_items(merged_life_stage_scores, limit=4)
|
||||
meta["value_profile"] = self._top_scored_items(merged_value_scores, limit=5)
|
||||
meta["stable_traits"] = self._top_scored_items(merged_trait_scores, limit=self.stable_max_items)
|
||||
meta["habit_patterns"] = self._top_scored_items(merged_habit_scores, limit=self.stable_max_items)
|
||||
meta["long_term_reply_preferences"] = self._top_scored_items(merged_reply_pref_scores, limit=4)
|
||||
temperament = self._top_scored_items(merged_temperament_scores, limit=1)
|
||||
meta["temperament_tendency"] = temperament[0] if temperament else meta.get("temperament_tendency", "")
|
||||
if not meta["identity_traits"]:
|
||||
meta["identity_traits"] = (existing_meta.get("identity_traits") or [])[:5]
|
||||
if not meta["skill_profile"]:
|
||||
meta["skill_profile"] = (existing_meta.get("skill_profile") or [])[:6]
|
||||
if not meta["family_profile"]:
|
||||
meta["family_profile"] = (existing_meta.get("family_profile") or [])[:4]
|
||||
if not meta["life_stage_profile"]:
|
||||
meta["life_stage_profile"] = (existing_meta.get("life_stage_profile") or [])[:4]
|
||||
if not meta["value_profile"]:
|
||||
meta["value_profile"] = (existing_meta.get("value_profile") or [])[:5]
|
||||
meta["group_role"] = meta.get("group_role") or existing_meta.get("group_role") or ""
|
||||
meta["decision_profile"] = meta.get("decision_profile") or existing_meta.get("decision_profile") or ""
|
||||
meta["engagement_traits"] = (meta.get("engagement_traits") or existing_meta.get("engagement_traits") or [])[:4]
|
||||
meta["reply_taboos"] = (meta.get("reply_taboos") or existing_meta.get("reply_taboos") or [])[:3]
|
||||
meta["recent_state"] = (meta.get("recent_state") or existing_meta.get("recent_state") or [])[:4]
|
||||
@@ -611,9 +716,12 @@ class MemberContextService:
|
||||
def _sum_digest_source_count(daily_digests: List[Dict]) -> int:
|
||||
return sum(int(item.get("source_count", 0)) for item in daily_digests)
|
||||
|
||||
def _build_refresh_mode(self, existing_context: Optional[Dict], digest_snapshot: Dict) -> str:
|
||||
def _build_refresh_mode(self, existing_context: Optional[Dict], digest_snapshot: Dict,
|
||||
group_digest_stats: Optional[Dict] = None) -> str:
|
||||
if not existing_context:
|
||||
return "bootstrap"
|
||||
if (group_digest_stats or {}).get("built_daily", 0) > 0:
|
||||
return "daily_rollup"
|
||||
if (digest_snapshot.get("stats", {}) or {}).get("built_monthly", 0) > 0:
|
||||
return "recalibration"
|
||||
return "incremental"
|
||||
@@ -626,6 +734,12 @@ class MemberContextService:
|
||||
parts.append(f"{label}:{meta.get('temperament_tendency')}")
|
||||
if meta.get("stable_traits"):
|
||||
parts.append(f"长期特征:{'、'.join(meta.get('stable_traits')[:3])}")
|
||||
if meta.get("identity_traits"):
|
||||
parts.append(f"身份线索:{'、'.join(meta.get('identity_traits')[:2])}")
|
||||
if meta.get("skill_profile"):
|
||||
parts.append(f"技能画像:{'、'.join(meta.get('skill_profile')[:3])}")
|
||||
if meta.get("value_profile"):
|
||||
parts.append(f"判断偏好:{'、'.join(meta.get('value_profile')[:2])}")
|
||||
if meta.get("habit_patterns"):
|
||||
parts.append(f"习惯模式:{'、'.join(meta.get('habit_patterns')[:3])}")
|
||||
if meta.get("recent_state"):
|
||||
|
||||
Reference in New Issue
Block a user