完善成员画像定时任务的周月摘要补偿策略
This commit is contained in:
@@ -124,24 +124,18 @@ class MemberDigestService:
|
|||||||
"stats": {"daily": 0, "weekly": 0, "monthly": 0, "active_days": 0, "built_daily": 0},
|
"stats": {"daily": 0, "weekly": 0, "monthly": 0, "active_days": 0, "built_daily": 0},
|
||||||
}
|
}
|
||||||
|
|
||||||
latest_daily_date = self._extract_latest_daily_date(all_daily_digests)
|
|
||||||
built_weekly = 0
|
built_weekly = 0
|
||||||
built_monthly = 0
|
built_monthly = 0
|
||||||
if enable_weekly and (force or self._should_run_weekly(latest_daily_date)):
|
# 周/月摘要改成“触发即补偿”的思路:
|
||||||
|
# 1. 只要进入周/月任务,就检查历史上所有“已完结但缺失”的周期;
|
||||||
|
# 2. 缺了就补,不再依赖“今天刚好是不是周日/月末”这种单点窗口;
|
||||||
|
# 3. 具体是否跳过当前未完结周期,放到 _ensure_weekly_digests / _ensure_monthly_digests 内部判断。
|
||||||
|
if enable_weekly:
|
||||||
built_weekly = self._ensure_weekly_digests(chatroom_id, wxid, display_name, force=force)
|
built_weekly = self._ensure_weekly_digests(chatroom_id, wxid, display_name, force=force)
|
||||||
elif enable_weekly:
|
|
||||||
self.LOG.debug(
|
|
||||||
f"[成员交互摘要][周摘要] 本次跳过(未到周处理窗口): "
|
|
||||||
f"group={chatroom_id}, wxid={wxid}, latest_daily_date={latest_daily_date}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if enable_monthly and (force or self._should_run_monthly(latest_daily_date)):
|
# 月摘要依赖周摘要,所以这里默认在周摘要补偿完成后再继续检查月摘要缺口。
|
||||||
|
if enable_monthly:
|
||||||
built_monthly = self._ensure_monthly_digests(chatroom_id, wxid, display_name, force=force)
|
built_monthly = self._ensure_monthly_digests(chatroom_id, wxid, display_name, force=force)
|
||||||
elif enable_monthly:
|
|
||||||
self.LOG.debug(
|
|
||||||
f"[成员交互摘要][月摘要] 本次跳过(未到月处理窗口): "
|
|
||||||
f"group={chatroom_id}, wxid={wxid}, latest_daily_date={latest_daily_date}"
|
|
||||||
)
|
|
||||||
|
|
||||||
all_weekly_digests = self.digest_db.list_digests(chatroom_id, wxid, "weekly", limit=200)
|
all_weekly_digests = self.digest_db.list_digests(chatroom_id, wxid, "weekly", limit=200)
|
||||||
all_monthly_digests = self.digest_db.list_digests(chatroom_id, wxid, "monthly", limit=120)
|
all_monthly_digests = self.digest_db.list_digests(chatroom_id, wxid, "monthly", limit=120)
|
||||||
@@ -169,30 +163,12 @@ class MemberDigestService:
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_latest_daily_date(self, daily_digests: List[Dict]) -> Optional[datetime]:
|
|
||||||
if not daily_digests:
|
|
||||||
return None
|
|
||||||
latest_key = daily_digests[0].get("period_key") or daily_digests[0].get("period_end")
|
|
||||||
return self._parse_period_date(latest_key)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse_period_date(value: Optional[str]) -> Optional[datetime]:
|
def _get_closed_reference_date() -> datetime:
|
||||||
if not value:
|
# 摘要只基于“已经完整结束的一天”做补偿判断。
|
||||||
return None
|
# 例如凌晨跑任务时,当天仍在进行中,所以统一以“昨天”为参照,
|
||||||
try:
|
# 这样就能稳定地判断出“哪些周/月已经完结,哪些还是当前进行中的周期”。
|
||||||
return datetime.strptime(str(value)[:10], "%Y-%m-%d")
|
return datetime.now() - timedelta(days=1)
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _should_run_weekly(self, latest_daily_date: Optional[datetime]) -> bool:
|
|
||||||
if not latest_daily_date:
|
|
||||||
return False
|
|
||||||
return latest_daily_date.weekday() == 6
|
|
||||||
|
|
||||||
def _should_run_monthly(self, latest_daily_date: Optional[datetime]) -> bool:
|
|
||||||
if not latest_daily_date:
|
|
||||||
return False
|
|
||||||
return (latest_daily_date + timedelta(days=1)).day == 1
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _normalize_profile_item(item: Dict) -> Dict:
|
def _normalize_profile_item(item: Dict) -> Dict:
|
||||||
@@ -223,18 +199,28 @@ class MemberDigestService:
|
|||||||
|
|
||||||
def _ensure_weekly_digests(self, chatroom_id: str, wxid: str, display_name: str, force: bool = False) -> int:
|
def _ensure_weekly_digests(self, chatroom_id: str, wxid: str, display_name: str, force: bool = False) -> int:
|
||||||
daily_digests = self.digest_db.list_digests(chatroom_id, wxid, "daily", limit=400)
|
daily_digests = self.digest_db.list_digests(chatroom_id, wxid, "daily", limit=400)
|
||||||
|
if not daily_digests:
|
||||||
|
return 0
|
||||||
|
|
||||||
grouped = defaultdict(list)
|
grouped = defaultdict(list)
|
||||||
for item in daily_digests:
|
for item in daily_digests:
|
||||||
week_key, _, _ = self._week_period_bounds(item.get("period_key"))
|
week_key, _, _ = self._week_period_bounds(item.get("period_key"))
|
||||||
grouped[week_key].append(item)
|
grouped[week_key].append(item)
|
||||||
|
|
||||||
existing_keys = set(self.digest_db.list_digest_keys(chatroom_id, wxid, "weekly"))
|
existing_keys = set(self.digest_db.list_digest_keys(chatroom_id, wxid, "weekly"))
|
||||||
current_week_key, _, _ = self._week_period_bounds((datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d"))
|
reference_date = self._get_closed_reference_date()
|
||||||
|
current_week_key, _, _ = self._week_period_bounds(reference_date.strftime("%Y-%m-%d"))
|
||||||
built = 0
|
built = 0
|
||||||
for week_key, items in sorted(grouped.items()):
|
for week_key, items in sorted(grouped.items()):
|
||||||
if len(items) < 2:
|
if len(items) < 2:
|
||||||
continue
|
continue
|
||||||
if not force and week_key in existing_keys and week_key != current_week_key:
|
# 非强制模式下,只补“已完结”的历史周:
|
||||||
|
# 1. 当前参照周还没走完,不能提前生成;
|
||||||
|
# 2. 已存在的历史周不重复生成;
|
||||||
|
# 3. 这样周任务每次触发时,都能把之前漏掉的周摘要自动补齐。
|
||||||
|
if not force and week_key == current_week_key:
|
||||||
|
continue
|
||||||
|
if not force and week_key in existing_keys:
|
||||||
continue
|
continue
|
||||||
period_key, period_start, period_end = self._week_period_bounds(items[0].get("period_key"))
|
period_key, period_start, period_end = self._week_period_bounds(items[0].get("period_key"))
|
||||||
digest = self._build_period_digest(
|
digest = self._build_period_digest(
|
||||||
@@ -251,18 +237,28 @@ class MemberDigestService:
|
|||||||
|
|
||||||
def _ensure_monthly_digests(self, chatroom_id: str, wxid: str, display_name: str, force: bool = False) -> int:
|
def _ensure_monthly_digests(self, chatroom_id: str, wxid: str, display_name: str, force: bool = False) -> int:
|
||||||
weekly_digests = self.digest_db.list_digests(chatroom_id, wxid, "weekly", limit=200)
|
weekly_digests = self.digest_db.list_digests(chatroom_id, wxid, "weekly", limit=200)
|
||||||
|
if not weekly_digests:
|
||||||
|
return 0
|
||||||
|
|
||||||
grouped = defaultdict(list)
|
grouped = defaultdict(list)
|
||||||
for item in weekly_digests:
|
for item in weekly_digests:
|
||||||
month_key, _, _ = self._month_period_bounds(item.get("period_end"))
|
month_key, _, _ = self._month_period_bounds(item.get("period_end"))
|
||||||
grouped[month_key].append(item)
|
grouped[month_key].append(item)
|
||||||
|
|
||||||
existing_keys = set(self.digest_db.list_digest_keys(chatroom_id, wxid, "monthly"))
|
existing_keys = set(self.digest_db.list_digest_keys(chatroom_id, wxid, "monthly"))
|
||||||
current_month_key, _, _ = self._month_period_bounds((datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d"))
|
reference_date = self._get_closed_reference_date()
|
||||||
|
current_month_key, _, _ = self._month_period_bounds(reference_date.strftime("%Y-%m-%d"))
|
||||||
built = 0
|
built = 0
|
||||||
for month_key, items in sorted(grouped.items()):
|
for month_key, items in sorted(grouped.items()):
|
||||||
if len(items) < 2:
|
if len(items) < 2:
|
||||||
continue
|
continue
|
||||||
if not force and month_key in existing_keys and month_key != current_month_key:
|
# 月摘要同样只补“已经完结的月份”:
|
||||||
|
# 1. 当前月仍可能继续产生新周摘要,不能过早固化;
|
||||||
|
# 2. 历史缺失月份在月任务/周任务触发时都会被补齐;
|
||||||
|
# 3. 这样即使某次月任务漏跑,后续任务也能自动追平。
|
||||||
|
if not force and month_key == current_month_key:
|
||||||
|
continue
|
||||||
|
if not force and month_key in existing_keys:
|
||||||
continue
|
continue
|
||||||
period_key, period_start, period_end = self._month_period_bounds(items[-1].get("period_end"))
|
period_key, period_start, period_end = self._month_period_bounds(items[-1].get("period_end"))
|
||||||
digest = self._build_period_digest(
|
digest = self._build_period_digest(
|
||||||
|
|||||||
@@ -127,8 +127,12 @@ class MemberContextPlugin(MessagePluginInterface):
|
|||||||
|
|
||||||
# 兼容“指定群执行”的场景;若未指定则沿用全量刷新逻辑。
|
# 兼容“指定群执行”的场景;若未指定则沿用全量刷新逻辑。
|
||||||
target_groups = [str(g).strip() for g in (context.get("target_groups") or []) if str(g).strip()]
|
target_groups = [str(g).strip() for g in (context.get("target_groups") or []) if str(g).strip()]
|
||||||
enable_weekly = action_key == "weekly_refresh"
|
# 调度层补偿策略:
|
||||||
enable_monthly = action_key == "monthly_refresh"
|
# 1. 周任务除了补周摘要,也顺手检查月摘要缺口;
|
||||||
|
# 2. 月任务先补齐缺失周摘要,再继续补月摘要,避免“月摘要依赖周摘要但周摘要没补上”;
|
||||||
|
# 3. 日任务仍保持轻量,不主动放大到全量周/月补偿。
|
||||||
|
enable_weekly = action_key in {"weekly_refresh", "monthly_refresh"}
|
||||||
|
enable_monthly = action_key in {"weekly_refresh", "monthly_refresh"}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if target_groups:
|
if target_groups:
|
||||||
|
|||||||
Reference in New Issue
Block a user