将@关系批处理业务迁移到 value_rank 插件

- 从 MessageStorageDB 移除@抽取与社交图写入逻辑,消息层仅保留归档职责

- 从系统级任务移除 process_pending_mentions,取消 message_to_db 中对应入口

- 在 value_rank 插件新增定时动作 value_rank_mentions_extract(每10分钟)

- 在插件内实现窗口化批处理(默认10~20分钟前)、@提取、幂等写入明细/边表/日汇总及 unique_interactors 回填

- 新增插件侧可配置参数 mention_batch_size / mention_window_start_minutes / mention_window_end_minutes
This commit is contained in:
liuwei
2026-04-21 14:10:25 +08:00
parent d60d496bc3
commit d64d11a384
5 changed files with 359 additions and 460 deletions

View File

@@ -43,14 +43,6 @@ def get_system_job_definitions(robot) -> List[Dict[str, Any]]:
"trigger_config": {"seconds": 300},
"handler": _build_process_pending_images_handler(robot),
},
{
"job_key": "process_pending_mentions",
"name": "待抽取@关系处理",
"description": "每 10 分钟处理一次待抽取@消息并更新社交图",
"trigger_type": "every_seconds",
"trigger_config": {"seconds": 600},
"handler": _build_process_pending_mentions_handler(robot),
},
]
def _build_process_pending_images_handler(robot) -> Callable[[], Awaitable[None]]:
@@ -61,18 +53,6 @@ def _build_process_pending_images_handler(robot) -> Callable[[], Awaitable[None]
return _handler
def _build_process_pending_mentions_handler(
    robot,
    *,
    batch_size: int = 200,
    window_start_minutes: int = 20,
    window_end_minutes: int = 10,
) -> Callable[[], Awaitable[None]]:
    """Build the async job handler that triggers @-mention extraction.

    Args:
        robot: Object that may expose a ``message_storage`` attribute. The
            attribute is looked up each time the handler runs, not when the
            handler is built.
        batch_size: Forwarded unchanged to
            ``message_storage.process_pending_mentions``.
        window_start_minutes: Forwarded unchanged (older edge of the window,
            in minutes before now).
        window_end_minutes: Forwarded unchanged (newer edge of the window,
            in minutes before now).

    Returns:
        A zero-argument coroutine function. Awaiting it calls
        ``robot.message_storage.process_pending_mentions(...)`` with the
        values above, or does nothing when ``robot`` has no truthy
        ``message_storage``.
    """

    async def _handler() -> None:
        # A missing attribute and a falsy one (e.g. None) are both treated
        # as "storage not available", so the job silently becomes a no-op.
        storage = getattr(robot, "message_storage", None)
        if not storage:
            return
        await storage.process_pending_mentions(
            batch_size=batch_size,
            window_start_minutes=window_start_minutes,
            window_end_minutes=window_end_minutes,
        )

    return _handler
class SystemJobLoader:
"""系统任务加载器:从数据库读取调度配置并注册到 async_job。"""

View File

@@ -358,49 +358,6 @@ class MessageStorage:
except Exception as e:
logger.exception(f"定时处理媒体任务出错: {e}")
async def process_pending_mentions(
    self,
    batch_size: int = 200,
    window_start_minutes: int = 20,
    window_end_minutes: int = 10,
):
    """Scheduled job: extract @-mentions from pending messages in one batch.

    Delegates the work to ``self.message_db.process_pending_mentions`` and
    logs a summary of the returned counters.

    Notes carried over from the original description of this job:
      1. It is meant to run separately from the main message-archiving
         path so that real-time send/receive is not held up.
      2. Each run handles a bounded batch to avoid long transactions.
      3. Re-running is expected to be safe; the storage layer is described
         as idempotent on message_id + sender + mentioned_user_id
         (not verifiable from this method alone).
      4. By default only messages from 10-20 minutes ago are scanned, to
         keep pressure off the most recent data.

    Args:
        batch_size: Forwarded unchanged to the DB layer.
        window_start_minutes: Older edge of the window, minutes before now.
        window_end_minutes: Newer edge of the window, minutes before now.

    Returns:
        None. Any exception is logged with its traceback and swallowed, so
        a failed run does not propagate to the caller.
    """
    # Local import keeps this method self-contained; elapsed time is taken
    # from a monotonic clock so system clock adjustments cannot skew it.
    import time

    try:
        started = time.monotonic()
        logger.info(
            "触发定时@抽取任务: "
            f"batch_size={batch_size}, window=[NOW-{window_start_minutes}m, NOW-{window_end_minutes}m)"
        )

        # NOTE(review): this DB call is not awaited, so it runs
        # synchronously inside the coroutine.
        # A None result is treated the same as an empty stats dict.
        stats = self.message_db.process_pending_mentions(
            batch_size=batch_size,
            window_start_minutes=window_start_minutes,
            window_end_minutes=window_end_minutes,
        ) or {}
        elapsed_ms = int((time.monotonic() - started) * 1000)

        if int(stats.get("total", 0)) == 0:
            logger.info(f"定时@抽取任务结束: 无待处理数据, 耗时={elapsed_ms}ms")
            return

        logger.info(
            "定时@抽取任务结束: "
            f"读取={stats.get('total', 0)}, "
            f"处理={stats.get('processed', 0)}, "
            f"含@={stats.get('with_mentions', 0)}, "
            f"失败={stats.get('failed', 0)}, "
            f"耗时={elapsed_ms}ms"
        )
    except Exception as e:
        # Job boundary: log the failure and let the next scheduled run
        # try again rather than raising into the scheduler.
        logger.exception(f"定时处理@抽取任务出错: {e}")
def _process_image_done(self, future):
"""任务完成统一回调(极轻量)"""
try: