将@抽取与社交图写入改为定时批处理

- 精简 archive_message 主链路:仅做消息归档,不再同步执行@解析与社交统计

- 新增 MessageStorageDB.process_pending_mentions 批处理能力,按批次回填 mentioned_user_ids 并写入社交图

- 新增系统任务 process_pending_mentions,每10分钟执行一次(every_seconds=600)

- 增加幂等保护:基于 t_message_mentions 已有记录过滤新增@关系,避免重复累加社交边和热度

- 保留详细中文注释,说明性能优化目标与批处理策略
This commit is contained in:
liuwei
2026-04-21 13:51:56 +08:00
parent 0dc55297bb
commit 906f0905df
3 changed files with 184 additions and 32 deletions

View File

@@ -43,6 +43,14 @@ def get_system_job_definitions(robot) -> List[Dict[str, Any]]:
"trigger_config": {"seconds": 300},
"handler": _build_process_pending_images_handler(robot),
},
{
"job_key": "process_pending_mentions",
"name": "待抽取@关系处理",
"description": "每 10 分钟处理一次待抽取@消息并更新社交图",
"trigger_type": "every_seconds",
"trigger_config": {"seconds": 600},
"handler": _build_process_pending_mentions_handler(robot),
},
]
def _build_process_pending_images_handler(robot) -> Callable[[], Awaitable[None]]:
@@ -53,6 +61,14 @@ def _build_process_pending_images_handler(robot) -> Callable[[], Awaitable[None]
return _handler
def _build_process_pending_mentions_handler(robot) -> Callable[[], Awaitable[None]]:
    """Build the zero-argument coroutine used by the pending-@ system job.

    Args:
        robot: Object that may expose a ``message_storage`` attribute.

    Returns:
        An async callable. Each invocation looks up ``robot.message_storage``;
        when the attribute is missing or falsy the call does nothing,
        otherwise it awaits
        ``message_storage.process_pending_mentions(batch_size=200, max_age_days=7)``.
    """

    async def _handler():
        # Resolved on every run (not at build time), so a storage object
        # attached to the robot later is still picked up.
        storage = getattr(robot, "message_storage", None)
        if not storage:
            return
        await storage.process_pending_mentions(batch_size=200, max_age_days=7)

    return _handler
class SystemJobLoader:
"""系统任务加载器:从数据库读取调度配置并注册到 async_job。"""

View File

@@ -358,6 +358,34 @@ class MessageStorage:
except Exception as e:
logger.exception(f"定时处理媒体任务出错: {e}")
async def process_pending_mentions(self, batch_size: int = 200, max_age_days: int = 7):
    """Scheduled job: run one batch of pending @-mention extraction and log the outcome.

    Calls ``self.message_db.process_pending_mentions`` once, forwarding both
    arguments unchanged, and reads the counters ``total``, ``processed``,
    ``with_mentions`` and ``failed`` from the mapping it returns. A ``total``
    of 0 is logged at debug level and ends the run; anything else produces a
    single info-level summary line.

    Args:
        batch_size: Passed through to the database layer as ``batch_size``.
        max_age_days: Passed through to the database layer as ``max_age_days``.

    Returns:
        None. Any ``Exception`` raised along the way is logged with its
        traceback and swallowed, so the caller never sees it.

    Notes:
        - Author's stated intent: keep @ extraction and social-graph writes
          out of the real-time archive path, bound the work per run, and rely
          on the database layer for idempotence. None of that is verifiable
          from this method alone - TODO confirm against the DB layer.
        - NOTE(review): the database call below is a plain synchronous call
          inside a coroutine, so it runs on the event loop for its whole
          duration - confirm this is acceptable for the configured batch size.
    """
    try:
        result = self.message_db.process_pending_mentions(
            batch_size=batch_size,
            max_age_days=max_age_days,
        )

        # Nothing was read this round: keep the log quiet and stop early.
        if int(result.get("total", 0)) == 0:
            logger.debug("待处理@抽取队列为空,本轮跳过")
            return

        read_count = result.get("total", 0)
        processed_count = result.get("processed", 0)
        mention_count = result.get("with_mentions", 0)
        failed_count = result.get("failed", 0)
        summary = (
            "批量@抽取完成: "
            f"读取={read_count}, "
            f"处理={processed_count}, "
            f"含@={mention_count}, "
            f"失败={failed_count}"
        )
        logger.info(summary)
    except Exception as e:
        # Top-level boundary of a scheduled job: log with traceback, do not re-raise.
        logger.exception(f"定时处理@抽取任务出错: {e}")
def _process_image_done(self, future):
"""任务完成统一回调(极轻量)"""
try: