|
|
|
|
@@ -101,6 +101,7 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|
|
|
|
self.queue_workers: List[asyncio.Task] = []
|
|
|
|
|
self.reply_limits: Dict[str, Any] = {}
|
|
|
|
|
self.prompt_compact_config: Dict[str, Any] = {}
|
|
|
|
|
self.message_expire_sec = 0.0
|
|
|
|
|
|
|
|
|
|
def initialize(self, context: Dict[str, Any]) -> bool:
|
|
|
|
|
self.LOG = logger
|
|
|
|
|
@@ -148,6 +149,20 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|
|
|
|
timeout_base = int((self._config.get("api", {}) or {}).get("timeout_seconds", 60) or 60)
|
|
|
|
|
timeout_fallback = max(timeout_base * 2, 90)
|
|
|
|
|
self.llm_call_timeout_sec = max(int(runtime_config.get("llm_call_timeout_sec", timeout_fallback) or timeout_fallback), 10)
|
|
|
|
|
# 群聊是强时效场景:
|
|
|
|
|
# 1. 如果一条消息已经在队列里放太久,再回往往比“不回”更奇怪;
|
|
|
|
|
# 2. 因此这里引入消息过期时间,后续会在“出队前”和“发送前”各检查一次;
|
|
|
|
|
# 3. 默认沿用 question_reply_timeout_sec 的时效感,再允许 runtime 单独覆盖。
|
|
|
|
|
self.message_expire_sec = max(
|
|
|
|
|
float(
|
|
|
|
|
runtime_config.get(
|
|
|
|
|
"message_expire_sec",
|
|
|
|
|
(self._config.get("mode", {}) or {}).get("question_reply_timeout_sec", 12),
|
|
|
|
|
)
|
|
|
|
|
or 12
|
|
|
|
|
),
|
|
|
|
|
1.0,
|
|
|
|
|
)
|
|
|
|
|
self.queue_worker_count = max(int(runtime_config.get("queue_worker_count", 2) or 2), 1)
|
|
|
|
|
self.queue_maxsize = max(int(runtime_config.get("queue_maxsize", 500) or 500), 10)
|
|
|
|
|
self.message_queue = asyncio.Queue(maxsize=self.queue_maxsize)
|
|
|
|
|
@@ -159,7 +174,7 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|
|
|
|
self.log_debug = bool((self._config.get("logging", {}) or {}).get("debug", True))
|
|
|
|
|
self.LOG.debug(
|
|
|
|
|
f"[{self.name}] 初始化完成 llm_max_concurrency={llm_max_concurrency} llm_call_timeout_sec={self.llm_call_timeout_sec} "
|
|
|
|
|
f"queue_worker_count={self.queue_worker_count} queue_maxsize={self.queue_maxsize}"
|
|
|
|
|
f"message_expire_sec={self.message_expire_sec} queue_worker_count={self.queue_worker_count} queue_maxsize={self.queue_maxsize}"
|
|
|
|
|
)
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
@@ -213,6 +228,9 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|
|
|
|
self.message_queue = asyncio.Queue(maxsize=self.queue_maxsize)
|
|
|
|
|
self._ensure_workers_started()
|
|
|
|
|
queued_message = dict(message)
|
|
|
|
|
# 记录入队时刻,供后续判断这条消息是否已经“聊过时”。
|
|
|
|
|
# 使用 monotonic 避免系统时间调整影响队列老化判断。
|
|
|
|
|
queued_message["_queued_at_mono"] = time.monotonic()
|
|
|
|
|
try:
|
|
|
|
|
self.message_queue.put_nowait(queued_message)
|
|
|
|
|
self._log_event(
|
|
|
|
|
@@ -240,6 +258,18 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|
|
|
|
bot: WechatAPIClient = message.get("bot")
|
|
|
|
|
is_at = bool(message.get("is_at", False))
|
|
|
|
|
content = self._normalize_content(message)
|
|
|
|
|
stale_age_sec = self._get_message_queue_age_sec(message)
|
|
|
|
|
if self._is_message_stale(message):
|
|
|
|
|
self._log_event(
|
|
|
|
|
"skip",
|
|
|
|
|
room_id=room_id,
|
|
|
|
|
sender=sender,
|
|
|
|
|
reason="stale_queued_message",
|
|
|
|
|
trigger_type="stale_guard",
|
|
|
|
|
reply_mode="drop",
|
|
|
|
|
age_sec=round(stale_age_sec, 2),
|
|
|
|
|
)
|
|
|
|
|
return False, "stale_queued_message"
|
|
|
|
|
message_key = self._build_message_key(message, content)
|
|
|
|
|
dedup_expiry = int(self.cooldown_config.get("message_dedup_window_sec", 180))
|
|
|
|
|
if not self.dedup.begin_message_processing(message_key, dedup_expiry):
|
|
|
|
|
@@ -584,6 +614,22 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|
|
|
|
|
|
|
|
|
reply_chunks = finalize_reply(reply_text, reply_mode, self.reply_limits)
|
|
|
|
|
final_response_text = "\n".join(reply_chunks)
|
|
|
|
|
# 第二次过期判断:
|
|
|
|
|
# 1. 这一步专门防止“LLM 慢返回后补发过时回复”;
|
|
|
|
|
# 2. 即使消息进模型时还新鲜,等模型回完也可能已经跟不上群聊了;
|
|
|
|
|
# 3. 这种情况下直接放弃发送,比突然补回旧话更自然。
|
|
|
|
|
if self._is_message_stale(message):
|
|
|
|
|
self._log_event(
|
|
|
|
|
"skip",
|
|
|
|
|
room_id=room_id,
|
|
|
|
|
sender=sender,
|
|
|
|
|
reason="stale_before_send",
|
|
|
|
|
trigger_type=trigger.trigger_type,
|
|
|
|
|
reply_mode=reply_mode,
|
|
|
|
|
topic=selected_topic,
|
|
|
|
|
age_sec=round(self._get_message_queue_age_sec(message), 2),
|
|
|
|
|
)
|
|
|
|
|
return False, "stale_before_send"
|
|
|
|
|
reply_dedup_expiry = int(self.cooldown_config.get("reply_dedup_window_sec", 90))
|
|
|
|
|
if not reply_chunks or self.dedup.should_skip_duplicate_reply(
|
|
|
|
|
room_id=room_id,
|
|
|
|
|
@@ -1218,6 +1264,22 @@ class AIAutoResponsePlugin(MessagePluginInterface):
|
|
|
|
|
timestamp = str(int(float(message.get("timestamp") or 0)))
|
|
|
|
|
return f"{room_id}:{sender}:{timestamp}:{preview_text(content, 48)}"
|
|
|
|
|
|
|
|
|
|
def _get_message_queue_age_sec(self, message: Dict[str, Any]) -> float:
|
|
|
|
|
queued_at = message.get("_queued_at_mono")
|
|
|
|
|
if queued_at in (None, ""):
|
|
|
|
|
return 0.0
|
|
|
|
|
try:
|
|
|
|
|
return max(time.monotonic() - float(queued_at), 0.0)
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
return 0.0
|
|
|
|
|
|
|
|
|
|
def _is_message_stale(self, message: Dict[str, Any]) -> bool:
|
|
|
|
|
# 这里只看“排队/等待总时长”,不依赖消息业务时间戳:
|
|
|
|
|
# 1. 队列老化才是补发过时回复的直接原因;
|
|
|
|
|
# 2. 不同上游消息时间字段格式不统一,而入队时间一定可控;
|
|
|
|
|
# 3. 这样实现最稳定,也最符合“超过多久就别回了”的产品语义。
|
|
|
|
|
return self._get_message_queue_age_sec(message) >= float(self.message_expire_sec)
|
|
|
|
|
|
|
|
|
|
def _normalize_content(self, message: Dict[str, Any]) -> str:
|
|
|
|
|
msg_type = message.get("type")
|
|
|
|
|
content = str(message.get("content", "")).strip()
|
|
|
|
|
|