修复最近上下文只取到当前消息的问题

This commit is contained in:
liuwei
2026-04-24 15:37:37 +08:00
parent 2fe6cbb758
commit d0480691c3
2 changed files with 72 additions and 1 deletions

View File

@@ -100,6 +100,7 @@ class MessageStorageDB(BaseDBOperator):
AND length(content) > %s
AND CHAR_LENGTH(content) < 300
AND content NOT LIKE '/%'
ORDER BY timestamp ASC
"""
params = (hours_ago, group_id, min_content_length)
return self.execute_query(sql, params) or []

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import asyncio
from datetime import datetime
import re
import time
import xml.etree.ElementTree as ET
@@ -345,7 +346,7 @@ class AIAutoResponsePlugin(MessagePluginInterface):
"timestamp": message.get("timestamp"),
}
self._append_group_message(room_id, normalized_message)
recent_messages = self.group_messages.get(room_id) or self.memory_store.get_recent_messages(room_id)
recent_messages = self._get_recent_messages_for_context(room_id)
group_name_map = self._build_group_name_map(room_id)
group_memory_bundle = self.group_memory.build(
room_id=room_id,
@@ -751,6 +752,75 @@ class AIAutoResponsePlugin(MessagePluginInterface):
if len(items) > size:
self.group_messages[room_id] = items[-size:]
def _get_recent_messages_for_context(self, room_id: str) -> List[Dict]:
    """Return the most recent messages of a room to use as model context.

    The stored history from ``self.memory_store`` and the in-process buffer
    ``self.group_messages`` are merged, de-duplicated and sorted, and only
    the trailing ``recent_context_size`` entries (default 30, minimum 1)
    are returned.

    Args:
        room_id: Identifier of the chat room whose context is requested.

    Returns:
        The tail of the merged message list, oldest first.
    """
    # Why not "memory OR store": the current message is appended to the
    # in-process buffer before this runs, so the buffer is never empty and
    # an either/or lookup would ignore the stored history entirely. Right
    # after start-up or a room switch that left only the current message.
    persisted = self.memory_store.get_recent_messages(room_id)
    in_process = self.group_messages.get(room_id) or []
    combined = self._merge_recent_messages(persisted, list(in_process))

    configured = self.mode_config.get("recent_context_size", 30) or 30
    window = max(int(configured), 1)
    return combined[-window:]
@classmethod
def _merge_recent_messages(cls, db_recent: List[Dict], live_recent: List[Dict]) -> List[Dict]:
    """Merge stored and in-process messages into one de-duplicated, sorted list.

    Messages are grouped by ``_build_recent_message_identity``. When the same
    identity appears more than once, later occurrences overlay earlier ones
    field by field; because ``live_recent`` is processed after ``db_recent``,
    its values win. Values equal to ``None``, ``""`` or ``[]`` never
    overwrite an existing field.

    Args:
        db_recent: Messages loaded from storage; may be ``None`` or empty.
        live_recent: Messages held in process memory; may be ``None`` or empty.

    Returns:
        A new list of merged message dicts ordered by
        ``_recent_message_sort_key``.
    """
    # Per the original author's note: stored rows usually carry only
    # sender/content/timestamp, while in-process messages also carry runtime
    # fields (sender_name, is_at, ...), hence the "later wins" overlay.
    by_identity: Dict[str, Dict] = {}
    for raw in [*(db_recent or []), *(live_recent or [])]:
        record = dict(raw or {})
        identity = cls._build_recent_message_identity(record)
        meaningful = {
            name: val
            for name, val in record.items()
            if val not in (None, "", [])
        }
        by_identity[identity] = {**by_identity.get(identity, {}), **meaningful}
    return sorted(by_identity.values(), key=cls._recent_message_sort_key)
@staticmethod
def _build_recent_message_identity(message: Dict) -> str:
sender = str(message.get("sender", "") or "").strip()
content = str(message.get("content") or message.get("message") or "").strip()
timestamp = str(message.get("timestamp", "") or "").strip()
# 这里用“时间 + 发送者 + 内容”做弱去重键:
# 1. 对同一条消息DB 和内存版本通常会共享这三类信息;
# 2. 这样足以把“当前消息的 DB 版本”和“当前消息的内存版本”合并成一条;
# 3. 即使偶发碰撞,也只会影响完全相同内容的近似重复消息,风险可接受。
return f"{timestamp}|{sender}|{content}"
@classmethod
def _recent_message_sort_key(cls, message: Dict) -> tuple:
    """Return the sort key used to order merged recent messages.

    Messages whose timestamp can be parsed sort first (group ``0``) by their
    epoch value; the rest sort after them (group ``1``) by the raw timestamp
    text. Sender and content act as tie-breakers in both groups.

    Args:
        message: Message dict to compute the key for.

    Returns:
        ``(0, epoch_seconds, sender, content)`` when the timestamp parses,
        otherwise ``(1, timestamp_text, sender, content)``.
    """
    raw_time = str(message.get("timestamp", "") or "").strip()
    moment = cls._parse_recent_message_timestamp(raw_time)
    tie_breakers = (
        str(message.get("sender", "") or "").strip(),
        str(message.get("content") or message.get("message") or "").strip(),
    )
    if moment is None:
        # Still return a deterministic key when the time cannot be parsed,
        # so ordering does not jitter between sources.
        return (1, raw_time, *tie_breakers)
    return (0, moment.timestamp(), *tie_breakers)
@staticmethod
def _parse_recent_message_timestamp(value: str) -> datetime | None:
text = str(value or "").strip()
if not text:
return None
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d %H:%M:%S"):
try:
return datetime.strptime(text, fmt)
except ValueError:
continue
try:
return datetime.fromtimestamp(float(text))
except (TypeError, ValueError, OSError):
return None
def _call_llm(
self,
*,