# Source listing: abot/plugins/ai_auto_response/safety/dedup.py (Python, 84 lines, 3.0 KiB)

from __future__ import annotations
import re
import time
from typing import Dict, List, Set, Tuple
class DedupManager:
    """In-memory bookkeeping used to suppress duplicate messages and replies.

    Three independent checks are offered:

    * message keys: reject a key that is currently being processed or was
      finished within the expiry window;
    * reply signatures: reject a reply text already recorded for the same
      room (and, by default, the same sender) within the expiry window;
    * room content: report when the same normalized content shows up
      repeatedly in one room within a time window.

    All timestamps are taken from ``time.time()``. The class performs no
    locking of its own.
    """

    # Maximum number of content hits remembered per room; the most recent
    # ones are kept. Must be a positive integer (``[-0:]`` would keep all).
    MAX_ROOM_HISTORY = 80

    def __init__(self) -> None:
        # Keys handed out by begin_message_processing and not yet finished.
        self.inflight_message_keys: Set[str] = set()
        # Finished message key -> time it was finished.
        self.recent_message_keys: Dict[str, float] = {}
        # Reply signature -> time it was first recorded.
        self.recent_reply_signatures: Dict[str, float] = {}
        # Room id -> list of (timestamp, normalized content) hits.
        self.recent_room_content_hits: Dict[str, List[Tuple[float, str]]] = {}

    @staticmethod
    def _prune_expired(entries: Dict[str, float], now: float, expiry_sec: float) -> None:
        """Delete, in place, every entry of *entries* older than *expiry_sec*.

        An entry is expired when ``now - timestamp > expiry_sec``.
        """
        # Collect first: a dict must not change size while being iterated.
        expired = [key for key, stamp in entries.items() if now - stamp > expiry_sec]
        for key in expired:
            del entries[key]

    def begin_message_processing(self, message_key: str, expiry_sec: int) -> bool:
        """Claim *message_key* for processing.

        Args:
            message_key: Identifier of the incoming message. An empty key is
                never tracked and is always accepted.
            expiry_sec: Age in seconds after which a finished key is
                forgotten and may be processed again.

        Returns:
            ``True`` if the caller may process the message; ``False`` if the
            key is already in flight or was finished within *expiry_sec*.
        """
        if not message_key:
            return True

        # NOTE(review): time.time() is wall-clock time, so a system clock
        # adjustment shifts expiry; confirm whether that matters here.
        self._prune_expired(self.recent_message_keys, time.time(), expiry_sec)

        already_seen = (
            message_key in self.inflight_message_keys
            or message_key in self.recent_message_keys
        )
        if already_seen:
            return False

        self.inflight_message_keys.add(message_key)
        return True

    def finish_message_processing(self, message_key: str) -> None:
        """Release *message_key* from the in-flight set and record it as recent.

        An empty key is ignored. The key is recorded even if it was never
        claimed through ``begin_message_processing``.
        """
        if not message_key:
            return
        self.inflight_message_keys.discard(message_key)
        self.recent_message_keys[message_key] = time.time()

    def should_skip_duplicate_reply(
        self,
        *,
        room_id: str,
        sender: str,
        reply_text: str,
        expiry_sec: int,
        scope: str = "sender",
    ) -> bool:
        """Tell whether *reply_text* repeats a reply recorded recently.

        The text is stripped of surrounding whitespace before comparison.
        The first occurrence of a signature is recorded and reported as not a
        duplicate; later occurrences within *expiry_sec* are duplicates and
        do not refresh the stored timestamp.

        Args:
            room_id: Room the reply is destined for.
            sender: Sender the reply is associated with; part of the
                signature unless ``scope == "room"``.
            reply_text: Reply body; empty or whitespace-only text is never
                treated as a duplicate and is not recorded.
            expiry_sec: Age in seconds after which a signature is forgotten.
            scope: ``"room"`` deduplicates per room regardless of sender; any
                other value deduplicates per room and sender.

        Returns:
            ``True`` if the reply should be skipped as a duplicate.
        """
        text = str(reply_text or "").strip()
        if not text:
            return False

        now = time.time()
        self._prune_expired(self.recent_reply_signatures, now, expiry_sec)

        if scope == "room":
            signature = f"{room_id}:{text}"
        else:
            signature = f"{room_id}:{sender}:{text}"

        if signature in self.recent_reply_signatures:
            return True
        self.recent_reply_signatures[signature] = now
        return False

    def should_skip_repeated_room_content(
        self,
        *,
        room_id: str,
        content: str,
        window_sec: int,
        repeat_threshold: int,
        min_length: int = 4,
    ) -> bool:
        """Tell whether *content* has been repeated too often in *room_id*.

        Content is compared after ``_normalize_room_content``. Every call
        that passes the guards records the current hit, then reports whether
        the number of identical hits inside the window (including this one)
        has reached the threshold.

        Args:
            room_id: Room the content was seen in; an empty id is ignored.
            content: Raw message content.
            window_sec: Look-back window in seconds; clamped to at least 1.
            repeat_threshold: Number of identical hits that triggers a skip;
                clamped to at least 2.
            min_length: Minimum normalized length to consider; a falsy value
                falls back to 4, and the result is clamped to at least 1.

        Returns:
            ``True`` if the content should be skipped as repeated.
        """
        text = self._normalize_room_content(content)
        # The min_length coercion stays inside the condition so it is only
        # evaluated once room_id and text are known to be non-empty.
        if not room_id or not text or len(text) < max(int(min_length or 4), 1):
            return False

        now = time.time()
        window = max(int(window_sec or 0), 1)
        threshold = max(int(repeat_threshold or 0), 2)

        # Keep only this room's hits that are still inside the window.
        # NOTE(review): histories of other rooms are not pruned here, so a
        # room that goes quiet keeps its last entries; confirm acceptable.
        history = [
            (stamp, seen)
            for stamp, seen in self.recent_room_content_hits.get(room_id, [])
            if now - stamp <= window
        ]
        prior_hits = sum(1 for _, seen in history if seen == text)

        history.append((now, text))
        self.recent_room_content_hits[room_id] = history[-self.MAX_ROOM_HISTORY:]

        return prior_hits + 1 >= threshold

    @staticmethod
    def _normalize_room_content(content: str) -> str:
        """Return *content* lower-cased with all whitespace removed.

        ``None`` and other falsy values normalize to the empty string.
        """
        lowered = str(content or "").strip().lower()
        return re.sub(r"\s+", "", lowered)