from __future__ import annotations

import re
import time
from typing import Dict, List, Set, Tuple

# Matches any run of whitespace; compiled once because it is applied to every
# piece of room content that gets normalised.
_WHITESPACE_RUN = re.compile(r"\s+")


class DedupManager:
    """In-memory bookkeeping used to suppress duplicate work.

    Three independent checks are offered:

    * message keys   -- ``begin_message_processing`` / ``finish_message_processing``
    * reply texts    -- ``should_skip_duplicate_reply``
    * room content   -- ``should_skip_repeated_room_content``

    All timestamps are taken from ``time.time()``.  Nothing is persisted; the
    state lives only in the instance attributes set up by ``__init__``.
    """

    # Upper bound on the number of (timestamp, text) entries remembered per room.
    MAX_ROOM_HISTORY = 80

    def __init__(self):
        # Keys handed out by begin_message_processing and not yet finished.
        self.inflight_message_keys: Set[str] = set()
        # message key -> time at which finish_message_processing recorded it.
        self.recent_message_keys: Dict[str, float] = {}
        # reply signature -> time at which the reply was first seen.
        self.recent_reply_signatures: Dict[str, float] = {}
        # room id -> chronological list of (timestamp, normalised content).
        self.recent_room_content_hits: Dict[str, List[Tuple[float, str]]] = {}
        # room id -> window (seconds) used by the latest call for that room;
        # lets idle rooms be evicted against their own window.
        self._room_content_windows: Dict[str, int] = {}

    @staticmethod
    def _drop_expired(store: Dict[str, float], now: float, max_age: float) -> None:
        """Remove, in place, every entry of *store* older than *max_age* seconds."""
        expired = [key for key, stamp in store.items() if now - stamp > max_age]
        for key in expired:
            store.pop(key, None)

    def begin_message_processing(self, message_key: str, expiry_sec: int) -> bool:
        """Claim *message_key* for processing.

        Returns ``True`` when the caller may proceed: the key is empty, or it
        is neither in flight nor among the recently finished keys.  Returns
        ``False`` for a duplicate.  Finished keys older than *expiry_sec* are
        forgotten before the check.
        """
        if not message_key:
            return True

        self._drop_expired(self.recent_message_keys, time.time(), expiry_sec)

        if message_key in self.inflight_message_keys:
            return False
        if message_key in self.recent_message_keys:
            return False

        self.inflight_message_keys.add(message_key)
        return True

    def finish_message_processing(self, message_key: str) -> None:
        """Release *message_key* from the in-flight set and record it as recent.

        An empty key is ignored.
        """
        if not message_key:
            return
        self.inflight_message_keys.discard(message_key)
        self.recent_message_keys[message_key] = time.time()

    def should_skip_duplicate_reply(
        self,
        *,
        room_id: str,
        sender: str,
        reply_text: str,
        expiry_sec: int,
        scope: str = "sender",
    ) -> bool:
        """Report whether the same reply was already recorded recently.

        The reply text is stripped; an empty result is never a duplicate.
        With ``scope="room"`` the signature is built from room and text only;
        any other scope value also includes *sender*.  Signatures older than
        *expiry_sec* are forgotten first.  A reply that is not a duplicate is
        recorded with the current time; a duplicate does not refresh the
        stored timestamp.
        """
        text = str(reply_text or "").strip()
        if not text:
            return False

        now = time.time()
        self._drop_expired(self.recent_reply_signatures, now, expiry_sec)

        if scope == "room":
            signature = f"{room_id}:{text}"
        else:
            signature = f"{room_id}:{sender}:{text}"

        if signature in self.recent_reply_signatures:
            return True
        self.recent_reply_signatures[signature] = now
        return False

    def should_skip_repeated_room_content(
        self,
        *,
        room_id: str,
        content: str,
        window_sec: int,
        repeat_threshold: int,
        min_length: int = 4,
    ) -> bool:
        """Report whether *content* has been repeated too often in *room_id*.

        Content is normalised (stripped, lower-cased, all whitespace removed).
        Returns ``False`` without recording anything when *room_id* is empty
        or the normalised text is empty or shorter than *min_length* (a falsy
        *min_length* is treated as 4; the effective minimum is at least 1).

        Otherwise the occurrence is recorded and the result is ``True`` when
        the number of identical entries within the last *window_sec* seconds,
        including this one, reaches *repeat_threshold*.  *window_sec* is
        clamped to at least 1 and *repeat_threshold* to at least 2.  At most
        ``MAX_ROOM_HISTORY`` entries are kept per room.

        Rooms other than *room_id* whose newest entry has aged out of the
        window last used for them are evicted, so idle rooms do not
        accumulate for the lifetime of the process.
        """
        text = self._normalize_room_content(content)
        shortest = max(int(min_length or 4), 1)
        if not room_id or not text or len(text) < shortest:
            return False

        now = time.time()
        window_sec = max(int(window_sec or 0), 1)
        repeat_threshold = max(int(repeat_threshold or 0), 2)

        self._evict_idle_rooms(now, keep=room_id, default_window=window_sec)

        history = [
            (stamp, seen)
            for stamp, seen in self.recent_room_content_hits.get(room_id, [])
            if now - stamp <= window_sec
        ]
        earlier_hits = sum(1 for _, seen in history if seen == text)

        history.append((now, text))
        self.recent_room_content_hits[room_id] = history[-self.MAX_ROOM_HISTORY:]
        self._room_content_windows[room_id] = window_sec

        return earlier_hits + 1 >= repeat_threshold

    def _evict_idle_rooms(self, now: float, *, keep: str, default_window: int) -> None:
        """Forget every room except *keep* whose history has fully expired.

        A room is judged against the window recorded for it; rooms with no
        recorded window fall back to *default_window*.
        """
        idle = []
        for other, hits in self.recent_room_content_hits.items():
            if other == keep:
                continue
            window = self._room_content_windows.get(other, default_window)
            # Entries are appended chronologically, so the last one is newest.
            if not hits or now - hits[-1][0] > window:
                idle.append(other)
        for other in idle:
            self.recent_room_content_hits.pop(other, None)
            self._room_content_windows.pop(other, None)

    @staticmethod
    def _normalize_room_content(content: str) -> str:
        """Return *content* stripped, lower-cased and with all whitespace removed."""
        text = str(content or "").strip().lower()
        return _WHITESPACE_RUN.sub("", text)