84 lines
3.0 KiB
Python
84 lines
3.0 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
import time
|
|
from typing import Dict, List, Set, Tuple
|
|
|
|
|
|
class DedupManager:
    """In-memory bookkeeping used to suppress duplicate messages and replies.

    Three independent mechanisms are provided:

    * message keys: ``begin_message_processing`` / ``finish_message_processing``
      reject a key that is currently being processed or was finished recently;
    * reply signatures: ``should_skip_duplicate_reply`` rejects a reply text
      already recorded for the same room (and, by default, the same sender);
    * room content: ``should_skip_repeated_room_content`` reports when the
      same normalized content has appeared in a room a given number of times
      within a time window.

    Timestamps are taken from ``time.time()``, so wall-clock adjustments
    shift expiry accordingly.
    """

    # Default number of recent content entries remembered per room.
    DEFAULT_ROOM_HISTORY = 80

    # Separator for reply-signature fields; NUL does not occur in ordinary
    # chat text or identifiers, so joined fields cannot run into each other.
    _SIGNATURE_SEP = "\x00"

    def __init__(self, max_room_history: int = DEFAULT_ROOM_HISTORY) -> None:
        """Create an empty manager.

        Args:
            max_room_history: Upper bound on remembered content entries per
                room. Values below 1 (or falsy values) are raised to 1.
        """
        self.max_room_history: int = max(int(max_room_history or 0), 1)
        # Keys whose processing has begun but not yet finished.
        self.inflight_message_keys: Set[str] = set()
        # Finished message key -> completion timestamp.
        self.recent_message_keys: Dict[str, float] = {}
        # Reply signature -> timestamp of the first time it was recorded.
        self.recent_reply_signatures: Dict[str, float] = {}
        # Room id -> chronological list of (timestamp, normalized content).
        self.recent_room_content_hits: Dict[str, List[Tuple[float, str]]] = {}

    @staticmethod
    def _purge_expired(entries: Dict[str, float], now: float, expiry_sec: float) -> None:
        """Delete, in place, every entry whose age exceeds ``expiry_sec``."""
        # Collect first: a dict must not change size while being iterated.
        expired = [key for key, stamp in entries.items() if now - stamp > expiry_sec]
        for key in expired:
            del entries[key]

    def begin_message_processing(self, message_key: str, expiry_sec: int) -> bool:
        """Claim ``message_key`` for processing.

        Args:
            message_key: Identifier of the incoming message. A falsy key is
                never tracked and is always allowed through.
            expiry_sec: Age in seconds after which a finished key is forgotten.

        Returns:
            ``True`` if the caller should process the message, ``False`` if
            the key is already in flight or was finished within ``expiry_sec``.
        """
        if not message_key:
            return True

        self._purge_expired(self.recent_message_keys, time.time(), expiry_sec)

        if message_key in self.inflight_message_keys or message_key in self.recent_message_keys:
            return False

        self.inflight_message_keys.add(message_key)
        return True

    def finish_message_processing(self, message_key: str) -> None:
        """Mark ``message_key`` as done and remember it as recently handled.

        A falsy key is ignored. The key is recorded even if it was never
        claimed through ``begin_message_processing``.
        """
        if not message_key:
            return
        self.inflight_message_keys.discard(message_key)
        self.recent_message_keys[message_key] = time.time()

    def should_skip_duplicate_reply(
        self,
        *,
        room_id: str,
        sender: str,
        reply_text: str,
        expiry_sec: int,
        scope: str = "sender",
    ) -> bool:
        """Tell whether an identical reply was already recorded recently.

        Args:
            room_id: Room the reply is destined for.
            sender: Sender the reply is addressed to; ignored when ``scope``
                is ``"room"``.
            reply_text: Reply body; compared after stripping surrounding
                whitespace. Empty text is never treated as a duplicate.
            expiry_sec: Age in seconds after which a recorded reply is forgotten.
            scope: ``"room"`` deduplicates across all senders in the room; any
                other value deduplicates per (room, sender).

        Returns:
            ``True`` if the same reply is already on record (its timestamp is
            left untouched), ``False`` otherwise, in which case the reply is
            recorded now.
        """
        text = str(reply_text or "").strip()
        if not text:
            return False

        now = time.time()
        self._purge_expired(self.recent_reply_signatures, now, expiry_sec)

        # Tag the key with its scope and join with NUL so that a room-scoped
        # text such as "bob:hi" cannot collide with a sender-scoped reply
        # "hi" for sender "bob", as happened with plain ":"-joined keys.
        if scope == "room":
            fields = ("room", str(room_id), text)
        else:
            fields = ("sender", str(room_id), str(sender), text)
        signature = self._SIGNATURE_SEP.join(fields)

        if signature in self.recent_reply_signatures:
            return True
        self.recent_reply_signatures[signature] = now
        return False

    def should_skip_repeated_room_content(
        self,
        *,
        room_id: str,
        content: str,
        window_sec: int,
        repeat_threshold: int,
        min_length: int = 4,
    ) -> bool:
        """Record ``content`` for a room and report whether it is being repeated.

        Content is compared after ``_normalize_room_content`` (lower-cased,
        all whitespace removed).

        Args:
            room_id: Room the content was seen in; a falsy id disables the check.
            content: Raw message content.
            window_sec: Look-back window in seconds; coerced to at least 1.
            repeat_threshold: Number of occurrences (including this one) that
                counts as repetition; coerced to at least 2.
            min_length: Minimum normalized length for content to be tracked.
                A falsy value falls back to 4; the effective minimum is at
                least 1.

        Returns:
            ``True`` when the normalized content has now been seen at least
            ``repeat_threshold`` times within the window, otherwise ``False``.
            Untracked content (no room, empty or too short) returns ``False``
            and is not recorded.
        """
        text = self._normalize_room_content(content)
        if not room_id or not text or len(text) < max(int(min_length or 4), 1):
            return False

        now = time.time()
        window_sec = max(int(window_sec or 0), 1)
        repeat_threshold = max(int(repeat_threshold or 0), 2)

        # Keep only entries still inside the window, then add the current one.
        history = [
            (stamp, seen)
            for stamp, seen in self.recent_room_content_hits.get(room_id, ())
            if now - stamp <= window_sec
        ]
        history.append((now, text))
        occurrences = sum(1 for _, seen in history if seen == text)

        # Bound memory per room, but never below the threshold: with a fixed
        # cap smaller than the threshold the count could never reach it.
        keep = max(self.max_room_history, repeat_threshold)
        self.recent_room_content_hits[room_id] = history[-keep:]

        return occurrences >= repeat_threshold

    @staticmethod
    def _normalize_room_content(content: str) -> str:
        """Return ``content`` lower-cased with all whitespace removed.

        Falsy input yields an empty string.
        """
        lowered = str(content or "").strip().lower()
        return re.sub(r"\s+", "", lowered)
|