Revert "支持向LLM上传原始弹幕TXT并补充全文输入"

This reverts commit 86c3dea1d2.
2026-04-29 14:15:26 +08:00
parent fd61f17448
commit 850fd5e7d3
2 changed files with 2 additions and 151 deletions
--- a/plugins/douyu/danmu_summary.py
+++ b/plugins/douyu/danmu_summary.py
@@ -93,28 +93,6 @@ class DouyuDanmuSummaryHelper:
                continue
        return collected

-    @classmethod
-    def collect_session_source_files(cls, room_id: str, session: Dict[str, Any], base_dir: str = "temp") -> List[str]:
-        """
-        收集某个 session 实际对应到的原始弹幕 txt 文件路径。
-        这里不读取文件内容，只返回“这场直播跨到了哪些日期文件”，
-        方便上层在需要时直接把原始 txt 上传给 LLM。
-        """
-        segments = cls._normalize_segments(session.get("segments", []) or [])
-        if not room_id or not segments:
-            return []
-
-        file_paths: List[str] = []
-        date_keys = sorted(
-            {segment["start"].strftime("%Y%m%d") for segment in segments}
-            | {segment["end"].strftime("%Y%m%d") for segment in segments}
-        )
-        for date_key in date_keys:
-            file_path = os.path.join(base_dir, "douyu_danmu", date_key, f"{room_id}_{date_key}.txt")
-            if os.path.exists(file_path):
-                file_paths.append(file_path)
-        return file_paths
-
    @classmethod
    def load_day_messages(cls, room_id: str, date_key: str, base_dir: str = "temp") -> List[Dict[str, Any]]:
        file_path = os.path.join(base_dir, "douyu_danmu", date_key, f"{room_id}_{date_key}.txt")
@@ -235,10 +213,6 @@ class DouyuDanmuSummaryHelper:
            "peak_buckets": cls._simplify_peak_buckets(peak_buckets),
            "representative_messages": cls._pick_representative_messages(organized_messages, bucket_stats),
            "raw_window_samples": cls._build_raw_window_samples(peak_buckets, per_bucket_limit=12),
-            # 把去噪后、且只合并了“完全相同重复弹幕”的原始弹幕全文也保留下来。
-            # 这样上层如果希望直接把整场弹幕塞给 LLM，而不是只喂摘要样本，
-            # 就不需要再重新读文件和重复清洗。
-            "raw_transcript_lines": cls._build_raw_transcript_lines(organized_messages),
            # 给日报类 LLM 再补一层“按时间推进的现场切片”。
            # 这样模型除了看热点窗口，还能顺着时间线理解气氛如何起、如何变、最后怎么收，
            # 对粉丝日报这类强调“节目效果”和“接梗链路”的文本尤其有帮助。
@@ -964,27 +938,6 @@ class DouyuDanmuSummaryHelper:
            })
        return windows

-    @classmethod
-    def _build_raw_transcript_lines(cls, messages: List[Dict[str, Any]]) -> List[str]:
-        """
-        生成可直接给 LLM 使用的顺时序弹幕全文。
-        规则：
-        1. 输入消息已经过“系统噪音过滤 + 完全相同重复合并”；
-        2. 不再进一步摘要，尽量保留现场原话；
-        3. 对重复合并过的消息补上次数信息，帮助模型感知刷屏强度。
-        """
-        lines: List[str] = []
-        for item in messages:
-            content = str(item.get("content") or "").strip()
-            if not content:
-                continue
-            time_text = str(item.get("timestamp_text") or "").strip()
-            nickname = str(item.get("nickname") or "").strip() or "观众"
-            repeat_count = int(item.get("repeat_count", 1) or 1)
-            repeat_suffix = f" [重复{repeat_count}次]" if repeat_count > 1 else ""
-            lines.append(f"[{time_text}] {nickname}：{content}{repeat_suffix}")
-        return lines
-
    @classmethod
    def _build_chronological_samples(
        cls,