# -*- coding: utf-8 -*-
"""
Local test script for Douyu danmu (bullet-chat) messages.

Purpose:
1. Read the user-provided local danmu text samples directly;
2. Run the "local refinement + evidence-cluster extraction" pipeline once;
3. Write the results to temp/douyu_materials for manual inspection;
4. No dependency on Redis, Dify, or a live-stream session.
"""

import importlib.util
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Tuple

# Timestamp format used for the segment boundaries of the synthetic session.
_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def _load_helper() -> Any:
    """Load ``DouyuDanmuSummaryHelper`` from the sibling ``danmu_summary.py``.

    The module is loaded by file path under the private module name
    ``douyu_danmu_summary_local`` instead of through a package import.

    Returns:
        The ``DouyuDanmuSummaryHelper`` attribute of the loaded module.

    Raises:
        ImportError: If no import spec or loader can be created for the file.
    """
    module_path = Path(__file__).resolve().parent / "danmu_summary.py"
    spec = importlib.util.spec_from_file_location("douyu_danmu_summary_local", module_path)
    # Explicit check instead of ``assert``: asserts are stripped under ``-O``.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load helper module from {module_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.DouyuDanmuSummaryHelper


def _build_session(room_id: str, anchor_day: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a minimal synthetic session dict for a batch of local messages.

    Args:
        room_id: Room identifier, used in the session id and stored as-is.
        anchor_day: Day string; its ``-`` characters are removed for the id.
        messages: Message dicts; a ``timestamp`` entry, when present and
            truthy, is expected to support comparison and ``strftime``.

    Returns:
        A dict with ``session_id``, ``room_id``, ``anchor_day``, ``nickname``,
        ``room_name`` and ``segments``. With no messages the id ends in
        ``_empty`` and ``segments`` is empty. Otherwise the id ends in
        ``_local_test`` and ``segments`` holds a single segment spanning the
        earliest to the latest timestamp, or stays empty when no message
        carries a timestamp.
    """
    day_key = anchor_day.replace("-", "")
    suffix = "local_test" if messages else "empty"
    session: Dict[str, Any] = {
        "session_id": f"{room_id}_{day_key}_{suffix}",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": [],
    }

    # Messages without a usable timestamp cannot bound the segment; skipping
    # them avoids calling ``strftime`` on a missing or ``None`` value.
    timestamps = [ts for ts in (item.get("timestamp") for item in messages) if ts]
    if timestamps:
        session["segments"].append({
            "start_time": min(timestamps).strftime(_TIME_FORMAT),
            "end_time": max(timestamps).strftime(_TIME_FORMAT),
        })
    return session


def _parse_sample_name(file_name: str) -> Tuple[str, str]:
    """Split a ``<room_id>_<YYYYMMDD>...`` file stem into room id and day.

    Args:
        file_name: File name without extension, e.g. ``52876_20260428``.

    Returns:
        ``(room_id, anchor_day)`` where ``anchor_day`` is ``YYYY-MM-DD``.

    Raises:
        ValueError: If the stem has no underscore, or the part after the first
            underscore does not start with a valid ``YYYYMMDD`` date.
    """
    room_id, separator, date_key = file_name.partition("_")
    if not separator:
        raise ValueError(
            f"Sample file name {file_name!r} must look like '<room_id>_<YYYYMMDD>'"
        )

    day_digits = date_key[:8]
    if len(day_digits) != 8 or not day_digits.isdigit():
        raise ValueError(
            f"Sample file name {file_name!r} must contain an 8-digit date after the room id"
        )
    # Reject digit strings that are not real calendar dates (e.g. month 13).
    try:
        datetime.strptime(day_digits, "%Y%m%d")
    except ValueError as err:
        raise ValueError(
            f"Sample file name {file_name!r} contains an invalid date {day_digits!r}"
        ) from err

    return room_id, f"{day_digits[:4]}-{day_digits[4:6]}-{day_digits[6:8]}"


def run_local_test(file_path: str) -> str:
    """Run the local summary pipeline on one sample file and dump the result.

    The file stem must be ``<room_id>_<YYYYMMDD>``; both parts are used to
    build the synthetic session passed to the helper. A trimmed preview of the
    helper payload is written as JSON to
    ``<cwd>/temp/douyu_materials/<stem>_local_test_result.json``.

    Args:
        file_path: Path to the local danmu text sample.

    Returns:
        The path of the written JSON result file, as a string.

    Raises:
        ValueError: If the file stem does not match ``<room_id>_<YYYYMMDD>``.
    """
    helper = _load_helper()
    resolved_path = str(Path(file_path).resolve())
    messages = helper.load_messages_from_file(resolved_path)

    file_name = Path(file_path).stem
    room_id, anchor_day = _parse_sample_name(file_name)

    session = _build_session(room_id, anchor_day, messages)
    payload = helper.build_llm_payload(room_id, session, messages)
    # ``or`` fallbacks guard against keys that are present but hold ``None``.
    compact = payload.get("compact_prompt_assets", {}) or {}
    fact_hints = compact.get("semantic_fact_hints", {}) or {}

    result = {
        "file_path": resolved_path,
        "message_count": len(messages),
        "session_meta": payload.get("session_meta", {}) or {},
        "local_stats_preview": {
            "top_repeated_messages": (payload.get("repeated_messages", []) or [])[:10],
            "top_burst_terms": (payload.get("burst_terms", []) or [])[:10],
            "peak_buckets": (payload.get("peak_buckets", []) or [])[:6],
        },
        "topic_evidence_clusters": (fact_hints.get("topic_clusters", []) or [])[:8],
        "hero_mentions": (fact_hints.get("hero_mentions", []) or [])[:8],
        "content_cues": (compact.get("content_cues", []) or [])[:16],
        "timeline_digest": (compact.get("timeline_digest", []) or [])[:12],
        "representative_messages": (payload.get("representative_messages", []) or [])[:18],
    }

    output_dir = Path(os.getcwd()) / "temp" / "douyu_materials"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{file_name}_local_test_result.json"
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(output_path)


def main() -> None:
    """Run the local test on the bundled sample files and print each output path."""
    # Built with pathlib so the relative paths work on POSIX as well as
    # Windows; hard-coded backslashes are not path separators on POSIX.
    sample_dir = Path("plugins") / "douyu" / "danmu_test"
    sample_files = [
        sample_dir / "52876_20260428.txt",
        sample_dir / "52876_20260429.txt",
    ]
    for sample in sample_files:
        print(run_local_test(str(sample)))


if __name__ == "__main__":
    main()