# -*- coding: utf-8 -*-
"""
Local test script for Douyu danmu (bullet-chat) processing.

Purpose:
1. Read a user-provided local danmu text sample directly;
2. Run the "local refinement + evidence-cluster extraction" pipeline once;
3. Write the results to temp/douyu_materials for manual inspection;
4. No dependency on Redis, Dify, or a live-stream session.
"""
import importlib.util
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


def _clean_text(value: Any) -> str:
    """Return ``value`` as a stripped string; falsy values become ``""``."""
    return str(value or "").strip()


def _to_int(value: Any) -> int:
    """Return ``value`` as an int; falsy values (``None``, ``""``, 0) become 0."""
    return int(value or 0)


def _load_module_from_path(module_name: str, module_path: Path):
    """
    Import a module directly from a file path and return the module object.

    Raises:
        ImportError: if no import spec/loader can be built for ``module_path``.
    """
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # Explicit check instead of ``assert``: asserts are stripped under ``-O``,
    # and ``spec`` itself may be None when no loader matches the path.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {module_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


def _load_helper():
    """
    Load ``danmu_summary.py`` from this script's directory and return its
    ``DouyuDanmuSummaryHelper`` attribute.

    The attribute is returned as-is (not instantiated); callers in this file
    invoke ``load_messages_from_file`` / ``build_llm_payload`` directly on it.
    """
    current_dir = Path(__file__).resolve().parent
    module = _load_module_from_path(
        "douyu_danmu_summary_local",
        current_dir / "danmu_summary.py",
    )
    return module.DouyuDanmuSummaryHelper


def _load_report_template_module():
    """
    Load the report template module on its own, by file path.

    This way the local preview does not need to fully initialise the plugin
    and does not depend on Redis or other runtime objects.
    """
    current_dir = Path(__file__).resolve().parent
    project_root_str = str(current_dir.parent.parent)
    # Add the project root to sys.path so that report_template.py can import
    # project modules such as ``utils`` internally.
    if project_root_str not in sys.path:
        sys.path.insert(0, project_root_str)
    return _load_module_from_path(
        "douyu_report_template_local",
        current_dir / "report_template.py",
    )


def _build_session(room_id: str, anchor_day: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Build a minimal session dict for the local pipeline.

    Args:
        room_id: Room identifier, copied into the session.
        anchor_day: Day string; its ``-`` characters are removed for the id.
        messages: Message dicts; ``timestamp`` values are expected to support
            comparison and ``strftime`` (datetime objects — presumably what
            the helper's loader produces; verify against it).

    Returns:
        A session dict. With no messages the id ends in ``_empty`` and
        ``segments`` is empty. Otherwise the id ends in ``_local_test`` and
        ``segments`` holds one entry spanning the earliest to the latest
        timestamp (or stays empty when no message carries a timestamp).
    """
    day_token = anchor_day.replace("-", "")
    session: Dict[str, Any] = {
        "session_id": f"{room_id}_{day_token}_{'local_test' if messages else 'empty'}",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": [],
    }
    # Messages without a timestamp are skipped here; indexing them directly
    # would raise even though they are otherwise tolerated.
    timestamps = [item["timestamp"] for item in messages if item.get("timestamp")]
    if timestamps:
        session["segments"].append({
            "start_time": min(timestamps).strftime("%Y-%m-%d %H:%M:%S"),
            "end_time": max(timestamps).strftime("%Y-%m-%d %H:%M:%S"),
        })
    return session


def _build_preview_template_payload(local_result: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert the local test result into the structure the fans daily report
    template consumes.

    The preview path and the real template then share the same field names,
    which makes later troubleshooting more direct.

    Args:
        local_result: The dict written by ``run_local_test``. Missing or
            ``None`` sections are treated as empty.

    Returns:
        The template payload dict.
    """
    session_meta = local_result.get("session_meta", {}) or {}
    local_stats_preview = local_result.get("local_stats_preview", {}) or {}
    topic_clusters = local_result.get("topic_evidence_clusters", []) or []
    hero_mentions = local_result.get("hero_mentions", []) or []
    content_cues = local_result.get("content_cues", []) or []
    timeline_digest = local_result.get("timeline_digest", []) or []
    representative_messages = local_result.get("representative_messages", []) or []

    # ``or []`` guards against keys that are present but hold None.
    top_repeated = local_stats_preview.get("top_repeated_messages") or []
    top_burst_terms = local_stats_preview.get("top_burst_terms") or []
    peak_buckets = local_stats_preview.get("peak_buckets") or []

    message_count = _to_int(session_meta.get("message_count", 0))
    unique_user_count = _to_int(session_meta.get("unique_user_count", 0))

    def repeated_entry(item: Dict[str, Any]) -> Dict[str, Any]:
        return {
            "text": _clean_text(item.get("text")),
            "count": _to_int(item.get("count", 0)),
            "user_count": _to_int(item.get("user_count", 0)),
        }

    def text_count_entry(item: Dict[str, Any]) -> Dict[str, Any]:
        return {
            "text": _clean_text(item.get("text")),
            "count": _to_int(item.get("count", 0)),
        }

    return {
        "report_meta": {
            "room_id": _clean_text(session_meta.get("room_id")),
            "anchor_day": _clean_text(session_meta.get("anchor_day")),
            "nickname": _clean_text(session_meta.get("nickname")),
            "room_name": _clean_text(session_meta.get("room_name")),
            "session_count": 1,
            "message_count": message_count,
            "unique_user_count": unique_user_count,
        },
        "local_stats": {
            "message_count": message_count,
            "unique_user_count": unique_user_count,
            # Filter by kind first, then cap at 8 entries.
            "top_emotion_bursts": [
                text_count_entry(item)
                for item in content_cues
                if _clean_text(item.get("kind")) == "emotion"
            ][:8],
            "top_repeated_messages": [repeated_entry(item) for item in top_repeated[:8]],
            "peak_windows": [
                {
                    "start_time": _clean_text(item.get("start_time")),
                    "message_count": _to_int(item.get("message_count", 0)),
                    "user_count": _to_int(item.get("user_count", 0)),
                }
                for item in peak_buckets[:6]
            ],
        },
        "topic_evidence_clusters": [
            {
                "label": _clean_text(item.get("label")),
                # ``match_count`` wins when the key exists; ``count`` is only
                # the fallback for a missing key.
                "count": _to_int(item.get("match_count", item.get("count", 0))),
                "user_count": _to_int(item.get("user_count", 0)),
                # Strip the dash so a one-sided or empty range has no stray "-".
                "time_range": (
                    f"{_clean_text(item.get('first_hm'))}-{_clean_text(item.get('last_hm'))}"
                ).strip("-"),
                "keywords": item.get("keywords", []) or [],
                "samples": item.get("samples", []) or [],
            }
            for item in topic_clusters[:6]
        ],
        "compact_scene_material": {
            "semantic_fact_hints": {
                "hero_mentions": hero_mentions[:6],
            },
            "content_cues": content_cues[:18],
            "timeline_digest": timeline_digest[:20],
        },
        "representative_messages": representative_messages[:12],
        "repeated_messages": [repeated_entry(item) for item in top_repeated[:12]],
        "burst_terms": [text_count_entry(item) for item in top_burst_terms[:12]],
        "peak_buckets": peak_buckets[:6],
        # Up to two keywords from each of the first four clusters; no real
        # frequency is available here, so count is a fixed 0.
        "top_terms": [
            {"term": str(keyword).strip(), "count": 0}
            for item in topic_clusters[:4]
            for keyword in (item.get("keywords", []) or [])[:2]
            if str(keyword).strip()
        ],
    }


def _first_sample_text(samples: List[Any], limit: int) -> str:
    """
    Return the stripped ``content`` of the first sample, cut to ``limit`` chars.

    Assumes sample entries are dicts with a ``content`` key — TODO confirm
    against the helper's output. Returns ``""`` when ``samples`` is empty.
    """
    if not samples:
        return ""
    return _clean_text(samples[0].get("content"))[:limit]


def _build_preview_report_text(payload: Dict[str, Any]) -> str:
    """
    Produce a stable sample report text for the local template preview.

    No real LLM is involved: the already-refined results are assembled into a
    fixed structure so we can quickly check whether the template shows the
    key information completely.

    Args:
        payload: The dict produced by ``_build_preview_template_payload``.

    Returns:
        The report text, one entry per line, joined with ``\\n``.
    """
    meta = payload.get("report_meta", {}) or {}
    topic_clusters = payload.get("topic_evidence_clusters", []) or []
    # Walk the nesting step by step so a None at any level degrades to empty
    # instead of raising AttributeError.
    scene_material = payload.get("compact_scene_material") or {}
    fact_hints = scene_material.get("semantic_fact_hints") or {}
    hero_mentions = fact_hints.get("hero_mentions") or []
    repeated_messages = payload.get("repeated_messages", []) or []
    burst_terms = payload.get("burst_terms", []) or []
    peak_buckets = payload.get("peak_buckets", []) or []
    representative_messages = payload.get("representative_messages", []) or []

    anchor_day = _clean_text(meta.get("anchor_day"))
    lines = [
        f"{anchor_day} 这场直播的弹幕不只是热闹,核心信息也很密:赛事、位置、英雄、团播人物和摄像头梗都有人追着聊。",
        "【今日重点信息】",
    ]

    for item in topic_clusters[:5]:
        label = _clean_text(item.get("label"))
        time_range = _clean_text(item.get("time_range"))
        count = _to_int(item.get("count", 0))
        sample_text = _first_sample_text(item.get("samples", []) or [], 42)
        if label and sample_text:
            lines.append(f"- {label}从 {time_range or '全场'} 一直有人聊,相关弹幕约 {count} 条,代表说法是「{sample_text}」。")

    lines.append("【核心讨论话题】")
    for item in topic_clusters[:4]:
        label = _clean_text(item.get("label"))
        keywords = [
            str(keyword).strip()
            for keyword in (item.get("keywords", []) or [])[:5]
            if str(keyword).strip()
        ]
        if label and keywords:
            lines.append(f"- 大家围着 {label} 打转,关键词主要是 {'、'.join(keywords)}。")

    lines.append("【英雄与对局焦点】")
    for item in hero_mentions[:4]:
        hero_name = _clean_text(item.get("hero"))
        mention_count = _to_int(item.get("mention_count", 0))
        sample_text = _first_sample_text(item.get("samples", []) or [], 36)
        if hero_name and sample_text:
            lines.append(f"- {hero_name}被点名 {mention_count} 次,弹幕现场直接聊到「{sample_text}」。")

    lines.append("【今日笑点】")
    if peak_buckets:
        top_bucket = peak_buckets[0]
        # [-8:-3] keeps HH:MM when start_time ends with "HH:MM:SS".
        hot_time = str(top_bucket.get("start_time") or "")[-8:-3]
        hot_count = _to_int(top_bucket.get("message_count", 0))
        lines.append(f"- {hot_time} 前后是最热窗口,弹幕量直接冲到 {hot_count} 条。")
    if repeated_messages:
        item = repeated_messages[0]
        lines.append(f"- 复读冠军是「{_clean_text(item.get('text'))[:24]}」,一天被刷了 {_to_int(item.get('count', 0))} 次。")
    if burst_terms:
        item = burst_terms[0]
        lines.append(f"- 情绪词「{_clean_text(item.get('text'))}」集中爆了 {_to_int(item.get('count', 0))} 次。")

    lines.append("【弹幕名场面】")
    for item in representative_messages[:5]:
        nickname = _clean_text(item.get("nickname")) or "观众"
        content = _clean_text(item.get("content"))
        if content:
            lines.append(f"- {nickname}:{content[:44]}")

    lines.append("【梗王榜】")
    for item in repeated_messages[:3]:
        lines.append(f"- {_clean_text(item.get('text'))[:28]}|复读 {_to_int(item.get('count', 0))} 次")

    lines.append("【收尾播报】")
    lines.append("- 本地预览版已经把有效信息和乐子一起塞进同一张图里了。")
    return "\n".join(lines)


def _parse_sample_name(file_name: str) -> Tuple[str, str]:
    """
    Split a sample file stem of the form ``<room_id>_<YYYYMMDD>...`` into
    ``(room_id, "YYYY-MM-DD")``.

    Raises:
        ValueError: if the stem has no ``_``, an empty room id, or the part
            after the first ``_`` does not start with eight digits.
    """
    room_id, separator, date_key = file_name.partition("_")
    day_digits = date_key[:8]
    if not separator or not room_id or len(day_digits) != 8 or not day_digits.isdigit():
        raise ValueError(
            f"sample file name must look like '<room_id>_<YYYYMMDD>', got {file_name!r}"
        )
    return room_id, f"{day_digits[:4]}-{day_digits[4:6]}-{day_digits[6:8]}"


def _ensure_output_dir() -> Path:
    """Create (if needed) and return ``<cwd>/temp/douyu_materials``."""
    output_dir = Path(os.getcwd()) / "temp" / "douyu_materials"
    output_dir.mkdir(parents=True, exist_ok=True)
    return output_dir


def run_local_test(file_path: str) -> str:
    """
    Run the local refinement pipeline on one sample file and dump the result.

    Args:
        file_path: Path to a sample whose stem is ``<room_id>_<YYYYMMDD>``.

    Returns:
        The path of the JSON file written to ``<cwd>/temp/douyu_materials``.

    Raises:
        ValueError: if the file name does not follow the expected pattern.
    """
    helper = _load_helper()
    source_path = Path(file_path)
    resolved_path = str(source_path.resolve())
    file_name = source_path.stem
    # Validate the name before the file is loaded, so a bad name fails fast.
    room_id, anchor_day = _parse_sample_name(file_name)

    messages = helper.load_messages_from_file(resolved_path)
    session = _build_session(room_id, anchor_day, messages)
    payload = helper.build_llm_payload(room_id, session, messages)
    compact = payload.get("compact_prompt_assets", {}) or {}
    fact_hints = compact.get("semantic_fact_hints", {}) or {}

    result = {
        "file_path": resolved_path,
        "message_count": len(messages),
        "session_meta": payload.get("session_meta", {}) or {},
        "local_stats_preview": {
            "top_repeated_messages": (payload.get("repeated_messages", []) or [])[:10],
            "top_burst_terms": (payload.get("burst_terms", []) or [])[:10],
            "peak_buckets": (payload.get("peak_buckets", []) or [])[:6],
        },
        "topic_evidence_clusters": (fact_hints.get("topic_clusters", []) or [])[:8],
        "hero_mentions": (fact_hints.get("hero_mentions", []) or [])[:8],
        "content_cues": (compact.get("content_cues", []) or [])[:16],
        "timeline_digest": (compact.get("timeline_digest", []) or [])[:12],
        "representative_messages": (payload.get("representative_messages", []) or [])[:18],
    }

    output_path = _ensure_output_dir() / f"{file_name}_local_test_result.json"
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(output_path)


def render_fans_preview_from_file(file_path: str, local_result_path: Optional[str] = None) -> str:
    """
    Read a local danmu file and produce the fans daily report HTML preview.

    After each change to the refinement logic or the template, the same
    command can be used to quickly check the final rendering.

    Args:
        file_path: Path to the sample danmu file.
        local_result_path: Optional path to a JSON result already produced by
            ``run_local_test`` for this file. When omitted, the pipeline is
            run here.

    Returns:
        The path of the HTML file written to ``<cwd>/temp/douyu_materials``.
    """
    result_path = Path(local_result_path) if local_result_path else Path(run_local_test(file_path))
    local_result = json.loads(result_path.read_text(encoding="utf-8"))
    payload = _build_preview_template_payload(local_result)
    report_text = _build_preview_report_text(payload)

    report_template = _load_report_template_module()
    html_content = report_template.render_fans_daily_report_html(
        payload=payload,
        fans_report_text=report_text,
    )

    output_path = _ensure_output_dir() / f"{Path(file_path).stem}_fans_template_preview.html"
    output_path.write_text(html_content, encoding="utf-8")
    return str(output_path)


def main() -> None:
    """Run the pipeline and the HTML preview for the bundled sample files."""
    # Build the paths from components so the separator matches the host OS;
    # a literal backslash path is one opaque file name on POSIX.
    sample_dir = Path("plugins") / "douyu" / "danmu_test"
    sample_files = [
        sample_dir / "52876_20260428.txt",
        sample_dir / "52876_20260429.txt",
    ]
    for sample in sample_files:
        result_path = run_local_test(str(sample))
        # Reuse the JSON just written instead of running the pipeline twice.
        preview_path = render_fans_preview_from_file(str(sample), local_result_path=result_path)
        print(result_path)
        print(preview_path)


if __name__ == "__main__":
    main()