Files
abot/plugins/douyu/local_test_runner.py
liuwei 31848f67f6 重构斗鱼粉丝日报信息提纯链路
- 新增本地弹幕文件测试入口,支持直接对样本文件生成提纯结果
- 将本地统计、主题证据簇和语义事实提示接入斗鱼日报LLM材料
- 明确降低情绪刷屏权重,改为优先提取赛事、位置、英雄、对局和场外互动信息
2026-04-29 14:47:42 +08:00

96 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
斗鱼弹幕本地测试脚本。
用途:
1. 直接读取用户提供的本地弹幕文本样本;
2. 跑一遍“本地提纯 + 证据簇提炼”链路;
3. 将结果输出到 temp/douyu_materials方便人工查看
4. 不依赖 Redis、Dify、直播 session。
"""
import importlib.util
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
def _load_helper():
    """Load ``DouyuDanmuSummaryHelper`` from the sibling ``danmu_summary.py``.

    The module is imported by file path (not through the package) under the
    private module name ``douyu_danmu_summary_local``.  It is re-executed on
    every call.

    Returns:
        The ``DouyuDanmuSummaryHelper`` attribute of the freshly loaded module.

    Raises:
        ImportError: If no import spec/loader can be built for the file.
        Any exception raised while executing ``danmu_summary.py`` propagates.
    """
    import sys  # local import: the file's import header does not provide ``sys``

    module_name = "douyu_danmu_summary_local"
    module_path = Path(__file__).resolve().parent / "danmu_summary.py"

    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # Explicit check instead of ``assert``: asserts vanish under ``python -O``
    # and the original never guarded against ``spec`` itself being ``None``.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {module_path}")

    module = importlib.util.module_from_spec(spec)

    # Register the module before executing it, as the importlib recipe for
    # importing a source file does; code inside the module that resolves
    # itself through ``sys.modules`` would otherwise not find it.
    previous = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind; put back whatever
        # an earlier successful call had registered.
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    return module.DouyuDanmuSummaryHelper
def _build_session(room_id: str, anchor_day: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a minimal session dict for a batch of locally loaded messages.

    Args:
        room_id: Room identifier; copied into the result and used as the
            prefix of ``session_id``.
        anchor_day: Day string such as ``"2026-04-28"``; its dashes are
            removed when it is embedded in ``session_id``.
        messages: Message dicts.  Only the ``"timestamp"`` entry is read here;
            present values must support ``strftime`` and mutual ordering.

    Returns:
        A dict with the keys ``session_id``, ``room_id``, ``anchor_day``,
        ``nickname``, ``room_name`` and ``segments``.  ``session_id`` ends in
        ``_empty`` when ``messages`` is empty and in ``_local_test``
        otherwise.  ``segments`` holds one ``start_time``/``end_time`` pair
        spanning the earliest and latest timestamps, or is empty when no
        message carries a timestamp.
    """
    # Messages without a usable timestamp cannot bound a segment.  The
    # original sorted them to the front (as ``datetime.min``) and then crashed
    # formatting the missing value, so they are left out of the bounds here.
    stamps = sorted(
        stamp for stamp in (item.get("timestamp") for item in messages) if stamp
    )

    time_format = "%Y-%m-%d %H:%M:%S"
    segments = []
    if stamps:
        segments.append({
            "start_time": stamps[0].strftime(time_format),
            "end_time": stamps[-1].strftime(time_format),
        })

    suffix = "local_test" if messages else "empty"
    return {
        "session_id": f"{room_id}_{anchor_day.replace('-', '')}_{suffix}",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": segments,
    }
def _split_sample_name(file_name: str):
    """Split a ``<room_id>_<YYYYMMDD>...`` file stem into ``(room_id, anchor_day)``.

    Only the first eight characters after the first underscore are used for
    the date; anything following them is ignored.

    Raises:
        ValueError: If there is no underscore, the room id is empty, or the
            date part does not start with eight digits.
    """
    room_id, separator, date_key = file_name.partition("_")
    date_digits = date_key[:8]
    if not (room_id and separator and len(date_digits) == 8 and date_digits.isdigit()):
        raise ValueError(
            f"sample file name {file_name!r} must look like '<room_id>_<YYYYMMDD>'"
        )
    return room_id, f"{date_digits[:4]}-{date_digits[4:6]}-{date_digits[6:8]}"


def run_local_test(file_path: str) -> str:
    """Run the local summary pipeline on one sample file and dump a preview.

    The room id and anchor day are parsed from the file stem, the messages are
    loaded and turned into an LLM payload via the helper returned by
    ``_load_helper()``, and a trimmed preview of that payload is written as
    JSON to ``temp/douyu_materials/<stem>_local_test_result.json`` under the
    current working directory.

    Args:
        file_path: Path to the sample file; its stem must look like
            ``<room_id>_<YYYYMMDD>``.

    Returns:
        The path of the written JSON file, as a string.

    Raises:
        ValueError: If the file stem does not match the expected pattern.
    """
    source = Path(file_path)
    file_name = source.stem
    # Validate the name before loading anything so a badly named sample fails
    # fast with a clear message instead of an unpacking error or a malformed
    # anchor day.
    room_id, anchor_day = _split_sample_name(file_name)

    helper = _load_helper()
    resolved_path = str(source.resolve())
    messages = helper.load_messages_from_file(resolved_path)
    session = _build_session(room_id, anchor_day, messages)
    payload = helper.build_llm_payload(room_id, session, messages)

    compact = payload.get("compact_prompt_assets", {}) or {}
    hints = compact.get("semantic_fact_hints", {}) or {}

    def head(container, key, limit):
        # A missing or falsy entry is treated as an empty list before slicing.
        return (container.get(key, []) or [])[:limit]

    result = {
        "file_path": resolved_path,
        "message_count": len(messages),
        "session_meta": payload.get("session_meta", {}) or {},
        "local_stats_preview": {
            "top_repeated_messages": head(payload, "repeated_messages", 10),
            "top_burst_terms": head(payload, "burst_terms", 10),
            "peak_buckets": head(payload, "peak_buckets", 6),
        },
        "topic_evidence_clusters": head(hints, "topic_clusters", 8),
        "hero_mentions": head(hints, "hero_mentions", 8),
        "content_cues": head(compact, "content_cues", 16),
        "timeline_digest": head(compact, "timeline_digest", 12),
        "representative_messages": head(payload, "representative_messages", 18),
    }

    output_dir = Path(os.getcwd()) / "temp" / "douyu_materials"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{file_name}_local_test_result.json"
    # NOTE(review): json.dumps raises TypeError on non-JSON values (e.g.
    # datetime); presumably the payload previews are plain data - confirm.
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(output_path)
if __name__ == "__main__":
sample_files = [
r"plugins\douyu\danmu_test\52876_20260428.txt",
r"plugins\douyu\danmu_test\52876_20260429.txt",
]
for sample in sample_files:
path = run_local_test(sample)
print(path)