- 新增本地弹幕文件测试入口,支持直接对样本文件生成提纯结果 - 将本地统计、主题证据簇和语义事实提示接入斗鱼日报LLM材料 - 明确降低情绪刷屏权重,改为优先提取赛事、位置、英雄、对局和场外互动信息
96 lines
3.6 KiB
Python
96 lines
3.6 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
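Local test script for Douyu danmu (bullet-chat) data.

Purpose:
1. Read the user-supplied local danmu text samples directly;
2. Run the "local refinement + evidence-cluster extraction" pipeline once;
3. Write the results to temp/douyu_materials for manual inspection;
4. No dependency on Redis, Dify, or a live-stream session.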
|
||
"""
|
||
|
||
import importlib.util
|
||
import json
|
||
import os
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List
|
||
|
||
|
||
def _load_helper():
    """Load ``DouyuDanmuSummaryHelper`` from the ``danmu_summary.py`` next to this file.

    The module is executed directly from its file path under the private
    module name ``douyu_danmu_summary_local`` instead of going through a
    regular ``import`` statement.

    Returns:
        The ``DouyuDanmuSummaryHelper`` attribute of the freshly executed module.

    Raises:
        ImportError: If no import spec or loader can be built for the file.
        AttributeError: If the module does not define ``DouyuDanmuSummaryHelper``.

    Any exception raised while executing the module itself propagates unchanged.
    """
    module_path = Path(__file__).resolve().parent / "danmu_summary.py"
    spec = importlib.util.spec_from_file_location("douyu_danmu_summary_local", module_path)
    # Validate explicitly and *before* the spec is used: an ``assert`` would be
    # stripped under ``python -O`` and a ``None`` spec would otherwise fail
    # inside ``module_from_spec`` with an unrelated AttributeError.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for {module_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.DouyuDanmuSummaryHelper
|
||
|
||
|
||
def _build_session(room_id: str, anchor_day: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a minimal synthetic session record for an offline danmu sample.

    Args:
        room_id: Room identifier; copied into the record and into ``session_id``.
        anchor_day: Day string; its dashes are stripped when building ``session_id``.
        messages: Message dicts. A truthy ``timestamp`` value must support
            ``strftime`` (presumably a ``datetime`` -- confirm against the loader).

    Returns:
        A dict with the keys ``session_id``, ``room_id``, ``anchor_day``,
        ``nickname``, ``room_name`` and ``segments``. ``session_id`` ends in
        ``_empty`` when ``messages`` is empty and ``_local_test`` otherwise.
        ``segments`` holds one ``start_time``/``end_time`` entry spanning the
        earliest and latest timestamp, or is empty when no message carries one.
    """
    time_format = "%Y-%m-%d %H:%M:%S"

    # Only messages that really carry a timestamp may bound the segment.
    # Falling back to a placeholder for ordering would put untimestamped
    # messages first and then crash when their timestamp is formatted.
    stamps = [item["timestamp"] for item in messages if item.get("timestamp")]

    segments: List[Dict[str, str]] = []
    if stamps:
        segments.append({
            "start_time": min(stamps).strftime(time_format),
            "end_time": max(stamps).strftime(time_format),
        })

    suffix = "local_test" if messages else "empty"
    return {
        "session_id": f"{room_id}_{anchor_day.replace('-', '')}_{suffix}",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": segments,
    }
|
||
|
||
|
||
def _parse_sample_stem(file_stem: str):
    """Split a ``<room_id>_<YYYYMMDD>`` file stem into ``(room_id, anchor_day)``.

    Anything after the eighth character of the date part is ignored.
    Raises ``ValueError`` with a descriptive message for a malformed stem.
    """
    room_id, separator, date_key = file_stem.partition("_")
    date_digits = date_key[:8]
    if not separator or not room_id or len(date_digits) != 8 or not date_digits.isdigit():
        raise ValueError(
            f"Sample file name {file_stem!r} must look like '<room_id>_<YYYYMMDD>'"
        )
    try:
        # Used purely to reject impossible dates such as 20261340.
        datetime.strptime(date_digits, "%Y%m%d")
    except ValueError as err:
        raise ValueError(
            f"Sample file name {file_stem!r} does not contain a valid YYYYMMDD date"
        ) from err
    return room_id, f"{date_digits[:4]}-{date_digits[4:6]}-{date_digits[6:8]}"


def _head(mapping: Dict[str, Any], key: str, limit: int) -> List[Any]:
    """Return at most ``limit`` leading items of ``mapping[key]``; missing or falsy values yield ``[]``."""
    return (mapping.get(key) or [])[:limit]


def run_local_test(file_path: str) -> str:
    """Run the offline danmu pipeline on one sample file and dump a preview as JSON.

    The room id and day are taken from the file name, which must look like
    ``<room_id>_<YYYYMMDD>.<ext>``. Messages are loaded and turned into an LLM
    payload by the helper returned from ``_load_helper()``; a trimmed preview of
    that payload is written to
    ``<cwd>/temp/douyu_materials/<stem>_local_test_result.json``.

    Args:
        file_path: Path of the sample danmu file (relative paths are resolved
            against the current working directory).

    Returns:
        The path of the JSON file that was written, as a string.

    Raises:
        ValueError: If the file name does not follow ``<room_id>_<YYYYMMDD>``.

    NOTE(review): the payload is assumed to be a dict whose list-valued entries
    are JSON-serialisable -- confirm against ``build_llm_payload``.
    """
    # Fail fast on a badly named file, before any loading work is done.
    file_name = Path(file_path).stem
    room_id, anchor_day = _parse_sample_stem(file_name)

    helper = _load_helper()
    resolved_path = str(Path(file_path).resolve())
    messages = helper.load_messages_from_file(resolved_path)
    session = _build_session(room_id, anchor_day, messages)
    payload = helper.build_llm_payload(room_id, session, messages)

    compact = payload.get("compact_prompt_assets") or {}
    fact_hints = compact.get("semantic_fact_hints") or {}

    result = {
        "file_path": resolved_path,
        "message_count": len(messages),
        "session_meta": payload.get("session_meta") or {},
        "local_stats_preview": {
            "top_repeated_messages": _head(payload, "repeated_messages", 10),
            "top_burst_terms": _head(payload, "burst_terms", 10),
            "peak_buckets": _head(payload, "peak_buckets", 6),
        },
        "topic_evidence_clusters": _head(fact_hints, "topic_clusters", 8),
        "hero_mentions": _head(fact_hints, "hero_mentions", 8),
        "content_cues": _head(compact, "content_cues", 16),
        "timeline_digest": _head(compact, "timeline_digest", 12),
        "representative_messages": _head(payload, "representative_messages", 18),
    }

    output_dir = Path(os.getcwd()) / "temp" / "douyu_materials"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{file_name}_local_test_result.json"
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(output_path)
|
||
|
||
|
||
if __name__ == "__main__":
    # Build the sample paths from components so the separator is right on every
    # OS: a raw "plugins\douyu\..." string is a single file name on POSIX, which
    # breaks both file lookup and the room-id/date parsing of the file stem.
    # The paths are relative, so the script is expected to be started from the
    # directory that contains "plugins".
    sample_dir = Path("plugins") / "douyu" / "danmu_test"
    sample_files = [
        sample_dir / "52876_20260428.txt",
        sample_dir / "52876_20260429.txt",
    ]
    for sample in sample_files:
        # Print where each result file was written.
        print(run_local_test(str(sample)))
|