# abot/plugins/douyu/local_test_runner.py

# -*- coding: utf-8 -*-
"""
斗鱼弹幕本地测试脚本。

用途：
1. 直接读取用户提供的本地弹幕文本样本；
2. 跑一遍“本地提纯 + 证据簇提炼”链路；
3. 将结果输出到 temp/douyu_materials，方便人工查看；
4. 不依赖 Redis、Dify、直播 session。
"""

import importlib.util
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List


def _load_helper():
    """Load ``DouyuDanmuSummaryHelper`` from the sibling ``danmu_summary.py``.

    The module is executed directly from its file path (the directory that
    contains this script) under the private module name
    ``douyu_danmu_summary_local`` rather than being imported by package name.
    NOTE(review): presumably this avoids importing the surrounding package
    and its service dependencies -- confirm.

    Returns:
        The ``DouyuDanmuSummaryHelper`` attribute of the freshly loaded module.

    Raises:
        ImportError: If no import spec or loader can be created for the file.
        Any error raised while executing the module (for example a missing
        file) propagates unchanged.
    """
    module_path = Path(__file__).resolve().parent / "danmu_summary.py"
    spec = importlib.util.spec_from_file_location("douyu_danmu_summary_local", module_path)
    # An explicit check instead of ``assert``: asserts are stripped under
    # ``python -O``, and ``spec`` itself may be None, not only ``spec.loader``.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot create an import spec for {module_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.DouyuDanmuSummaryHelper


def _build_session(room_id: str, anchor_day: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    ordered = sorted(messages, key=lambda item: item.get("timestamp") or datetime.min)
    if not ordered:
        return {
            "session_id": f"{room_id}_{anchor_day.replace('-', '')}_empty",
            "room_id": room_id,
            "anchor_day": anchor_day,
            "nickname": "",
            "room_name": "",
            "segments": [],
        }
    return {
        "session_id": f"{room_id}_{anchor_day.replace('-', '')}_local_test",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": [{
            "start_time": ordered[0]["timestamp"].strftime("%Y-%m-%d %H:%M:%S"),
            "end_time": ordered[-1]["timestamp"].strftime("%Y-%m-%d %H:%M:%S"),
        }],
    }


def _split_sample_name(file_name: str):
    """Split a ``<room_id>_<YYYYMMDD>...`` file stem into ``(room_id, anchor_day)``.

    ``anchor_day`` is returned as ``YYYY-MM-DD``. Raises ``ValueError`` when
    the stem has no underscore or the part after it does not start with
    eight digits.
    """
    room_id, separator, date_key = file_name.partition("_")
    if not separator or not date_key[:8].isdigit() or len(date_key) < 8:
        raise ValueError(
            f"sample file name {file_name!r} must look like '<room_id>_<YYYYMMDD>'"
        )
    return room_id, f"{date_key[:4]}-{date_key[4:6]}-{date_key[6:8]}"


def run_local_test(file_path: str) -> str:
    """Run the local danmu pipeline on one sample file and dump a JSON preview.

    The helper class is loaded via ``_load_helper``; messages are read with
    its ``load_messages_from_file`` and fed, together with a session built by
    ``_build_session``, to its ``build_llm_payload``. A trimmed view of the
    payload is written to
    ``<cwd>/temp/douyu_materials/<stem>_local_test_result.json``.

    Args:
        file_path: Path to the sample file. Its stem must be of the form
            ``<room_id>_<YYYYMMDD>`` (extra characters after the date are
            ignored).

    Returns:
        The path of the written JSON file, as a string.

    Raises:
        ValueError: If the file stem does not match the expected pattern.
    """
    helper = _load_helper()
    source = Path(file_path)
    resolved_path = str(source.resolve())
    messages = helper.load_messages_from_file(resolved_path)

    file_name = source.stem
    room_id, anchor_day = _split_sample_name(file_name)
    session = _build_session(room_id, anchor_day, messages)
    payload = helper.build_llm_payload(room_id, session, messages)

    # ``or`` fallbacks guard against keys that are present but set to None.
    compact = payload.get("compact_prompt_assets", {}) or {}
    hints = compact.get("semantic_fact_hints", {}) or {}
    result = {
        "file_path": resolved_path,
        "message_count": len(messages),
        "session_meta": payload.get("session_meta", {}) or {},
        "local_stats_preview": {
            "top_repeated_messages": (payload.get("repeated_messages", []) or [])[:10],
            "top_burst_terms": (payload.get("burst_terms", []) or [])[:10],
            "peak_buckets": (payload.get("peak_buckets", []) or [])[:6],
        },
        "topic_evidence_clusters": (hints.get("topic_clusters", []) or [])[:8],
        "hero_mentions": (hints.get("hero_mentions", []) or [])[:8],
        "content_cues": (compact.get("content_cues", []) or [])[:16],
        "timeline_digest": (compact.get("timeline_digest", []) or [])[:12],
        "representative_messages": (payload.get("representative_messages", []) or [])[:18],
    }

    output_dir = Path.cwd() / "temp" / "douyu_materials"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{file_name}_local_test_result.json"
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(output_path)


if __name__ == "__main__":
    # Sample paths are relative to the current working directory. They are
    # assembled from components instead of backslash-separated literals:
    # on POSIX a backslash is an ordinary file-name character, so the old
    # literals were a single name and ``Path(...).stem`` did not yield
    # ``<room_id>_<date>``. On Windows the resulting string is unchanged.
    sample_dir = Path("plugins") / "douyu" / "danmu_test"
    sample_names = (
        "52876_20260428.txt",
        "52876_20260429.txt",
    )
    for sample_name in sample_names:
        print(run_local_test(str(sample_dir / sample_name)))