Files
abot/plugins/douyu/local_test_runner.py
liuwei 7de1dc9ee3 补充斗鱼粉丝日报本地预览脚本并压缩版面
1. 为本地测试脚本增加粉丝日报 HTML 预览输出,统一映射新版模板需要的数据结构\n2. 内置稳定的预览文案拼装逻辑,方便不依赖LLM也能本地验收页面效果\n3. 压缩粉丝日报模板的卡片间距、字号、行高和高度,让同样的信息更紧凑简约地展示
2026-04-29 15:19:27 +08:00

316 lines
14 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
斗鱼弹幕本地测试脚本。
用途:
1. 直接读取用户提供的本地弹幕文本样本;
2. 跑一遍“本地提纯 + 证据簇提炼”链路;
3. 将结果输出到 temp/douyu_materials,方便人工查看;
4. 不依赖 Redis、Dify、直播 session。
"""
import importlib.util
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
def _load_helper():
    """Load ``DouyuDanmuSummaryHelper`` from the sibling ``danmu_summary.py``.

    The module is executed directly from its file path under the private
    module name ``douyu_danmu_summary_local`` rather than being imported
    through the package.

    Returns:
        The ``DouyuDanmuSummaryHelper`` attribute of the freshly executed module.
    """
    helper_file = Path(__file__).resolve().parent / "danmu_summary.py"
    helper_spec = importlib.util.spec_from_file_location(
        "douyu_danmu_summary_local", helper_file
    )
    helper_module = importlib.util.module_from_spec(helper_spec)
    loader = helper_spec.loader
    assert loader is not None
    loader.exec_module(helper_module)
    return helper_module.DouyuDanmuSummaryHelper
def _load_report_template_module():
    """Load ``report_template.py`` from this directory by file path.

    Loading by path means the local preview does not need to initialise the
    whole plugin, nor depend on Redis or other runtime objects.

    Returns:
        The executed module object, created under the spec name
        ``douyu_report_template_local``.
    """
    plugin_dir = Path(__file__).resolve().parent

    # Two levels above this directory is treated as the project root. It goes
    # to the front of sys.path so that project modules (such as ``utils``)
    # imported inside report_template.py can be resolved.
    root_entry = str(plugin_dir.parent.parent)
    if root_entry not in sys.path:
        sys.path.insert(0, root_entry)

    template_spec = importlib.util.spec_from_file_location(
        "douyu_report_template_local", plugin_dir / "report_template.py"
    )
    template_module = importlib.util.module_from_spec(template_spec)
    loader = template_spec.loader
    assert loader is not None
    loader.exec_module(template_module)
    return template_module
def _build_session(room_id: str, anchor_day: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build the minimal session dict used by the local test run.

    Args:
        room_id: Room identifier; copied into the result and used as the
            ``session_id`` prefix.
        anchor_day: Day string; its ``-`` characters are dropped when building
            the ``session_id``.
        messages: Message dicts. Only messages carrying a truthy
            ``"timestamp"`` (an object offering ``strftime``, e.g. a
            ``datetime``) contribute to the segment boundaries.

    Returns:
        A dict with ``session_id``, ``room_id``, ``anchor_day``, empty
        ``nickname`` / ``room_name`` and ``segments``. ``session_id`` ends in
        ``_empty`` when there are no messages and in ``_local_test`` otherwise.
        ``segments`` holds one entry spanning the earliest to the latest
        timestamp, or is empty when no message has a timestamp.
    """
    items = list(messages)

    # A message without a timestamp used to be sorted to the front (via a
    # datetime.min fallback) and then crashed the strftime call. Such messages
    # are now left out of the time span instead.
    stamps = sorted(
        stamp for stamp in (item.get("timestamp") for item in items) if stamp
    )

    segments: List[Dict[str, str]] = []
    if stamps:
        time_format = "%Y-%m-%d %H:%M:%S"
        segments.append({
            "start_time": stamps[0].strftime(time_format),
            "end_time": stamps[-1].strftime(time_format),
        })

    suffix = "local_test" if items else "empty"
    return {
        "session_id": f"{room_id}_{anchor_day.replace('-', '')}_{suffix}",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": segments,
    }
def _build_preview_template_payload(local_result: Dict[str, Any]) -> Dict[str, Any]:
    """Map a local test result onto the structure the fans daily report template reads.

    The preview path then uses the same field names as the real template,
    which makes problems easier to trace.

    Every lookup tolerates a missing key or a key holding ``None``; both are
    treated as an empty dict / list / string or as ``0``.

    Args:
        local_result: Dict with the keys ``session_meta``,
            ``local_stats_preview``, ``topic_evidence_clusters``,
            ``hero_mentions``, ``content_cues``, ``timeline_digest`` and
            ``representative_messages`` (the shape ``run_local_test`` writes).

    Returns:
        A dict with the keys ``report_meta``, ``local_stats``,
        ``topic_evidence_clusters``, ``compact_scene_material``,
        ``representative_messages``, ``repeated_messages``, ``burst_terms``,
        ``peak_buckets`` and ``top_terms``.
    """

    def _text(value: Any) -> str:
        # None / empty values become "", everything else is stringified and trimmed.
        return str(value or "").strip()

    def _count(value: Any) -> int:
        # None / empty values become 0.
        return int(value or 0)

    def _items(mapping: Dict[str, Any], key: str) -> List[Any]:
        # ``or []`` also covers keys that are present but hold None, which a
        # plain ``.get(key, [])`` followed by slicing would crash on.
        return mapping.get(key) or []

    def _text_count(item: Dict[str, Any], with_users: bool = False) -> Dict[str, Any]:
        entry: Dict[str, Any] = {
            "text": _text(item.get("text")),
            "count": _count(item.get("count")),
        }
        if with_users:
            entry["user_count"] = _count(item.get("user_count"))
        return entry

    session_meta = local_result.get("session_meta") or {}
    stats_preview = local_result.get("local_stats_preview") or {}
    topic_clusters = _items(local_result, "topic_evidence_clusters")
    hero_mentions = _items(local_result, "hero_mentions")
    content_cues = _items(local_result, "content_cues")
    timeline_digest = _items(local_result, "timeline_digest")
    representative_messages = _items(local_result, "representative_messages")

    repeated_source = _items(stats_preview, "top_repeated_messages")
    burst_source = _items(stats_preview, "top_burst_terms")
    peak_source = _items(stats_preview, "peak_buckets")

    message_count = _count(session_meta.get("message_count"))
    unique_user_count = _count(session_meta.get("unique_user_count"))

    # Emotion bursts are the content cues whose kind is "emotion"; the cap of 8
    # applies after filtering.
    emotion_bursts = [
        _text_count(cue)
        for cue in content_cues
        if _text(cue.get("kind")) == "emotion"
    ][:8]

    clusters = []
    for cluster in topic_clusters[:6]:
        first_hm = _text(cluster.get("first_hm"))
        last_hm = _text(cluster.get("last_hm"))
        clusters.append({
            "label": _text(cluster.get("label")),
            # "match_count" wins whenever the key is present; "count" is only
            # the fallback for clusters that lack the key entirely.
            "count": _count(cluster.get("match_count", cluster.get("count"))),
            "user_count": _count(cluster.get("user_count")),
            # Stripping "-" leaves a single time when only one end is known.
            "time_range": f"{first_hm}-{last_hm}".strip("-"),
            "keywords": _items(cluster, "keywords"),
            "samples": _items(cluster, "samples"),
        })

    # Up to two non-blank keywords from each of the first four clusters; the
    # count is a fixed 0.
    top_terms = [
        {"term": str(keyword).strip(), "count": 0}
        for cluster in topic_clusters[:4]
        for keyword in _items(cluster, "keywords")[:2]
        if str(keyword).strip()
    ]

    return {
        "report_meta": {
            "room_id": _text(session_meta.get("room_id")),
            "anchor_day": _text(session_meta.get("anchor_day")),
            "nickname": _text(session_meta.get("nickname")),
            "room_name": _text(session_meta.get("room_name")),
            "session_count": 1,
            "message_count": message_count,
            "unique_user_count": unique_user_count,
        },
        "local_stats": {
            "message_count": message_count,
            "unique_user_count": unique_user_count,
            "top_emotion_bursts": emotion_bursts,
            "top_repeated_messages": [
                _text_count(item, with_users=True) for item in repeated_source[:8]
            ],
            "peak_windows": [
                {
                    "start_time": _text(bucket.get("start_time")),
                    "message_count": _count(bucket.get("message_count")),
                    "user_count": _count(bucket.get("user_count")),
                }
                for bucket in peak_source[:6]
            ],
        },
        "topic_evidence_clusters": clusters,
        "compact_scene_material": {
            "semantic_fact_hints": {
                "hero_mentions": hero_mentions[:6],
            },
            "content_cues": content_cues[:18],
            "timeline_digest": timeline_digest[:20],
        },
        "representative_messages": representative_messages[:12],
        "repeated_messages": [
            _text_count(item, with_users=True) for item in repeated_source[:12]
        ],
        "burst_terms": [_text_count(item) for item in burst_source[:12]],
        "peak_buckets": peak_source[:6],
        "top_terms": top_terms,
    }
def _build_preview_report_text(payload: Dict[str, Any]) -> str:
    """Assemble a stable sample report text for the local template preview.

    No real LLM is involved: the already-distilled results are placed into a
    fixed structure so the template can be checked quickly for whether it
    shows the key information completely.

    Args:
        payload: Template payload as produced by
            ``_build_preview_template_payload``. Missing keys and keys holding
            ``None`` are treated as empty.

    Returns:
        The report as newline-joined lines: one intro sentence followed by
        bracketed section headers, each with zero or more ``- `` bullet lines.
        Section headers are always emitted, even when a section has no bullets.
    """

    def _text(value: Any) -> str:
        return str(value or "").strip()

    def _count(value: Any) -> int:
        return int(value or 0)

    def _first_sample(item: Dict[str, Any], limit: int) -> str:
        # Samples are expected to be dicts with a "content" key; a plain
        # string sample is used as-is instead of crashing on ``.get``.
        samples = item.get("samples") or []
        if not samples:
            return ""
        head = samples[0]
        raw = head.get("content") if isinstance(head, dict) else head
        return _text(raw)[:limit]

    meta = payload.get("report_meta") or {}
    topic_clusters = payload.get("topic_evidence_clusters") or []
    # Each nesting level may be missing or None, so unwrap them one by one.
    scene_material = payload.get("compact_scene_material") or {}
    fact_hints = scene_material.get("semantic_fact_hints") or {}
    hero_mentions = fact_hints.get("hero_mentions") or []
    repeated_messages = payload.get("repeated_messages") or []
    burst_terms = payload.get("burst_terms") or []
    peak_buckets = payload.get("peak_buckets") or []
    representative_messages = payload.get("representative_messages") or []

    anchor_day = _text(meta.get("anchor_day"))
    lines = [
        f"{anchor_day} 这场直播的弹幕不只是热闹,核心信息也很密:赛事、位置、英雄、团播人物和摄像头梗都有人追着聊。",
        "【今日重点信息】",
    ]

    for cluster in topic_clusters[:5]:
        label = _text(cluster.get("label"))
        time_range = _text(cluster.get("time_range"))
        count = _count(cluster.get("count"))
        sample_text = _first_sample(cluster, 42)
        if label and sample_text:
            # A colon keeps the label from running straight into the time range.
            lines.append(
                f"- {label}:{time_range or '全场'} 一直有人聊,相关弹幕约 {count} 条,代表说法是「{sample_text}」。"
            )

    lines.append("【核心讨论话题】")
    for cluster in topic_clusters[:4]:
        label = _text(cluster.get("label"))
        keywords = [
            str(keyword).strip()
            for keyword in (cluster.get("keywords") or [])[:5]
            if str(keyword).strip()
        ]
        if label and keywords:
            # Keywords are joined with an enumeration comma so they stay
            # readable as separate words.
            joined_keywords = "、".join(keywords)
            lines.append(f"- 大家围着 {label} 打转,关键词主要是 {joined_keywords}")

    lines.append("【英雄与对局焦点】")
    for hero in hero_mentions[:4]:
        hero_name = _text(hero.get("hero"))
        mention_count = _count(hero.get("mention_count"))
        sample_text = _first_sample(hero, 36)
        if hero_name and sample_text:
            lines.append(f"- {hero_name}被点名 {mention_count} 次,弹幕现场直接聊到「{sample_text}」。")

    lines.append("【今日笑点】")
    if peak_buckets:
        top_bucket = peak_buckets[0]
        # [-8:-3] keeps the "HH:MM" part of a value ending in "HH:MM:SS".
        peak_hm = str(top_bucket.get("start_time") or "")[-8:-3]
        peak_count = _count(top_bucket.get("message_count"))
        lines.append(f"- {peak_hm} 前后是最热窗口,弹幕量直接冲到 {peak_count} 条。")
    if repeated_messages:
        champion = repeated_messages[0]
        lines.append(
            f"- 复读冠军是「{_text(champion.get('text'))[:24]}」,一天被刷了 {_count(champion.get('count'))} 次。"
        )
    if burst_terms:
        burst = burst_terms[0]
        lines.append(f"- 情绪词「{_text(burst.get('text'))}」集中爆了 {_count(burst.get('count'))} 次。")

    lines.append("【弹幕名场面】")
    for message in representative_messages[:5]:
        nickname = _text(message.get("nickname")) or "观众"
        content = _text(message.get("content"))
        if content:
            # A colon separates the speaker from what was said.
            lines.append(f"- {nickname}:{content[:44]}")

    lines.append("【梗王榜】")
    for repeated in repeated_messages[:3]:
        lines.append(f"- {_text(repeated.get('text'))[:28]}|复读 {_count(repeated.get('count'))}")

    lines.append("【收尾播报】")
    lines.append("- 本地预览版已经把有效信息和乐子一起塞进同一张图里了。")
    return "\n".join(lines)
def run_local_test(file_path: str) -> str:
    """Run the local distillation chain on one danmu sample file and dump the result as JSON.

    The file stem must look like ``<room_id>_<YYYYMMDD>``: the part before the
    first underscore is the room id and the first eight characters after it
    are read as the day.

    Args:
        file_path: Path to the local danmu text sample.

    Returns:
        Path (as ``str``) of the written
        ``temp/douyu_materials/<stem>_local_test_result.json`` file, located
        under the current working directory.

    Raises:
        ValueError: If the file stem contains no underscore.
    """
    source = Path(file_path)
    file_name = source.stem

    # Validate the name before any loading so a wrongly named sample fails
    # fast with a readable message instead of an opaque unpacking error.
    room_id, separator, date_key = file_name.partition("_")
    if not separator:
        raise ValueError(
            f"Expected a file name like '<room_id>_<YYYYMMDD>.txt', got {source.name!r}"
        )
    anchor_day = f"{date_key[:4]}-{date_key[4:6]}-{date_key[6:8]}"

    helper = _load_helper()
    resolved_path = str(source.resolve())
    messages = helper.load_messages_from_file(resolved_path)

    session = _build_session(room_id, anchor_day, messages)
    payload = helper.build_llm_payload(room_id, session, messages)
    compact = payload.get("compact_prompt_assets", {}) or {}
    fact_hints = compact.get("semantic_fact_hints", {}) or {}

    # Each list is capped so the dumped JSON stays small enough to read by hand.
    result = {
        "file_path": resolved_path,
        "message_count": len(messages),
        "session_meta": payload.get("session_meta", {}) or {},
        "local_stats_preview": {
            "top_repeated_messages": (payload.get("repeated_messages", []) or [])[:10],
            "top_burst_terms": (payload.get("burst_terms", []) or [])[:10],
            "peak_buckets": (payload.get("peak_buckets", []) or [])[:6],
        },
        "topic_evidence_clusters": (fact_hints.get("topic_clusters", []) or [])[:8],
        "hero_mentions": (fact_hints.get("hero_mentions", []) or [])[:8],
        "content_cues": (compact.get("content_cues", []) or [])[:16],
        "timeline_digest": (compact.get("timeline_digest", []) or [])[:12],
        "representative_messages": (payload.get("representative_messages", []) or [])[:18],
    }

    output_dir = Path(os.getcwd()) / "temp" / "douyu_materials"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{file_name}_local_test_result.json"
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(output_path)
def render_fans_preview_from_file(file_path: str) -> str:
    """Read a local danmu file and produce the new fans daily report HTML preview.

    After any change to the distillation logic or the template, the final
    rendering can be checked with this one call.

    Args:
        file_path: Path to the local danmu text sample.

    Returns:
        Path (as ``str``) of the written
        ``temp/douyu_materials/<stem>_fans_template_preview.html`` file,
        located under the current working directory.
    """
    # The local result is read back from the JSON file that run_local_test wrote.
    result_file = Path(run_local_test(file_path))
    local_result = json.loads(result_file.read_text(encoding="utf-8"))

    template_payload = _build_preview_template_payload(local_result)
    preview_text = _build_preview_report_text(template_payload)
    template_module = _load_report_template_module()
    html_content = template_module.render_fans_daily_report_html(
        payload=template_payload,
        fans_report_text=preview_text,
    )

    target_dir = Path(os.getcwd()) / "temp" / "douyu_materials"
    target_dir.mkdir(parents=True, exist_ok=True)
    target_path = target_dir / f"{Path(file_path).stem}_fans_template_preview.html"
    target_path.write_text(html_content, encoding="utf-8")
    return str(target_path)
if __name__ == "__main__":
    # Sample paths are relative to the current working directory. They are
    # built with pathlib so the separator is right on every platform: a
    # hard-coded backslash is not a path separator on POSIX, where the whole
    # string would become the file stem and the room id could not be parsed.
    sample_dir = Path("plugins") / "douyu" / "danmu_test"
    sample_files = [
        str(sample_dir / "52876_20260428.txt"),
        str(sample_dir / "52876_20260429.txt"),
    ]
    for sample in sample_files:
        # render_fans_preview_from_file runs run_local_test again internally;
        # the explicit call here is kept to obtain the JSON path for printing.
        result_path = run_local_test(sample)
        preview_path = render_fans_preview_from_file(sample)
        print(result_path)
        print(preview_path)