1. 为本地测试脚本增加粉丝日报 HTML 预览输出,统一映射新版模板需要的数据结构
2. 内置稳定的预览文案拼装逻辑,方便不依赖LLM也能本地验收页面效果
3. 压缩粉丝日报模板的卡片间距、字号、行高和高度,让同样的信息更紧凑简约地展示
316 lines
14 KiB
Python
316 lines
14 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
斗鱼弹幕本地测试脚本。

用途:
1. 直接读取用户提供的本地弹幕文本样本;
2. 跑一遍“本地提纯 + 证据簇提炼”链路;
3. 将结果输出到 temp/douyu_materials,方便人工查看;
4. 不依赖 Redis、Dify、直播 session。
|
||
"""
|
||
|
||
import importlib.util
|
||
import json
|
||
import os
|
||
import sys
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List
|
||
|
||
|
||
def _load_helper():
    """Load ``DouyuDanmuSummaryHelper`` from the sibling ``danmu_summary.py``.

    The module is executed directly from its file path (the directory of this
    script) under the name ``douyu_danmu_summary_local``, so it does not have
    to be importable as part of a package.

    Returns:
        The ``DouyuDanmuSummaryHelper`` attribute of the loaded module.

    Raises:
        ImportError: If no import spec or loader can be built for the file.
    """
    module_name = "douyu_danmu_summary_local"
    module_path = Path(__file__).resolve().parent / "danmu_summary.py"
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # spec_from_file_location() may return None, and an ``assert`` would be
    # stripped under ``python -O``; fail explicitly instead.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {module_path}")
    module = importlib.util.module_from_spec(spec)
    # Register the module before executing it, following the importlib
    # "importing a source file directly" recipe, so code that looks the module
    # up through sys.modules during import can find it.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(module_name, None)
        raise
    return module.DouyuDanmuSummaryHelper
|
||
|
||
|
||
def _load_report_template_module():
    """Load the sibling ``report_template.py`` by file path and return the module.

    Loading by path lets the local preview run without initialising the whole
    plugin and without Redis or other runtime objects.

    Side effects:
        Inserts the directory two levels above this file's directory at the
        front of ``sys.path`` (when absent) and registers the module in
        ``sys.modules`` as ``douyu_report_template_local``.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for the file.
    """
    module_name = "douyu_report_template_local"
    current_dir = Path(__file__).resolve().parent
    # Treat the grandparent directory as the project root and expose it on
    # sys.path so that project-level imports made inside report_template.py
    # (the original note mentions ``utils``) can be resolved.
    project_root = str(current_dir.parent.parent)
    if project_root not in sys.path:
        sys.path.insert(0, project_root)

    module_path = current_dir / "report_template.py"
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # spec_from_file_location() may return None, and an ``assert`` would be
    # stripped under ``python -O``; fail explicitly instead.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {module_path}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib recipe for loading a source
    # file directly does.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(module_name, None)
        raise
    return module
|
||
|
||
|
||
def _build_session(room_id: str, anchor_day: str, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build a minimal session dict describing one local test run.

    Args:
        room_id: Room identifier, copied into the session and its id.
        anchor_day: Day string; its ``-`` characters are removed when building
            the session id.
        messages: Message dicts. Each may carry a ``timestamp`` value that
            supports ``strftime``; messages without one are ignored when the
            time range is computed.

    Returns:
        A dict with ``session_id``, ``room_id``, ``anchor_day``, empty
        ``nickname`` / ``room_name`` and ``segments``. With no messages the id
        ends in ``_empty`` and ``segments`` is empty. Otherwise the id ends in
        ``_local_test`` and ``segments`` holds one entry spanning the earliest
        and latest timestamps (or stays empty when no message has a timestamp).
    """
    messages = messages or []
    # The original sort key tolerated missing timestamps but then called
    # strftime on them; only real timestamps take part in the range now.
    timestamps = sorted(
        stamp for stamp in (message.get("timestamp") for message in messages) if stamp
    )

    segments: List[Dict[str, Any]] = []
    if timestamps:
        time_format = "%Y-%m-%d %H:%M:%S"
        segments.append({
            "start_time": timestamps[0].strftime(time_format),
            "end_time": timestamps[-1].strftime(time_format),
        })

    suffix = "local_test" if messages else "empty"
    return {
        "session_id": f"{room_id}_{anchor_day.replace('-', '')}_{suffix}",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": segments,
    }
|
||
|
||
|
||
def _preview_text(value: Any) -> str:
    """Return ``value`` as a stripped string; falsy values become ``""``."""
    return str(value or "").strip()


def _preview_int(value: Any) -> int:
    """Return ``value`` as an int; falsy values become ``0``."""
    return int(value or 0)


def _preview_rows(items: Any, limit: int, text_keys: List[str], int_keys: List[str]) -> List[Dict[str, Any]]:
    """Project the first ``limit`` dicts of ``items`` onto cleaned text/int fields.

    ``items`` may be ``None`` (for example a JSON ``null``), which is treated
    as an empty list. Text keys come first in each row, then int keys.
    """
    rows = []
    for item in (items or [])[:limit]:
        row: Dict[str, Any] = {key: _preview_text(item.get(key)) for key in text_keys}
        for key in int_keys:
            row[key] = _preview_int(item.get(key))
        rows.append(row)
    return rows


def _build_preview_template_payload(local_result: Dict[str, Any]) -> Dict[str, Any]:
    """Map a local test result onto the structure built for the fans daily report template.

    Keeping the preview on the same field names as the real template makes
    later debugging more direct.

    Args:
        local_result: Dict read from the local test result. The keys used are
            ``session_meta``, ``local_stats_preview``,
            ``topic_evidence_clusters``, ``hero_mentions``, ``content_cues``,
            ``timeline_digest`` and ``representative_messages``. Missing or
            ``None`` values are treated as empty.

    Returns:
        A dict with ``report_meta``, ``local_stats``,
        ``topic_evidence_clusters``, ``compact_scene_material``,
        ``representative_messages``, ``repeated_messages``, ``burst_terms``,
        ``peak_buckets`` and ``top_terms``.
    """
    session_meta = local_result.get("session_meta") or {}
    stats_preview = local_result.get("local_stats_preview") or {}
    topic_clusters = local_result.get("topic_evidence_clusters") or []
    hero_mentions = local_result.get("hero_mentions") or []
    content_cues = local_result.get("content_cues") or []
    timeline_digest = local_result.get("timeline_digest") or []
    representative_messages = local_result.get("representative_messages") or []

    # ``or []`` (rather than a .get default) also covers keys stored as null.
    repeated_source = stats_preview.get("top_repeated_messages") or []
    burst_source = stats_preview.get("top_burst_terms") or []
    peak_buckets = stats_preview.get("peak_buckets") or []

    message_count = _preview_int(session_meta.get("message_count"))
    unique_user_count = _preview_int(session_meta.get("unique_user_count"))

    # Emotion bursts are the content cues tagged kind == "emotion"; the cap of
    # eight applies after filtering.
    emotion_cues = [
        cue for cue in content_cues if _preview_text(cue.get("kind")) == "emotion"
    ]

    cluster_rows = []
    for cluster in topic_clusters[:6]:
        first_hm = _preview_text(cluster.get("first_hm"))
        last_hm = _preview_text(cluster.get("last_hm"))
        cluster_rows.append({
            "label": _preview_text(cluster.get("label")),
            # Prefer match_count when the key exists, else fall back to count.
            "count": _preview_int(cluster.get("match_count", cluster.get("count", 0))),
            "user_count": _preview_int(cluster.get("user_count")),
            # Drop the dangling dash when only one end of the range is known.
            "time_range": f"{first_hm}-{last_hm}".strip("-"),
            "keywords": cluster.get("keywords", []) or [],
            "samples": cluster.get("samples", []) or [],
        })

    # Up to two keywords from each of the first four clusters; count is fixed at 0.
    top_terms = [
        {"term": str(keyword).strip(), "count": 0}
        for cluster in topic_clusters[:4]
        for keyword in (cluster.get("keywords", []) or [])[:2]
        if str(keyword).strip()
    ]

    return {
        "report_meta": {
            "room_id": _preview_text(session_meta.get("room_id")),
            "anchor_day": _preview_text(session_meta.get("anchor_day")),
            "nickname": _preview_text(session_meta.get("nickname")),
            "room_name": _preview_text(session_meta.get("room_name")),
            "session_count": 1,
            "message_count": message_count,
            "unique_user_count": unique_user_count,
        },
        "local_stats": {
            "message_count": message_count,
            "unique_user_count": unique_user_count,
            "top_emotion_bursts": _preview_rows(emotion_cues, 8, ["text"], ["count"]),
            "top_repeated_messages": _preview_rows(
                repeated_source, 8, ["text"], ["count", "user_count"]
            ),
            "peak_windows": _preview_rows(
                peak_buckets, 6, ["start_time"], ["message_count", "user_count"]
            ),
        },
        "topic_evidence_clusters": cluster_rows,
        "compact_scene_material": {
            "semantic_fact_hints": {
                "hero_mentions": hero_mentions[:6],
            },
            "content_cues": content_cues[:18],
            "timeline_digest": timeline_digest[:20],
        },
        "representative_messages": representative_messages[:12],
        "repeated_messages": _preview_rows(
            repeated_source, 12, ["text"], ["count", "user_count"]
        ),
        "burst_terms": _preview_rows(burst_source, 12, ["text"], ["count"]),
        "peak_buckets": peak_buckets[:6],
        "top_terms": top_terms,
    }
|
||
|
||
|
||
def _build_preview_report_text(payload: Dict[str, Any]) -> str:
    """Assemble a fixed-structure sample report text for the local template preview.

    No LLM is involved: the text is stitched together from the already
    prepared payload so the template can be checked quickly.

    Args:
        payload: Template payload dict. The keys read are ``report_meta``,
            ``topic_evidence_clusters``, ``compact_scene_material``,
            ``repeated_messages``, ``burst_terms``, ``peak_buckets`` and
            ``representative_messages``. Missing or ``None`` values, including
            intermediate dicts on the ``hero_mentions`` path, count as empty.

    Returns:
        The report lines joined with ``"\\n"``. Every section heading is always
        emitted, even when the section has no entries.
    """

    def first_sample_text(samples: Any, limit: int) -> str:
        # Use the first sample's "content", stripped and cut to ``limit`` chars.
        if not samples:
            return ""
        return str(samples[0].get("content") or "").strip()[:limit]

    meta = payload.get("report_meta") or {}
    topic_clusters = payload.get("topic_evidence_clusters") or []
    # Guard each level: a chained .get(..., {}) still fails when a key is
    # present but holds None.
    scene_material = payload.get("compact_scene_material") or {}
    fact_hints = scene_material.get("semantic_fact_hints") or {}
    hero_mentions = fact_hints.get("hero_mentions") or []
    repeated_messages = payload.get("repeated_messages") or []
    burst_terms = payload.get("burst_terms") or []
    peak_buckets = payload.get("peak_buckets") or []
    representative_messages = payload.get("representative_messages") or []
    anchor_day = str(meta.get("anchor_day") or "").strip()

    lines = [
        f"{anchor_day} 这场直播的弹幕不只是热闹,核心信息也很密:赛事、位置、英雄、团播人物和摄像头梗都有人追着聊。",
        "【今日重点信息】",
    ]
    for cluster in topic_clusters[:5]:
        label = str(cluster.get("label") or "").strip()
        time_range = str(cluster.get("time_range") or "").strip()
        count = int(cluster.get("count", 0) or 0)
        sample_text = first_sample_text(cluster.get("samples", []) or [], 42)
        if label and sample_text:
            lines.append(f"- {label}从 {time_range or '全场'} 一直有人聊,相关弹幕约 {count} 条,代表说法是「{sample_text}」。")

    lines.append("【核心讨论话题】")
    for cluster in topic_clusters[:4]:
        label = str(cluster.get("label") or "").strip()
        keywords = [
            str(keyword).strip()
            for keyword in (cluster.get("keywords", []) or [])[:5]
            if str(keyword).strip()
        ]
        if label and keywords:
            lines.append(f"- 大家围着 {label} 打转,关键词主要是 {'、'.join(keywords)}。")

    lines.append("【英雄与对局焦点】")
    for mention in hero_mentions[:4]:
        hero_name = str(mention.get("hero") or "").strip()
        mention_count = int(mention.get("mention_count", 0) or 0)
        sample_text = first_sample_text(mention.get("samples", []) or [], 36)
        if hero_name and sample_text:
            lines.append(f"- {hero_name}被点名 {mention_count} 次,弹幕现场直接聊到「{sample_text}」。")

    lines.append("【今日笑点】")
    if peak_buckets:
        top_bucket = peak_buckets[0]
        # [-8:-3] keeps characters 8..4 from the end of start_time; for a
        # "YYYY-MM-DD HH:MM:SS" value that is the HH:MM part.
        peak_hm = str(top_bucket.get("start_time") or "")[-8:-3]
        peak_count = int(top_bucket.get("message_count", 0) or 0)
        lines.append(f"- {peak_hm} 前后是最热窗口,弹幕量直接冲到 {peak_count} 条。")
    if repeated_messages:
        champion = repeated_messages[0]
        champion_text = str(champion.get("text") or "").strip()[:24]
        champion_count = int(champion.get("count", 0) or 0)
        lines.append(f"- 复读冠军是「{champion_text}」,一天被刷了 {champion_count} 次。")
    if burst_terms:
        burst = burst_terms[0]
        burst_text = str(burst.get("text") or "").strip()
        burst_count = int(burst.get("count", 0) or 0)
        lines.append(f"- 情绪词「{burst_text}」集中爆了 {burst_count} 次。")

    lines.append("【弹幕名场面】")
    for message in representative_messages[:5]:
        nickname = str(message.get("nickname") or "").strip() or "观众"
        content = str(message.get("content") or "").strip()
        if content:
            lines.append(f"- {nickname}:{content[:44]}")

    lines.append("【梗王榜】")
    for repeated in repeated_messages[:3]:
        repeated_text = str(repeated.get("text") or "").strip()[:28]
        repeated_count = int(repeated.get("count", 0) or 0)
        lines.append(f"- {repeated_text}|复读 {repeated_count} 次")

    lines.append("【收尾播报】")
    lines.append("- 本地预览版已经把有效信息和乐子一起塞进同一张图里了。")
    return "\n".join(lines)
|
||
|
||
|
||
def _parse_sample_file_name(file_path: str):
    """Split a ``<room_id>_<YYYYMMDD>...`` file stem into (stem, room_id, anchor_day).

    Raises:
        ValueError: If the stem has no ``_`` separator, an empty room id, or
            fewer than eight leading digits after the separator.
    """
    file_name = Path(file_path).stem
    room_id, separator, date_key = file_name.partition("_")
    date_digits = date_key[:8]
    if not (separator and room_id and len(date_digits) == 8 and date_digits.isdigit()):
        raise ValueError(
            f"sample file name must look like '<room_id>_<YYYYMMDD>': {file_name!r}"
        )
    anchor_day = f"{date_digits[:4]}-{date_digits[4:6]}-{date_digits[6:8]}"
    return file_name, room_id, anchor_day


def run_local_test(file_path: str) -> str:
    """Run the local danmaku pipeline on one sample file and dump the result as JSON.

    The room id and day are taken from the file name
    (``<room_id>_<YYYYMMDD>``). Messages are loaded and turned into an LLM
    payload by the helper class, and a trimmed view of that payload is written
    to ``temp/douyu_materials/<stem>_local_test_result.json`` under the
    current working directory.

    Args:
        file_path: Path to the sample file.

    Returns:
        The path of the written JSON file, as a string.

    Raises:
        ValueError: If the file name does not match ``<room_id>_<YYYYMMDD>``.
    """
    # Validate the name first so a bad path fails before any loading work.
    file_name, room_id, anchor_day = _parse_sample_file_name(file_path)

    helper = _load_helper()
    resolved_path = str(Path(file_path).resolve())
    messages = helper.load_messages_from_file(resolved_path)
    session = _build_session(room_id, anchor_day, messages)
    payload = helper.build_llm_payload(room_id, session, messages)

    compact = payload.get("compact_prompt_assets", {}) or {}
    fact_hints = compact.get("semantic_fact_hints", {}) or {}
    result = {
        "file_path": resolved_path,
        "message_count": len(messages),
        "session_meta": payload.get("session_meta", {}) or {},
        "local_stats_preview": {
            "top_repeated_messages": (payload.get("repeated_messages", []) or [])[:10],
            "top_burst_terms": (payload.get("burst_terms", []) or [])[:10],
            "peak_buckets": (payload.get("peak_buckets", []) or [])[:6],
        },
        "topic_evidence_clusters": (fact_hints.get("topic_clusters", []) or [])[:8],
        "hero_mentions": (fact_hints.get("hero_mentions", []) or [])[:8],
        "content_cues": (compact.get("content_cues", []) or [])[:16],
        "timeline_digest": (compact.get("timeline_digest", []) or [])[:12],
        "representative_messages": (payload.get("representative_messages", []) or [])[:18],
    }

    output_dir = Path(os.getcwd()) / "temp" / "douyu_materials"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{file_name}_local_test_result.json"
    output_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    return str(output_path)
|
||
|
||
|
||
def render_fans_preview_from_file(file_path: str) -> str:
    """Render the fans daily report HTML preview for one local danmaku file.

    Runs the local test for ``file_path``, reads its JSON result back, maps it
    onto the template payload, builds the sample report text and passes both to
    ``render_fans_daily_report_html`` of the template module. The HTML is
    written to ``temp/douyu_materials/<stem>_fans_template_preview.html`` under
    the current working directory, so one command is enough to check the final
    rendering after changing the pipeline or the template.

    Args:
        file_path: Path to the sample file.

    Returns:
        The path of the written HTML file, as a string.
    """
    result_json = Path(run_local_test(file_path)).read_text(encoding="utf-8")
    template_payload = _build_preview_template_payload(json.loads(result_json))
    preview_text = _build_preview_report_text(template_payload)

    template_module = _load_report_template_module()
    preview_html = template_module.render_fans_daily_report_html(
        payload=template_payload,
        fans_report_text=preview_text,
    )

    target_dir = Path(os.getcwd()).joinpath("temp", "douyu_materials")
    target_dir.mkdir(parents=True, exist_ok=True)
    stem = Path(file_path).stem
    target_path = target_dir / f"{stem}_fans_template_preview.html"
    target_path.write_text(preview_html, encoding="utf-8")
    return str(target_path)
|
||
|
||
|
||
if __name__ == "__main__":
    # Sample files may be given on the command line; with no arguments the two
    # default samples are used. The default paths are relative to the current
    # working directory and are built with pathlib so the separator matches
    # the host OS (a hard-coded backslash path is a single file name on POSIX).
    default_sample_dir = Path("plugins") / "douyu" / "danmu_test"
    sample_files = sys.argv[1:] or [
        str(default_sample_dir / "52876_20260428.txt"),
        str(default_sample_dir / "52876_20260429.txt"),
    ]
    for sample in sample_files:
        # NOTE: render_fans_preview_from_file() runs run_local_test() again
        # internally; the explicit call here is kept to obtain the JSON path.
        result_path = run_local_test(sample)
        preview_path = render_fans_preview_from_file(sample)
        print(result_path)
        print(preview_path)
|