重构斗鱼粉丝日报为信息优先结构

1. 更新粉丝日报提示词,优先提炼赛事、位置、英雄、对局和场外有效信息
2. 扩展模板解析与渲染逻辑,支持今日重点信息、核心讨论话题、英雄与对局焦点等新板块
3. 优化粉丝日报兜底文案与模板展示,让本地提纯结果和LLM语义总结共同参与输出
This commit is contained in:
liuwei
2026-04-29 15:06:56 +08:00
parent 642b55afe0
commit 4386d0df75
3 changed files with 193 additions and 13 deletions

View File

@@ -91,6 +91,15 @@ def _split_fans_report_blocks(report_text: str) -> Dict[str, Any]:
即便模型没有完全按约定输出,这里也会尽量兜底,保证页面不空。
"""
header_alias_map = {
"今日重点信息": "key_info",
"重点信息": "key_info",
"有效信息": "key_info",
"核心讨论话题": "topic_focus",
"讨论话题": "topic_focus",
"核心话题": "topic_focus",
"英雄与对局焦点": "hero_focus",
"对局焦点": "hero_focus",
"英雄焦点": "hero_focus",
"今日笑点": "laugh_points",
"笑点": "laugh_points",
"欢乐总结": "laugh_points",
@@ -106,6 +115,9 @@ def _split_fans_report_blocks(report_text: str) -> Dict[str, Any]:
}
sections = {
"lead": "",
"key_info": [],
"topic_focus": [],
"hero_focus": [],
"laugh_points": [],
"famous_scenes": [],
"meme_rank": [],
@@ -510,6 +522,97 @@ def _build_fans_effective_info_lines(payload: Dict[str, Any], limit: int = 6) ->
return lines[:limit]
def _build_local_topic_focus_lines(payload: Dict[str, Any], limit: int = 4) -> List[str]:
"""
为“核心讨论话题”补充本地可直接确定的摘要句。
这里故意不让模型自己重新发明事实,而是把主题簇已经聚好的结果转成人能读懂的话。
"""
lines: List[str] = []
seen = set()
def push(text: str) -> None:
value = str(text or "").strip()
if not value or value in seen:
return
seen.add(value)
lines.append(value)
for item in (payload.get("topic_evidence_clusters", []) or [])[:4]:
label = str(item.get("label") or "").strip()
keywords = [str(keyword).strip() for keyword in (item.get("keywords", []) or [])[:5] if str(keyword).strip()]
count = int(item.get("count", 0) or 0)
if label and keywords:
push(f"{label}是高频主线,相关讨论约 {count} 条,关键词集中在 {''.join(keywords)}")
elif label:
push(f"{label}是今天反复被拉出来聊的主线之一,相关讨论约 {count} 条。")
if len(lines) >= limit:
return lines[:limit]
return lines[:limit]
def _build_local_hero_focus_lines(payload: Dict[str, Any], limit: int = 4) -> List[str]:
"""
为“英雄与对局焦点”准备本地兜底。
这部分直接复用英雄提及聚类,优先强调出现频次和代表发言,方便粉丝快速看懂今天在聊什么英雄。
"""
hero_mentions = (
payload.get("compact_scene_material", {})
.get("semantic_fact_hints", {})
.get("hero_mentions", [])
or []
)
lines: List[str] = []
seen = set()
def push(text: str) -> None:
value = str(text or "").strip()
if not value or value in seen:
return
seen.add(value)
lines.append(value)
for item in hero_mentions[:4]:
hero_name = str(item.get("hero") or "").strip()
mention_count = int(item.get("mention_count", 0) or 0)
samples = item.get("samples", []) or []
sample_text = ""
if samples:
sample_text = str(samples[0].get("content") or "").strip()[:36]
if hero_name and sample_text:
push(f"{hero_name}被提到 {mention_count} 次,现场典型弹幕是「{sample_text}」。")
elif hero_name:
push(f"{hero_name}是今天的主要英雄讨论点之一,被提到 {mention_count} 次。")
if len(lines) >= limit:
return lines[:limit]
return lines[:limit]
def _normalize_information_section_items(
llm_items: List[str],
local_items: List[str],
target_count: int,
) -> List[str]:
"""
将模型提炼结果与本地事实兜底合并。
设计目标:
1. 先尊重模型已经总结好的“可读句子”;
2. 如果模型漏了,就用本地证据补足;
3. 始终保证最终区块有信息量,而不是空标题。
"""
normalized: List[str] = []
seen = set()
for source in (llm_items, local_items):
for item in source:
value = str(item or "").strip()
if not value or value in seen:
continue
seen.add(value)
normalized.append(value)
if len(normalized) >= target_count:
return normalized[:target_count]
return normalized[:target_count]
def _render_fans_info_cards(items: List[str]) -> str:
blocks = []
for item in items[:6]:
@@ -929,6 +1032,21 @@ def render_fans_daily_report_html(
f" | 围观群众 {meta.get('unique_user_count', 0)}"
)
sections = _split_fans_report_blocks(fans_report_text)
effective_info_lines = _normalize_information_section_items(
sections.get("key_info", []),
_build_fans_effective_info_lines(payload),
target_count=6,
)
topic_focus_lines = _normalize_information_section_items(
sections.get("topic_focus", []),
_build_local_topic_focus_lines(payload),
target_count=4,
)
hero_focus_lines = _normalize_information_section_items(
sections.get("hero_focus", []),
_build_local_hero_focus_lines(payload),
target_count=4,
)
laugh_points = _normalize_funny_bullets(payload, sections.get("laugh_points", []), target_count=4)
famous_scenes = _normalize_scene_bullets(payload, sections.get("famous_scenes", []), target_count=5)
meme_rank = _normalize_rank_bullets(payload, sections.get("meme_rank", []), target_count=3)
@@ -954,8 +1072,11 @@ def render_fans_daily_report_html(
"lead_text": lead_text,
# 粉丝版不再只做“乐子文案展示”,而是补进本地提纯后的有效信息区。
"fans_metrics_html": Markup(_render_fans_metric_cards(_build_fans_fun_metrics(payload))),
"effective_info_html": Markup(_render_fans_info_cards(_build_fans_effective_info_lines(payload))),
"effective_summary_html": Markup(_render_list(effective_info_lines, item_class="section-summary-list")),
"effective_info_html": Markup(_render_fans_info_cards(effective_info_lines)),
"topic_focus_html": Markup(_render_list(topic_focus_lines, item_class="section-summary-list")),
"topic_clusters_html": Markup(_render_topic_clusters(topic_clusters)),
"hero_focus_html": Markup(_render_list(hero_focus_lines, item_class="section-summary-list")),
"hero_mentions_html": Markup(_render_hero_mentions(hero_mentions)),
"hot_windows_html": Markup(_render_hot_window_cards(local_stats.get("peak_windows", []) or [])),
"repeat_digest_html": Markup(_render_repeat_digest(payload)),