优化斗鱼弹幕总结:新增粉丝向弹幕萃取区块并调整提示词语气

This commit is contained in:
liuwei
2026-04-17 11:17:16 +08:00
parent e56c0069cc
commit 5098c191de
2 changed files with 159 additions and 8 deletions

View File

@@ -24,19 +24,37 @@ def _render_list(items: List[str], item_class: str = "bullet-list") -> str:
return f'<ul class="{item_class}">{lis}</ul>' if lis else ""
def _split_summary_blocks(danmu_summary: str) -> tuple[str, List[str]]:
def _split_summary_blocks(danmu_summary: str) -> tuple[str, List[str], List[str]]:
# 这里把 LLM 返回的弹幕总结拆成三部分:
# 1) lead: 顶部总述段落
# 2) insight_items: 常规的复盘要点(运营/观察视角)
# 3) fans_extract_items: 专门给粉丝看的“弹幕萃取”要点
# 约定:当检测到“【粉丝向弹幕萃取】”或同义标记后,后续条目归入 fans_extract_items。
lead_parts = []
insight_items = []
fans_extract_items = []
in_fans_extract_block = False
for line in str(danmu_summary or "").splitlines():
stripped = line.strip()
if not stripped:
continue
# 兼容不同模型可能产出的标题样式,尽量把粉丝向内容稳定识别出来。
if stripped.startswith("【粉丝向弹幕萃取】") or stripped.startswith("粉丝向弹幕萃取") or stripped.startswith("给粉丝看的弹幕萃取"):
in_fans_extract_block = True
continue
if stripped.startswith("- "):
insight_items.append(stripped[2:].strip())
if in_fans_extract_block:
fans_extract_items.append(stripped[2:].strip())
else:
insight_items.append(stripped[2:].strip())
else:
lead_parts.append(stripped)
# 非 bullet 文本在粉丝区块中也保留,避免模型偶发输出短段落导致信息丢失。
if in_fans_extract_block:
fans_extract_items.append(stripped)
else:
lead_parts.append(stripped)
lead = " ".join(lead_parts).strip()
return lead, insight_items
return lead, insight_items, fans_extract_items
def _normalize_summary_bullets(payload: Dict[str, Any], items: List[str], target_count: int = 5) -> List[str]:
@@ -75,6 +93,47 @@ def _normalize_summary_bullets(payload: Dict[str, Any], items: List[str], target
return normalized[:target_count]
def _normalize_fans_extract_bullets(payload: Dict[str, Any], items: List[str], target_count: int = 6) -> List[str]:
# 粉丝向萃取强调“现场感”,优先保留模型给出的条目;
# 不足时再从代表弹幕/重复梗中补齐,避免页面出现空区块。
normalized = [str(item or "").strip() for item in items if str(item or "").strip()]
if len(normalized) >= target_count:
return normalized[:target_count]
supplements: List[str] = []
representative_messages = payload.get("representative_messages", []) or []
repeated_messages = payload.get("repeated_messages", []) or []
burst_terms = payload.get("burst_terms", []) or []
for item in representative_messages[:8]:
nickname = str(item.get("nickname") or "").strip() or "观众"
content = str(item.get("content") or "").strip()
if not content:
continue
supplements.append(f"{nickname}{content[:46]}")
for item in repeated_messages[:6]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
supplements.append(f"复读梗「{text[:34]}」出现 {count} 次。")
for item in burst_terms[:4]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
supplements.append(f"情绪短词「{text}」集中刷了 {count} 次。")
existing = set(normalized)
for item in supplements:
if item not in existing:
normalized.append(item)
existing.add(item)
if len(normalized) >= target_count:
break
return normalized[:target_count]
def _build_template_items(payload: Dict[str, Any], limit: int = 8) -> List[str]:
items: List[str] = []
seen = set()
@@ -400,8 +459,9 @@ def render_daily_report_html(
top_active_users = payload.get("operator_metrics", {}).get("top_active_users", []) or []
audience_trend = payload.get("audience_trend", {}) or {}
lead_summary, danmu_bullets = _split_summary_blocks(danmu_summary)
lead_summary, danmu_bullets, fans_extract_bullets = _split_summary_blocks(danmu_summary)
danmu_bullets = _normalize_summary_bullets(payload, danmu_bullets, target_count=5)
fans_extract_bullets = _normalize_fans_extract_bullets(payload, fans_extract_bullets, target_count=6)
html_doc = f"""<html>
<head>
@@ -589,6 +649,30 @@ def render_daily_report_html(
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 12px;
}}
.fans-panel {{
margin-top: 14px;
padding: 14px 15px 12px;
border-radius: 18px;
background: linear-gradient(180deg, rgba(255,255,255,0.96), rgba(245,250,255,0.94));
border: 1px solid rgba(73, 136, 224, 0.18);
}}
.fans-title {{
color: #1d4ed8;
font-size: 13px;
letter-spacing: .06em;
font-weight: 700;
margin-bottom: 8px;
}}
.fans-list {{
margin: 0;
padding-left: 18px;
}}
.fans-list li {{
color: #1e3a5f;
margin: 8px 0;
line-height: 1.65;
font-size: 14px;
}}
.insight-card {{
padding: 15px 16px;
border-radius: 18px;
@@ -946,6 +1030,10 @@ def render_daily_report_html(
<div class="insight-grid">
{_render_insight_cards(danmu_bullets)}
</div>
<div class="fans-panel">
<div class="fans-title">给粉丝看的弹幕萃取</div>
{_render_list(fans_extract_bullets, item_class="fans-list")}
</div>
</div>
<div class="aside-card">
<div class="aside-title">高频梗</div>