Files
abot/plugins/douyu/main.py

3941 lines
190 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import json
from collections import Counter
from datetime import datetime, timedelta
import os
from pathlib import Path
import re
import threading
import time
from typing import Dict, Any, List, Optional, Tuple, Set
import aiohttp
from loguru import logger
import ssl
import zlib
try:
import websocket
except ImportError:
websocket = None
from base.plugin_common.message_plugin_interface import MessagePluginInterface
from base.plugin_common.plugin_interface import PluginStatus
from db.connection import DBConnectionManager
from utils.ai.unified_llm import UnifiedLLMClient
from plugins.douyu.danmu_summary import DouyuDanmuSummaryHelper
from plugins.douyu.report_template import render_daily_report_html, render_fans_daily_report_html
from utils.decorator.async_job import async_job
from utils.decorator.plugin_decorators import plugin_stats_decorator
from utils.decorator.points_decorator import plugin_points_cost
from utils.markdown_to_image import convert_md_str_to_image, html_to_image
from utils.robot_cmd.robot_command import Feature, PermissionStatus, GroupBotManager
from utils.string_utils import remove_reasoning_content
from wechat_ipad import WechatAPIClient
from wechat_ipad.models.appmsg_xml import DOUYU_MESSAGE_XML
class DouyuDanmuRecorder:
    """Record the danmu (chat) stream of one Douyu room on a background thread.

    Chat lines are buffered in memory and appended to
    ``temp/douyu_danmu/<YYYYMMDD>/<room_id>_<YYYYMMDD>.txt``. Audience counters
    carried by ``oni`` / ``dfnum`` messages are forwarded to ``stats_callback``
    whenever their combined value changes.
    """

    def __init__(self, room_id: str, user_agent: str, stats_callback=None, stats_sample_interval_seconds: int = 60):
        """Store the configuration; no connection is opened until ``start()``.

        Args:
            room_id: Douyu room id used in the login/join packets and file names.
            user_agent: Value sent as the ``User-Agent`` header of the WebSocket.
            stats_callback: Optional callable ``(room_id, point_dict)``.
            stats_sample_interval_seconds: Clamped to ``>= 0`` and stored.
        """
        self.room_id = room_id
        self.user_agent = user_agent
        self.stats_callback = stats_callback
        # NOTE(review): this value is stored but no method of this class reads it;
        # emission is gated only by a change of the (vip, diamond) signature.
        self.stats_sample_interval_seconds = max(0, int(stats_sample_interval_seconds or 0))
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()
        self._ws: Optional[websocket.WebSocketApp] = None
        self._buffer: List[str] = []
        self._buffer_limit = 10
        self._buffer_date: Optional[str] = None
        self._lock = threading.Lock()
        self._websocket_available = websocket is not None
        self._latest_vip_count: Optional[int] = None
        self._latest_diamond_count: Optional[int] = None
        self._last_stats_signature: Tuple[Optional[int], Optional[int]] = (None, None)
        self._connect_retry_count = 3
        self._connect_retry_delay_seconds = 1

    def _encode(self, msg: str) -> bytes:
        """Frame ``msg`` as a client packet: length twice, type 689, two zero bytes, payload + NUL."""
        content = msg.encode("utf-8") + b"\x00"
        length = len(content) + 8
        head = length.to_bytes(4, "little") * 2
        head += (689).to_bytes(2, "little")
        head += b"\x00\x00"
        return head + content

    @staticmethod
    def _parse_parts(line: str) -> Dict[str, Any]:
        """Split a ``key@=value/key@=value/`` record into a dict (later keys win)."""
        parts: Dict[str, Any] = {}
        for pair in line.split("/"):
            if "@=" in pair:
                key, value = pair.split("@=", 1)
                parts[key] = value
        return parts

    @staticmethod
    def _safe_int(value: Any, default: Optional[int] = None) -> Optional[int]:
        """Return ``int(str(value))`` or ``default`` when the conversion fails."""
        try:
            return int(str(value))
        except Exception:
            return default

    @staticmethod
    def _format_danmu_time(raw: str) -> str:
        """Normalise the ``cst`` field to ``YYYY-MM-DD HH:MM:SS``.

        Digit strings are treated as epoch seconds (or milliseconds when larger
        than 10**12); anything else is parsed as an already formatted timestamp.
        Missing or unparsable values fall back to the current local time.
        """
        fmt = "%Y-%m-%d %H:%M:%S"
        if raw:
            try:
                if raw.isdigit():
                    ts = int(raw)
                    if ts > 10 ** 12:
                        ts = ts / 1000
                    return datetime.fromtimestamp(ts).strftime(fmt)
                return datetime.strptime(raw, fmt).strftime(fmt)
            except Exception:
                # Deliberate best-effort: a bad timestamp must not drop the chat line.
                pass
        return datetime.now().strftime(fmt)

    def _maybe_emit_stats(self, force: bool = False) -> None:
        """Send the latest audience counters to ``stats_callback``.

        Nothing is sent when there is no callback or no counter has been seen.
        Unless ``force`` is true, an unchanged (vip, diamond) pair is skipped.
        Callback errors are logged and swallowed.
        """
        if not self.stats_callback:
            return
        if self._latest_vip_count is None and self._latest_diamond_count is None:
            return
        signature = (self._latest_vip_count, self._latest_diamond_count)
        if not force and signature == self._last_stats_signature:
            return
        point = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "vip_count": self._latest_vip_count,
            "diamond_count": self._latest_diamond_count,
        }
        try:
            self.stats_callback(self.room_id, point)
            self._last_stats_signature = signature
        except Exception as e:
            logger.warning(f"斗鱼人数采样回调失败({self.room_id}): {e}")

    def _on_message(self, ws, message):
        """Handle one WebSocket frame: update counters and record chat lines."""
        if isinstance(message, str):
            # Text frames arrive as ``str``; they have no ``.decode`` and cannot
            # be inflated, so use them directly.
            data = message
        else:
            try:
                decompressed = zlib.decompress(message, -zlib.MAX_WBITS)
                data = decompressed.decode("utf-8", errors="ignore")
            except Exception:
                # Not a raw-deflate payload: treat the bytes as plain text.
                data = message.decode("utf-8", errors="ignore")
        for line in data.split("\x00"):
            line = line.strip()
            if not line:
                continue
            parts = self._parse_parts(line)
            msg_type = str(parts.get("type") or "").strip()
            if msg_type == "oni":
                vip_count = self._safe_int(parts.get("vn"))
                if vip_count is not None:
                    self._latest_vip_count = vip_count
                    self._maybe_emit_stats()
                continue
            if msg_type == "dfnum":
                diamond_count = self._safe_int(parts.get("dfc"))
                if diamond_count is not None:
                    self._latest_diamond_count = diamond_count
                    self._maybe_emit_stats()
                continue
            if msg_type != "chatmsg":
                continue
            nick = parts.get("nn", "未知")
            txt = parts.get("txt", "")
            uid = parts.get("uid", "未知")
            level = parts.get("level", "0")
            fan_group = parts.get("bnn", "")
            fan_level = parts.get("bl", "0")
            time_str = self._format_danmu_time(parts.get("cst", ""))
            output = f"[{time_str}] {nick} (UID: {uid}, Lv{level}"
            if fan_group:
                output += f" / {fan_group} Lv{fan_level}"
            output += f"){txt}"
            self._append_and_maybe_flush(output)

    def _flush_locked(self):
        """Append the buffered lines to the day file. Caller must hold ``_lock``."""
        if not self._buffer or self._buffer_date is None:
            return
        dir_path = os.path.join("temp", "douyu_danmu", self._buffer_date)
        os.makedirs(dir_path, exist_ok=True)
        file_name = os.path.join(dir_path, f"{self.room_id}_{self._buffer_date}.txt")
        data = "\n".join(self._buffer) + "\n"
        with open(file_name, "a", encoding="utf-8") as f:
            f.write(data)
        self._buffer.clear()

    def _append_and_maybe_flush(self, line: str):
        """Buffer ``line``; flush on date rollover or when the buffer is full."""
        date_str = datetime.now().strftime("%Y%m%d")
        with self._lock:
            if self._buffer_date is None:
                self._buffer_date = date_str
            elif date_str != self._buffer_date:
                # Write yesterday's lines to yesterday's file before switching.
                self._flush_locked()
                self._buffer_date = date_str
            self._buffer.append(line)
            if len(self._buffer) >= self._buffer_limit:
                self._flush_locked()

    def _flush(self):
        """Flush the buffer while holding the lock."""
        with self._lock:
            self._flush_locked()

    def _on_open(self, ws):
        """Log in, join the room group and start the keep-alive thread."""
        ws.send(self._encode(f"type@=loginreq/roomid@={self.room_id}/dmbt@=chrome/dmbv@=0/"))
        ws.send(self._encode(f"type@=joingroup/rid@={self.room_id}/gid@=-9999/"))

        def heartbeat():
            while ws.sock and ws.sock.connected and not self._stop_event.is_set():
                try:
                    ws.send(self._encode("type@=mrkl/"))
                except Exception:
                    break
                # Event.wait returns True as soon as stop() is called, so the
                # thread ends immediately instead of sleeping out the interval.
                if self._stop_event.wait(38):
                    break

        threading.Thread(target=heartbeat, daemon=True).start()

    def _on_error(self, ws, error):
        """Log a WebSocket error."""
        logger.error(f"斗鱼弹幕错误({self.room_id}): {error}")

    def _on_close(self, ws, code, msg):
        """Log the WebSocket close event."""
        logger.info(f"斗鱼弹幕连接关闭({self.room_id}): {code} {msg}")

    def _run(self):
        """Thread body: try each proxy URL in turn, with bounded retries per URL.

        The method returns once every URL has used up its retries or ``stop()``
        has been requested. All delays are interruptible by ``stop()``.
        """
        if not self._websocket_available:
            logger.error(f"websocket-client 未安装,无法记录弹幕({self.room_id})")
            return
        try:
            websocket.enableTrace(False)
            ws_urls = [
                "wss://danmuproxy.douyu.com:8501/",
                "wss://danmuproxy.douyu.com:8502/",
                "wss://danmuproxy.douyu.com:8503/",
                "wss://danmuproxy.douyu.com:8504/",
                "wss://danmuproxy.douyu.com:8505/",
                "wss://danmuproxy.douyu.com:8506/",
            ]
            sslopt = {
                "cert_reqs": ssl.CERT_NONE,
                "ssl_version": ssl.PROTOCOL_TLS_CLIENT,
                "ciphers": "DEFAULT@SECLEVEL=1",
            }
            headers = {"User-Agent": self.user_agent}
            for url in ws_urls:
                if self._stop_event.is_set():
                    break
                for attempt in range(1, self._connect_retry_count + 1):
                    if self._stop_event.is_set():
                        break
                    reconnect_needed = False
                    try:
                        self._ws = websocket.WebSocketApp(
                            url,
                            on_open=self._on_open,
                            on_message=self._on_message,
                            on_error=self._on_error,
                            on_close=self._on_close,
                            header=headers,
                        )
                        self._ws.run_forever(sslopt=sslopt, ping_interval=30, ping_timeout=10)
                        if self._stop_event.is_set():
                            break
                        # run_forever returned without a stop request: the
                        # connection dropped and should be retried.
                        reconnect_needed = True
                    except Exception as e:
                        if attempt < self._connect_retry_count:
                            logger.warning(
                                f"斗鱼弹幕连接失败({self.room_id}),第{attempt}/{self._connect_retry_count}次重试: "
                                f"url={url} err={e}"
                            )
                            self._stop_event.wait(self._connect_retry_delay_seconds)
                            continue
                        logger.error(
                            f"斗鱼弹幕连接失败({self.room_id}),已重试{self._connect_retry_count}次: "
                            f"url={url} err={e}"
                        )
                    finally:
                        self._ws = None
                    if reconnect_needed and attempt < self._connect_retry_count:
                        logger.warning(
                            f"斗鱼弹幕连接中断({self.room_id}),第{attempt}/{self._connect_retry_count}次重试: url={url}"
                        )
                        self._stop_event.wait(self._connect_retry_delay_seconds)
                        continue
                    if reconnect_needed and attempt >= self._connect_retry_count:
                        logger.error(
                            f"斗鱼弹幕连接中断({self.room_id}),已重试{self._connect_retry_count}次: url={url}"
                        )
                    break
                if self._stop_event.is_set():
                    break
                self._stop_event.wait(self._connect_retry_delay_seconds)
        finally:
            self._ws = None

    def start(self):
        """Start the recorder thread unless one is already alive."""
        if self._thread and self._thread.is_alive():
            return
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    def stop(self):
        """Request shutdown, close the socket, then emit final stats and flush.

        The stop flag is raised and the socket closed first so that an error
        while flushing (for example a full disk) can no longer leave the
        recorder running; such an error still propagates to the caller.
        """
        self._stop_event.set()
        ws = self._ws  # local copy: the worker thread may reset the attribute
        if ws:
            try:
                ws.close()
            except Exception:
                pass
        self._maybe_emit_stats(force=True)
        self._flush()
class DouyuRedisManager:
    """Redis accessors for the Douyu plugin; every key starts with ``bot:douyu:``."""

    def __init__(self, db_manager: "DBConnectionManager"):
        """Obtain the Redis connection from ``db_manager``."""
        self.redis = db_manager.get_redis_connection()
        self.prefix = "bot:douyu:"

    # --- internal helpers ---
    @staticmethod
    def _to_text(value: Any) -> Any:
        """Decode ``bytes`` as UTF-8; return any other value unchanged."""
        return value.decode("utf-8") if isinstance(value, bytes) else value

    def _sorted_members(self, key: str) -> List[str]:
        """Return the members of the set at ``key``, decoded and sorted."""
        members = self.redis.smembers(key) or set()
        return sorted(self._to_text(member) for member in members)

    def _load_json(self, key: str) -> Optional[Dict[str, Any]]:
        """Return the JSON document stored at ``key``; ``None`` if missing or invalid."""
        data = self.redis.get(key)
        if not data:
            return None
        try:
            return json.loads(self._to_text(data))
        except Exception:
            return None

    # --- room subscriptions ---
    def add_group_room(self, group_id: str, room_id: str) -> bool:
        """Add ``room_id`` to the group's room set."""
        key = f"{self.prefix}group:{group_id}:rooms"
        return self.redis.sadd(key, room_id) >= 0

    def remove_group_room(self, group_id: str, room_id: str) -> bool:
        """Remove ``room_id`` from the group's room set."""
        key = f"{self.prefix}group:{group_id}:rooms"
        return self.redis.srem(key, room_id) >= 0

    def list_group_rooms(self, group_id: str) -> List[str]:
        """Return the group's subscribed room ids, sorted."""
        return self._sorted_members(f"{self.prefix}group:{group_id}:rooms")

    def all_subscribed_rooms(self) -> Set[str]:
        """Return the union of room ids over ``GroupBotManager.get_group_list()``."""
        rooms: Set[str] = set()
        for gid in GroupBotManager.get_group_list():
            rooms.update(self.list_group_rooms(gid))
        return rooms

    def groups_for_room(self, room_id: str) -> List[str]:
        """Return the groups (in group-list order) that subscribe to ``room_id``."""
        return [
            gid
            for gid in GroupBotManager.get_group_list()
            if room_id in self.list_group_rooms(gid)
        ]

    # --- yuba (fish-bar) subscriptions ---
    def add_group_yuba(self, group_id: str, hash_id: str) -> bool:
        """Add ``hash_id`` to the group's yuba set."""
        key = f"{self.prefix}group:{group_id}:yubas"
        return self.redis.sadd(key, hash_id) >= 0

    def remove_group_yuba(self, group_id: str, hash_id: str) -> bool:
        """Remove ``hash_id`` from the group's yuba set."""
        key = f"{self.prefix}group:{group_id}:yubas"
        return self.redis.srem(key, hash_id) >= 0

    def list_group_yubas(self, group_id: str) -> List[str]:
        """Return the group's subscribed yuba hash ids, sorted."""
        return self._sorted_members(f"{self.prefix}group:{group_id}:yubas")

    def all_subscribed_yubas(self) -> Set[str]:
        """Return the union of yuba hash ids over ``GroupBotManager.get_group_list()``."""
        yubas: Set[str] = set()
        for gid in GroupBotManager.get_group_list():
            yubas.update(self.list_group_yubas(gid))
        return yubas

    def groups_for_yuba(self, hash_id: str) -> List[str]:
        """Return the groups (in group-list order) that subscribe to ``hash_id``."""
        return [
            gid
            for gid in GroupBotManager.get_group_list()
            if hash_id in self.list_group_yubas(gid)
        ]

    def get_yuba_last_id(self, hash_id: str) -> Optional[str]:
        """Return the stored last feed id for ``hash_id``, or ``None``."""
        data = self.redis.get(f"{self.prefix}yuba_last_id:{hash_id}")
        if not data:
            return None
        return self._to_text(data)

    def set_yuba_last_id(self, hash_id: str, feed_id: str) -> bool:
        """Store the last feed id for ``hash_id``."""
        key = f"{self.prefix}yuba_last_id:{hash_id}"
        # bool() keeps the declared return type, matching set_text_value().
        return bool(self.redis.set(key, feed_id))

    # --- reminder list ---
    def add_group_subscriber(self, group_id: str, user_id: str) -> bool:
        """Add ``user_id`` to the group's subscriber set."""
        key = f"{self.prefix}group:{group_id}:subscribers"
        return self.redis.sadd(key, user_id) >= 0

    def remove_group_subscriber(self, group_id: str, user_id: str) -> bool:
        """Remove ``user_id`` from the group's subscriber set."""
        key = f"{self.prefix}group:{group_id}:subscribers"
        return self.redis.srem(key, user_id) >= 0

    def list_group_subscribers(self, group_id: str) -> List[str]:
        """Return the group's subscriber ids, sorted."""
        return self._sorted_members(f"{self.prefix}group:{group_id}:subscribers")

    # --- room status ---
    def get_room_status(self, room_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored status JSON of the room, or ``None``."""
        return self._load_json(f"{self.prefix}room_status:{room_id}")

    def set_room_status(self, room_id: str, status: Dict[str, Any]) -> bool:
        """Store ``status`` as JSON (non-ASCII kept verbatim)."""
        key = f"{self.prefix}room_status:{room_id}"
        return bool(self.redis.set(key, json.dumps(status, ensure_ascii=False)))

    # --- automatic background profile ---
    def get_room_background_profile(self, room_id: str) -> Optional[Dict[str, Any]]:
        """Read the cached automatic background profile of a room.

        The profile lives under its own key rather than inside ``room_status``
        because (per the original author's notes):
        1. it is updated far less often than the live status;
        2. it suits a long TTL, unlike the real-time online status;
        3. it can be cleared or refreshed without touching the live-status path.
        """
        return self._load_json(f"{self.prefix}room_background_profile:{room_id}")

    def set_room_background_profile(
        self,
        room_id: str,
        profile: Dict[str, Any],
        ttl_seconds: int = 0,
    ) -> bool:
        """Write the background profile cache.

        The profile is serialised as JSON (``{}`` when falsy). A positive
        ``ttl_seconds`` is passed to Redis as ``ex``; zero or a negative value
        writes a key without expiry.
        """
        key = f"{self.prefix}room_background_profile:{room_id}"
        payload = json.dumps(profile or {}, ensure_ascii=False)
        ttl_seconds = max(int(ttl_seconds or 0), 0)
        if ttl_seconds > 0:
            return bool(self.redis.set(key, payload, ex=ttl_seconds))
        return bool(self.redis.set(key, payload))

    def delete_room_background_profile(self, room_id: str) -> bool:
        """Delete the background profile cache of a room."""
        key = f"{self.prefix}room_background_profile:{room_id}"
        return self.redis.delete(key) >= 0

    # --- live sessions ---
    def get_room_session(self, room_id: str, session_id: str) -> Optional[Dict[str, Any]]:
        """Return one stored session document, or ``None``."""
        return self._load_json(f"{self.prefix}room:{room_id}:session:{session_id}")

    def save_room_session(self, room_id: str, session: Dict[str, Any]) -> bool:
        """Persist ``session`` and move its id to the front of the room's index.

        Returns ``False`` without writing when the session has no ``session_id``.
        The index list is trimmed to its 30 most recent ids.
        """
        session_id = str(session.get("session_id") or "").strip()
        if not session_id:
            return False
        payload = json.dumps(session, ensure_ascii=False)
        room_prefix = f"{self.prefix}room:{room_id}:"
        index_key = f"{room_prefix}session_ids"
        pipe = self.redis.pipeline()
        pipe.set(f"{room_prefix}session:{session_id}", payload)
        pipe.set(f"{room_prefix}latest_session", session_id)
        # Remove any earlier occurrence so the id appears once, at the head.
        pipe.lrem(index_key, 0, session_id)
        pipe.lpush(index_key, session_id)
        pipe.ltrim(index_key, 0, 29)
        return bool(pipe.execute())

    def get_latest_room_session(self, room_id: str) -> Optional[Dict[str, Any]]:
        """Return the session referenced by the room's ``latest_session`` key."""
        session_id = self.redis.get(f"{self.prefix}room:{room_id}:latest_session")
        if not session_id:
            return None
        return self.get_room_session(room_id, str(self._to_text(session_id)))

    def list_room_session_ids(self, room_id: str, limit: int = 10) -> List[str]:
        """Return up to ``limit`` session ids from the head of the index (at least one slot)."""
        key = f"{self.prefix}room:{room_id}:session_ids"
        rows = self.redis.lrange(key, 0, max(limit - 1, 0)) or []
        return [str(self._to_text(row)) for row in rows]

    # --- generic text values ---
    def get_text_value(self, key: str) -> Optional[str]:
        """Return the value at ``key`` as text, or ``None`` when empty/missing."""
        data = self.redis.get(key)
        if not data:
            return None
        return str(self._to_text(data))

    def set_text_value(self, key: str, value: str) -> bool:
        """Store ``value`` at ``key``."""
        return bool(self.redis.set(key, value))
class DouyuPlugin(MessagePluginInterface):
# Report cache version:
# 1. Bumping the version automatically invalidates historical cache entries,
#    so stale text/images are no longer reused;
# 2. This change raises the version to 10:
#    - adds the scheduled-task pipeline for the fans daily report;
#    - supplements the LLM input with chronological live slices and
#      per-session storylines;
#    - keeps new report results clear of pollution from old caches;
#    so old caches must be refreshed to make sure the new results really go
#    through the new pipeline.
_DAILY_REPORT_CACHE_VERSION = 10
FEATURE_KEY = "DOUYU_MONITOR"
FEATURE_DESCRIPTION = "🎮 斗鱼开播提醒 [订阅斗鱼 房间号, 取消订阅斗鱼 房间号]"
@property
def name(self) -> str:
    """Return the plugin name string."""
    return "斗鱼直播"
@property
def version(self) -> str:
    """Return the plugin version string."""
    return "1.0.0"
@property
def description(self) -> str:
    """Return the short plugin description string."""
    return "斗鱼主播开播下播提醒与群订阅管理"
@property
def author(self) -> str:
    """Return the plugin author string."""
    return "ABOT Team"
@property
def command_prefix(self) -> Optional[str]:
    """Return the command prefix; this plugin uses an empty string."""
    return ""
@property
def commands(self) -> List[str]:
    """Return the command keyword list built in ``__init__``."""
    return self._commands
@property
def feature_key(self) -> Optional[str]:
    """Return the class-level ``FEATURE_KEY``."""
    return self.FEATURE_KEY
@property
def feature_description(self) -> Optional[str]:
    """Return the class-level ``FEATURE_DESCRIPTION``."""
    return self.FEATURE_DESCRIPTION
def __init__(self):
    """Set the plugin's default configuration and register the periodic check job.

    No network or Redis access happens here: ``bot`` and ``redis_manager``
    start as ``None`` and are filled in elsewhere.
    """
    super().__init__()
    # Starts empty; ``run_scheduled_action`` replaces it with the bot supplied
    # in the scheduler context when one is given.
    self.bot: WechatAPIClient = None
    self.feature = self.register_feature()
    self.redis_manager: Optional[DouyuRedisManager] = None
    self._commands = ["斗鱼订阅", "取消斗鱼订阅", "斗鱼订阅列表", "斗鱼订阅提醒", "取消斗鱼订阅提醒",
                      "订阅鱼吧", "取消订阅鱼吧", "鱼吧订阅列表",
                      "#斗鱼弹幕日报", "斗鱼弹幕日报", "#强制斗鱼弹幕日报", "强制斗鱼弹幕日报",
                      "#斗鱼粉丝日报", "斗鱼粉丝日报", "#强制斗鱼粉丝日报", "强制斗鱼粉丝日报"]
    self._api_template = "https://www.douyu.com/betard/{room_id}"
    self._yuba_api = "https://yuba.douyu.com/wgapi/yubanc/api/feed/getUserFeedList"
    self._user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    # Interval, in minutes, of the unified check job registered at the end.
    self._check_interval = 5
    self._session_cutoff_hour = 6
    self._merge_gap_hours = 4
    self._daily_report_enable = True
    self._daily_report_time = "10:05"
    self._daily_report_min_messages = 120
    self._daily_report_use_llm = False
    self._daily_report_max_sessions = 4
    self._daily_report_max_length = 1800
    self._daily_report_send_image = True
    # Automatic background profile:
    # 1. when no manual profile exists, let the LLM compile a background from
    #    the room information;
    # 2. the result is cached in Redis so the model is not queried again for
    #    every daily report;
    # 3. even if the model supports browsing/retrieval, the result is only
    #    "auxiliary context" and never replaces real danmu evidence.
    self._auto_room_background_profile_enable = True
    self._auto_room_background_profile_ttl_seconds = 7 * 24 * 3600
    # Dify input strategy:
    # send a reduced set of fields by default, because some Workflows validate
    # complex object inputs strictly and answer 400.
    # To use the full structured payload in a workflow, enable it explicitly
    # in report_api.
    self._daily_report_include_structured_inputs = False
    self._audience_stats_sample_interval_seconds = 60
    self._status_check_retry_count = 3
    self._status_check_retry_delay_seconds = 1
    self._daily_report_llm_client: Optional[UnifiedLLMClient] = None
    # Room semantic profile:
    # 1. allows per-room background such as the streamer's career, relations
    #    within the scene and the origin of common memes;
    # 2. this information never replaces real danmu; it only helps the LLM
    #    understand insider slang more accurately;
    # 3. currently aimed at context-heavy rooms such as Dota2, but the
    #    structure is kept generic.
    self._room_context_profiles: Dict[str, Dict[str, Any]] = {}
    self._danmu_recorders: Dict[str, DouyuDanmuRecorder] = {}
    # Live-status / yuba polling stays in the lightweight async_job to keep
    # production behaviour stable.
    async_job.every_minutes(self._check_interval)(self._scheduled_unified_check_job)
@staticmethod
def _format_exception(exc: Exception) -> str:
message = str(exc).strip()
if message:
return f"{type(exc).__name__}: {message}"
return type(exc).__name__
@staticmethod
def _parse_anchor_day_from_command(parts: List[str]) -> Tuple[bool, str]:
"""
统一解析日报命令里的日期参数。
返回值说明:
1. 第一个布尔值表示日期是否合法;
2. 第二个字符串在合法时是最终日期,不合法时保留原始输入,方便上层提示用户。
"""
anchor_day = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
if len(parts) < 2:
return True, anchor_day
day_text = parts[1].strip()
try:
return True, datetime.strptime(day_text, "%Y-%m-%d").strftime("%Y-%m-%d")
except Exception:
return False, day_text
@staticmethod
def _normalize_text_list(values: Any) -> List[str]:
"""
将配置或接口返回的“字符串列表”统一规整成干净的 list[str]。
这样做的原因:
1. TOML 里有些字段可能写成单字符串,有些写成字符串数组;
2. 后续 prompt 拼装只关心“有序文本集合”,不希望每处都重复判空和类型判断。
"""
if values is None:
return []
if isinstance(values, str):
value = values.strip()
return [value] if value else []
if not isinstance(values, (list, tuple, set)):
return []
result: List[str] = []
for item in values:
value = str(item or "").strip()
if value:
result.append(value)
return result
def _extract_room_runtime_context(self, room_info: Dict[str, Any]) -> Dict[str, Any]:
"""
从斗鱼房间接口里尽量多抽取“语义上下文”。
注意:
1. 斗鱼字段在不同房间或接口版本里可能并不完全一致,所以这里做多 key 兜底;
2. 就算某些字段拿不到,也保留空结构,避免后续 prompt 拼装分支过多。
"""
if not isinstance(room_info, dict):
room_info = {}
def pick(*keys: str) -> str:
for key in keys:
value = str(room_info.get(key) or "").strip()
if value:
return value
return ""
tags = self._normalize_text_list(
room_info.get("tag")
or room_info.get("tags")
or room_info.get("room_tags")
or room_info.get("show_details")
)
return {
"primary_category": pick("cate1Name", "cate_name", "game_name", "gameCateName"),
"secondary_category": pick("cate2Name", "second_lvl_name", "secondCateName", "sub_cate_name"),
"game_name": pick("game_name", "gameCateName", "cate2Name", "second_lvl_name"),
"tags": tags,
}
def _match_room_context_profile(self, room_id: str) -> Dict[str, Any]:
"""
从配置中读取指定房间号的人设/圈内背景。
配置优先按 room_id 精确匹配,避免不同主播之间串用职业生涯信息。
"""
if not isinstance(self._room_context_profiles, dict):
return {}
profile = self._room_context_profiles.get(str(room_id)) or {}
return dict(profile) if isinstance(profile, dict) else {}
def _merge_text_list_values(self, preferred: Any, fallback: Any, limit: int = 12) -> List[str]:
"""
合并两组文本列表,并保证“高优先级来源排前面”。
这里主要服务“手工画像 + Redis 自动画像”合并场景:
1. 手工配置的词条优先保留原顺序;
2. 自动画像只补充缺失项,不覆盖人工判断;
3. 最终长度受控,避免 prompt 被背景资料无限撑大。
"""
merged: List[str] = []
seen: Set[str] = set()
for raw_values in (preferred, fallback):
for item in self._normalize_text_list(raw_values):
marker = item.casefold()
if marker in seen:
continue
seen.add(marker)
merged.append(item)
if len(merged) >= max(int(limit or 0), 1):
return merged
return merged
def _profile_has_meaningful_content(self, profile: Optional[Dict[str, Any]]) -> bool:
"""
判断一份背景画像是否“真的有料”。
只要职业背景、身份摘要、领域、相关人物、剧情词、梗解释等核心字段里有任意有效内容,
就认为这份画像值得参与合并或缓存复用。
"""
if not isinstance(profile, dict) or not profile:
return False
text_fields = [
"domain",
"identity_summary",
"career_background",
]
for field in text_fields:
if str(profile.get(field) or "").strip():
return True
list_fields = [
"domain_keywords",
"related_people",
"storyline_keywords",
"meme_explanations",
"style_hints",
]
for field in list_fields:
if self._normalize_text_list(profile.get(field)):
return True
return False
def _normalize_auto_room_background_profile(self, profile: Dict[str, Any]) -> Dict[str, Any]:
"""
清洗 LLM 返回的背景画像 JSON。
这里刻意只保留日报真正会用到的核心字段,避免把过多“过程型元信息”
带进 Redis 和 prompt导致链路越来越重。
目标是保证进入 Redis 的内容:
1. 结构稳定;
2. 文本长度可控;
3. 只保留能帮助日报理解圈内梗的字段。
"""
profile = profile if isinstance(profile, dict) else {}
normalized = {
"domain": str(profile.get("domain") or "").strip()[:32],
"domain_keywords": self._normalize_text_list(profile.get("domain_keywords"))[:12],
"identity_summary": str(profile.get("identity_summary") or "").strip()[:160],
"career_background": str(profile.get("career_background") or "").strip()[:220],
"related_people": self._normalize_text_list(profile.get("related_people"))[:12],
"storyline_keywords": self._normalize_text_list(profile.get("storyline_keywords"))[:12],
"meme_explanations": self._normalize_text_list(profile.get("meme_explanations"))[:8],
"style_hints": self._normalize_text_list(profile.get("style_hints"))[:8],
}
if not self._profile_has_meaningful_content(normalized):
return {}
return normalized
@staticmethod
def _extract_json_object_from_text(text: str) -> Optional[Dict[str, Any]]:
    """Extract a JSON object from LLM output.

    Tolerates two common kinds of noisy output: JSON wrapped in a
    triple-backtick (optionally ``json``) fence, and a little explanatory text
    before or after the object. Returns ``None`` when no dict can be decoded;
    text that parses as valid JSON but is not an object also yields ``None``.
    """
    cleaned = remove_reasoning_content(str(text or "").strip())
    if not cleaned:
        return None
    if cleaned.startswith("```"):
        cleaned = re.sub(r"^```(?:json)?", "", cleaned, flags=re.IGNORECASE).strip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3].strip()

    def dict_or_none(parsed: Any) -> Optional[Dict[str, Any]]:
        return parsed if isinstance(parsed, dict) else None

    try:
        return dict_or_none(json.loads(cleaned))
    except Exception:
        # Not pure JSON: fall through and try the outermost {...} span.
        pass
    first_brace = cleaned.find("{")
    last_brace = cleaned.rfind("}")
    if first_brace < 0 or last_brace <= first_brace:
        return None
    snippet = cleaned[first_brace:last_brace + 1].strip()
    try:
        return dict_or_none(json.loads(snippet))
    except Exception:
        return None
def _merge_room_background_profiles(
self,
manual_profile: Dict[str, Any],
auto_profile: Dict[str, Any],
) -> Dict[str, Any]:
"""
合并手工画像与自动画像。
优先级固定为:
1. 手工配置;
2. Redis 自动画像;
3. 缺失字段保持空。
这样可以确保“人工确认过的信息”永远压过模型推断。
"""
manual_profile = manual_profile if isinstance(manual_profile, dict) else {}
auto_profile = auto_profile if isinstance(auto_profile, dict) else {}
has_manual = self._profile_has_meaningful_content(manual_profile)
has_auto = self._profile_has_meaningful_content(auto_profile)
return {
"domain": str(manual_profile.get("domain") or auto_profile.get("domain") or "").strip(),
"domain_keywords": self._merge_text_list_values(
manual_profile.get("domain_keywords"),
auto_profile.get("domain_keywords"),
),
"identity_summary": str(
manual_profile.get("identity_summary")
or auto_profile.get("identity_summary")
or ""
).strip(),
"career_background": str(
manual_profile.get("career_background")
or auto_profile.get("career_background")
or ""
).strip(),
"related_people": self._merge_text_list_values(
manual_profile.get("related_people"),
auto_profile.get("related_people"),
),
"storyline_keywords": self._merge_text_list_values(
manual_profile.get("storyline_keywords"),
auto_profile.get("storyline_keywords"),
),
"meme_explanations": self._merge_text_list_values(
manual_profile.get("meme_explanations"),
auto_profile.get("meme_explanations"),
limit=8,
),
"style_hints": self._merge_text_list_values(
manual_profile.get("style_hints"),
auto_profile.get("style_hints"),
limit=8,
),
}
def _build_room_semantic_context(
self,
room_id: str,
nickname: str,
room_name: str,
sessions: List[Dict[str, Any]],
) -> Dict[str, Any]:
"""
构建直播间语义上下文。
核心思想:
1. 先用实时房间信息判断“这是不是 Dota2/电竞强语境房间”;
2. 再叠加人工配置的主播职业生涯、圈内人物、常见梗来源;
3. 最终给 LLM 一份“理解背景”,但不替代真实弹幕证据。
"""
latest_session = sessions[-1] if sessions else {}
latest_runtime_context = dict(latest_session.get("room_context") or {})
latest_status_context = {}
if self.redis_manager:
latest_status = self.redis_manager.get_room_status(room_id) or {}
latest_status_context = dict(latest_status.get("room_context") or {})
merged_runtime_context = {
"primary_category": str(
latest_runtime_context.get("primary_category")
or latest_status_context.get("primary_category")
or ""
).strip(),
"secondary_category": str(
latest_runtime_context.get("secondary_category")
or latest_status_context.get("secondary_category")
or ""
).strip(),
"game_name": str(
latest_runtime_context.get("game_name")
or latest_status_context.get("game_name")
or ""
).strip(),
"tags": self._normalize_text_list(
latest_runtime_context.get("tags") or latest_status_context.get("tags") or []
),
}
manual_profile = self._match_room_context_profile(room_id)
auto_profile = {}
if self.redis_manager:
auto_profile = self.redis_manager.get_room_background_profile(room_id) or {}
profile = self._merge_room_background_profiles(manual_profile, auto_profile)
category_text = " ".join([
merged_runtime_context.get("primary_category", ""),
merged_runtime_context.get("secondary_category", ""),
merged_runtime_context.get("game_name", ""),
room_name,
nickname,
" ".join(merged_runtime_context.get("tags", [])),
" ".join(self._normalize_text_list(profile.get("domain_keywords"))),
]).lower()
inferred_domains: List[str] = []
if any(keyword in category_text for keyword in ["dota", "dota2", "刀塔", "ti", "major"]):
inferred_domains.append("Dota2")
if any(keyword in category_text for keyword in ["电竞", "esports", "职业", "选手"]):
inferred_domains.append("电竞直播")
# 如果配置明确写了 domain则放在最前面作为最强语义锚点。
configured_domain = str(profile.get("domain") or "").strip()
if configured_domain:
inferred_domains = [configured_domain] + [item for item in inferred_domains if item != configured_domain]
return {
"domain": configured_domain,
"inferred_domains": inferred_domains,
"runtime_context": merged_runtime_context,
"career_background": str(profile.get("career_background") or "").strip(),
"identity_summary": str(profile.get("identity_summary") or "").strip(),
"related_people": self._normalize_text_list(profile.get("related_people")),
"storyline_keywords": self._normalize_text_list(profile.get("storyline_keywords")),
"meme_explanations": self._normalize_text_list(profile.get("meme_explanations")),
"style_hints": self._normalize_text_list(profile.get("style_hints")),
}
def _build_room_context_prompt_block(self, payload: Dict[str, Any]) -> str:
"""
将直播间语义上下文整理成一段可以直接喂给 LLM 的提示块。
目标不是要求模型“背设定”,而是提醒它:
1. 先按 Dota2 / 电竞圈语境理解黑话和人物;
2. 看到选手、主播、职业生涯梗时,优先往房间背景上靠;
3. 仍然必须以当天真实弹幕和统计材料为主,不得凭空补剧情。
"""
room_context = payload.get("room_context", {}) or {}
runtime_context = room_context.get("runtime_context", {}) or {}
parts: List[str] = []
domains = [str(item or "").strip() for item in room_context.get("inferred_domains", []) or [] if str(item or "").strip()]
if domains:
parts.append(f"- 直播间领域语境:{', '.join(domains)}。若出现圈内黑话、人物简称、老梗,优先按这个语境理解。")
if runtime_context.get("game_name") or runtime_context.get("secondary_category") or runtime_context.get("primary_category"):
parts.append(
"- 房间分区信息:"
f"{runtime_context.get('primary_category') or '未知大类'} / "
f"{runtime_context.get('secondary_category') or runtime_context.get('game_name') or '未知小类'}"
)
if runtime_context.get("tags"):
parts.append(f"- 房间标签:{''.join(self._normalize_text_list(runtime_context.get('tags'))[:8])}")
if room_context.get("identity_summary"):
parts.append(f"- 主播身份提示:{room_context.get('identity_summary')}")
if room_context.get("career_background"):
parts.append(f"- 职业生涯背景:{room_context.get('career_background')}")
related_people = self._normalize_text_list(room_context.get("related_people"))
if related_people:
parts.append(f"- 重点相关人物:{''.join(related_people[:12])}。弹幕提到这些人时,优先考虑圈内关联。")
storyline_keywords = self._normalize_text_list(room_context.get("storyline_keywords"))
if storyline_keywords:
parts.append(f"- 常见剧情关键词:{''.join(storyline_keywords[:12])}")
meme_explanations = self._normalize_text_list(room_context.get("meme_explanations"))
if meme_explanations:
parts.append("- 常见梗解释:")
for item in meme_explanations[:6]:
parts.append(f" * {item}")
style_hints = self._normalize_text_list(room_context.get("style_hints"))
if style_hints:
parts.append(f"- 风格提示:{''.join(style_hints[:6])}")
if not parts:
return ""
return "【直播间语义上下文】\n" + "\n".join(parts) + "\n\n"
async def _fetch_json_with_retries(self, session: aiohttp.ClientSession, url: str,
                                   headers: Dict[str, str], context: str,
                                   params: Optional[Dict[str, Any]] = None) -> Any:
    """GET ``url`` and return the decoded JSON body, retrying on any error.

    Up to ``_status_check_retry_count`` attempts are made, each with a
    10-second total timeout, waiting ``_status_check_retry_delay_seconds``
    between attempts. The body is decoded regardless of its content type.

    Raises:
        Exception: the error of the final attempt, re-raised unchanged.
        RuntimeError: when the configured retry count allows no attempt at all.
    """
    max_attempts = self._status_check_retry_count
    for attempt in range(1, max_attempts + 1):
        try:
            async with session.get(
                url,
                headers=headers,
                params=params,
                timeout=aiohttp.ClientTimeout(total=10)
            ) as resp:
                resp.raise_for_status()
                return await resp.json(content_type=None)
        except Exception as e:
            if attempt >= max_attempts:
                # Last attempt: surface the original error to the caller.
                raise
            logger.warning(
                f"{context}失败,第{attempt}/{max_attempts}次重试: "
                f"{self._format_exception(e)}"
            )
            await asyncio.sleep(self._status_check_retry_delay_seconds)
    # Only reachable when the loop body never ran (retry count below 1).
    raise RuntimeError(f"{context}失败,未获取到有效响应")
async def _scheduled_unified_check_job(self):
    """Run the live-status check and then the yuba feed check.

    The two checks run sequentially; an exception raised by the first one
    propagates and the second is not started.
    """
    await self._scheduled_check_job()
    await self._scheduled_yuba_check_job()
async def _scheduled_daily_report_tick(self):
    """Send yesterday's daily reports once the configured time has been reached.

    Does nothing while reports are disabled or while the Redis manager or the
    bot is missing. After a successful send, the current timestamp is stored
    under today's job key. Failures are logged, not raised.
    """
    ready = self._daily_report_enable and self.redis_manager and self.bot
    if not ready:
        return
    now_dt = datetime.now()
    if not self._should_run_daily_report(now_dt):
        return
    anchor_day = (now_dt - timedelta(days=1)).strftime("%Y-%m-%d")
    try:
        await self._send_daily_reports(anchor_day)
        job_key = self._daily_report_job_key(now_dt.strftime("%Y-%m-%d"))
        finished_at = now_dt.strftime("%Y-%m-%d %H:%M:%S")
        self.redis_manager.set_text_value(job_key, finished_at)
    except Exception as e:
        logger.error(f"斗鱼每日报告任务失败(anchor_day={anchor_day}): {e}")
def get_schedule_actions(self) -> List[Dict[str, Any]]:
"""声明插件可调度动作。
设计说明:
1. 斗鱼“每日报告”迁移到插件任务配置体系,支持在后台可视化启停/改时;
2. 触发时间直接复用配置项 daily_report_time避免出现“两套时间配置”
3. 粉丝日报和运营日报都走同一套数据库调度体系,后台可以分别启停;
4. 作用域默认 all_enabled_groups让插件调度系统按群权限先过滤目标群。
"""
trigger_time = str(self._daily_report_time or "09:30").strip() or "09:30"
return [
{
"action_key": "douyu_daily_report_push",
"name": "斗鱼弹幕日报推送",
"description": "按配置时间推送前一天斗鱼弹幕日报",
"trigger_type": "at_times",
"trigger_config": {"time_list": [trigger_time]},
"target_scope": "all_enabled_groups",
"target_config": {},
"payload": {},
"default_enabled": bool(self._daily_report_enable),
},
{
"action_key": "douyu_fans_daily_report_push",
"name": "斗鱼粉丝日报推送",
"description": "按配置时间推送前一天斗鱼粉丝日报",
"trigger_type": "at_times",
"trigger_config": {"time_list": [trigger_time]},
"target_scope": "all_enabled_groups",
"target_config": {},
# 定时任务默认开启已发送保护,避免重载补偿或手动补跑时重复刷同一天内容。
"payload": {"respect_sent_flag": True},
"default_enabled": bool(self._daily_report_enable),
}
]
async def run_scheduled_action(self, action_key: str, context: Dict[str, Any]) -> Dict[str, Any]:
    """Execute one scheduler-triggered action (danmu daily report or fans daily report).

    Args:
        action_key: ``douyu_daily_report_push`` or ``douyu_fans_daily_report_push``.
        context: Scheduler context. Reads ``bot``, ``payload`` and
            ``target_groups``. ``payload`` may carry ``anchor_day``, ``force``,
            ``force_regenerate`` and (fans report only) ``respect_sent_flag``.

    Returns:
        A dict with ``success`` (False when the action is unsupported, the
        dependencies are missing, or any group failed), a human readable
        ``summary`` and a ``detail`` dict.
    """
    if action_key not in {"douyu_daily_report_push", "douyu_fans_daily_report_push"}:
        return {"success": False, "summary": f"不支持动作: {action_key}", "detail": {}}

    # The scheduler injects the bot so scheduled runs can send messages too.
    self.bot = context.get("bot") or self.bot
    if not self._daily_report_enable:
        return {"success": True, "summary": "斗鱼每日报告已关闭,跳过执行", "detail": {"enabled": False}}
    if not self.redis_manager or not self.bot:
        return {"success": False, "summary": "斗鱼每日报告执行失败:依赖未就绪(redis/bot)", "detail": {}}

    payload = context.get("payload") or {}
    if not isinstance(payload, dict):
        # A malformed payload must not crash the run; fall back to defaults.
        payload = {}

    # A manual trigger from the backend may override anchor_day, which makes
    # it possible to re-send the report of an arbitrary past day.
    anchor_day = str(payload.get("anchor_day") or "").strip()
    if not anchor_day:
        anchor_day = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
    force = bool(payload.get("force", False))
    force_regenerate = bool(payload.get("force_regenerate", False))
    respect_sent_flag = bool(payload.get("respect_sent_flag", True))

    # Normalise and de-duplicate (order preserving) so a group listed twice
    # does not receive the same report twice.
    normalized_ids = (str(g).strip() for g in (context.get("target_groups") or []))
    target_groups = list(dict.fromkeys(gid for gid in normalized_ids if gid))
    if not target_groups:
        target_groups = list(dict.fromkeys(GroupBotManager.get_group_list() or []))

    is_fans_report = action_key == "douyu_fans_daily_report_push"
    delivered_groups: List[str] = []
    failed_groups: Dict[str, str] = {}
    for gid in target_groups:
        try:
            # Per-group push: the send helpers filter again by Douyu
            # subscription and plugin permission.
            if is_fans_report:
                delivered = await self._send_fans_daily_reports(
                    anchor_day=anchor_day,
                    target_group_id=gid,
                    force_regenerate=force_regenerate,
                    force=force,
                    respect_sent_flag=respect_sent_flag,
                )
            else:
                delivered = await self._send_daily_reports(
                    anchor_day=anchor_day,
                    target_group_id=gid,
                    force=force,
                    force_regenerate=force_regenerate,
                )
            if delivered:
                delivered_groups.append(gid)
        except Exception as e:
            # One failing group must not stop the remaining groups.
            failed_groups[gid] = self._format_exception(e)

    report_label = "斗鱼粉丝日报" if is_fans_report else "斗鱼日报"
    return {
        "success": len(failed_groups) == 0,
        "summary": (
            f"{report_label}任务完成: 日期{anchor_day}, 目标群{len(target_groups)}个, "
            f"成功发送群{len(delivered_groups)}个, 失败群{len(failed_groups)}"
        ),
        "detail": {
            "action_key": action_key,
            "anchor_day": anchor_day,
            "force": force,
            "force_regenerate": force_regenerate,
            "target_groups": target_groups,
            "delivered_groups": delivered_groups,
            "failed_groups": failed_groups,
        },
    }
def initialize(self, context: Dict[str, Any]) -> bool:
    """Create the Redis manager, take the bot from ``context`` and overlay the ``Douyu`` config section.

    Every setting falls back to the value currently held on the instance
    when the config key is absent.

    Returns:
        True on success. Any exception is logged and reported as False.
    """
    try:
        self.redis_manager = DouyuRedisManager(DBConnectionManager.get_instance())
        self.bot = context.get("bot", self.bot)
        cfg = self._config.get("Douyu", {})

        def cfg_int(key: str, current: Any) -> int:
            return int(cfg.get(key, current))

        def cfg_flag(key: str, current: Any) -> bool:
            return bool(cfg.get(key, current))

        # Configured commands come first; duplicates are dropped, order kept.
        extra_commands = cfg.get("command", [])
        if isinstance(extra_commands, list) and extra_commands:
            self._commands = list(dict.fromkeys(extra_commands + self._commands))

        self._api_template = cfg.get("api_url_template", self._api_template)
        self._user_agent = cfg.get("user_agent", self._user_agent)
        self._check_interval = cfg_int("check_interval_minutes", self._check_interval)
        self._session_cutoff_hour = cfg_int("session_cutoff_hour", self._session_cutoff_hour)
        self._merge_gap_hours = cfg_int("merge_gap_hours", self._merge_gap_hours)

        self._daily_report_enable = cfg_flag("daily_report_enable", self._daily_report_enable)
        self._daily_report_time = str(
            cfg.get("daily_report_time", self._daily_report_time) or self._daily_report_time
        )
        self._daily_report_min_messages = cfg_int("daily_report_min_messages", self._daily_report_min_messages)
        self._daily_report_use_llm = cfg_flag("daily_report_use_llm", self._daily_report_use_llm)
        self._daily_report_max_sessions = cfg_int("daily_report_max_sessions", self._daily_report_max_sessions)
        self._daily_report_max_length = cfg_int("daily_report_max_length", self._daily_report_max_length)
        self._daily_report_send_image = cfg_flag("daily_report_send_image", self._daily_report_send_image)

        self._auto_room_background_profile_enable = cfg_flag(
            "auto_room_background_profile_enable",
            self._auto_room_background_profile_enable,
        )
        # The TTL is clamped to at least 3600.
        self._auto_room_background_profile_ttl_seconds = max(
            cfg_int(
                "auto_room_background_profile_ttl_seconds",
                self._auto_room_background_profile_ttl_seconds,
            ),
            3600,
        )
        self._audience_stats_sample_interval_seconds = cfg_int(
            "audience_stats_sample_interval_seconds",
            self._audience_stats_sample_interval_seconds,
        )

        # Anything that is not a dict is treated as "no profiles".
        profiles = cfg.get("room_context_profiles", {}) or {}
        self._room_context_profiles = profiles if isinstance(profiles, dict) else {}

        report_api_cfg = cfg.get("report_api", {}) or {}
        self._daily_report_include_structured_inputs = bool(
            report_api_cfg.get(
                "include_structured_inputs",
                self._daily_report_include_structured_inputs,
            )
        )
        # The dedicated report LLM client is only built when a report_api
        # section is configured.
        if report_api_cfg:
            self._daily_report_llm_client = UnifiedLLMClient(report_api_cfg)
        return True
    except Exception as e:
        logger.error(f"{self.name} 初始化失败: {e}")
        return False
def start(self) -> bool:
    """Flag the plugin as running; always reports success."""
    running = PluginStatus.RUNNING
    self.status = running
    return True
def stop(self) -> bool:
    """Stop every registered danmu recorder and mark the plugin as stopped.

    Each recorder is stopped independently: a failure in one recorder is
    logged and does not prevent the remaining recorders from being stopped
    or the status from becoming ``STOPPED``.

    Returns:
        Always True.
    """
    # Iterate over a snapshot so a registry change during shutdown cannot
    # raise "dictionary changed size during iteration".
    recorders = list(getattr(self, "_danmu_recorders", {}).items())
    for room_id, recorder in recorders:
        try:
            recorder.stop()
        except Exception as e:
            logger.error(f"停止斗鱼弹幕记录失败({room_id}): {e}")
    self.status = PluginStatus.STOPPED
    return True
def can_process(self, message: Dict[str, Any]) -> bool:
    """Tell whether the message's first whitespace-separated token is a registered command."""
    tokens = str(message.get("content", "")).strip().split()
    # Blank content yields no tokens and is never a command.
    return bool(tokens) and tokens[0] in self._commands
@plugin_stats_decorator(plugin_name="斗鱼直播")
async def process_message(self, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
    """Handle one chat command addressed to the Douyu plugin.

    Commands are matched either on the whole text (the two "list"
    commands) or on the first whitespace-separated token:
    room subscription list / subscribe / unsubscribe, live-reminder
    opt-in / opt-out, Yuba subscription list / subscribe / unsubscribe, and
    the four manual report commands (danmu or fans report, plain or
    "forced", each with an optional leading ``#``).

    Args:
        message: Incoming message dict. Reads ``content``, ``sender``,
            ``roomid``, ``gbm`` and ``bot``.

    Returns:
        ``(handled, note)``. ``(False, "没有权限")`` when the feature is
        disabled for the group; ``(False, None)`` when nothing matched.
    """
    content = str(message.get("content", "")).strip()
    sender = message.get("sender")
    roomid = message.get("roomid", "")
    gbm: GroupBotManager = message.get("gbm")
    # Keep the previously injected bot when this message carries none (same
    # fallback rule run_scheduled_action applies to its context).
    self.bot: WechatAPIClient = message.get("bot") or self.bot

    if roomid and gbm.get_group_permission(roomid, self.feature) == PermissionStatus.DISABLED:
        return False, "没有权限"

    parts = content.split()
    if not parts:
        # Blank content carries no command; avoid IndexError on parts[0].
        return False, None
    first_token = parts[0]
    # Group chats are answered in the group, private chats go to the sender.
    reply_to = roomid or sender

    if content == "斗鱼订阅列表":
        rooms = self.redis_manager.list_group_rooms(reply_to)
        if not rooms:
            await self.bot.send_text_message(reply_to, "暂无订阅", sender)
            return True, "暂无订阅"
        text = "当前订阅的斗鱼房间:\n" + "\n".join(rooms)
        await self.bot.send_text_message(reply_to, text, sender)
        return True, "列表已发送"

    if first_token == "斗鱼订阅提醒":
        if not roomid:
            await self.bot.send_text_message(sender, "请在群聊中使用该命令", sender)
            return True, "仅支持群聊"
        ok = self.redis_manager.add_group_subscriber(roomid, sender)
        await self.bot.send_at_message(roomid, "已加入斗鱼订阅提醒名单", [sender])
        return True, "加入提醒名单成功" if ok else "加入提醒名单失败"

    if first_token == "取消斗鱼订阅提醒":
        if not roomid:
            await self.bot.send_text_message(sender, "请在群聊中使用该命令", sender)
            return True, "仅支持群聊"
        ok = self.redis_manager.remove_group_subscriber(roomid, sender)
        await self.bot.send_at_message(roomid, "已取消斗鱼订阅提醒", [sender])
        return True, "取消提醒成功" if ok else "取消提醒失败"

    if first_token == "斗鱼订阅":
        if len(parts) < 2:
            # The hint must show the real command name ("斗鱼订阅").
            await self.bot.send_text_message(reply_to, "请提供房间号,例如:斗鱼订阅 7718843", sender)
            return True, "命令格式错误"
        room_id = parts[1].strip()
        if not room_id.isdigit():
            await self.bot.send_text_message(reply_to, "房间号必须是数字,例如:斗鱼订阅 52876", sender)
            return True, "命令格式错误"
        ok = self.redis_manager.add_group_room(reply_to, room_id)
        await self.bot.send_text_message(reply_to, f"✅ 已订阅斗鱼房间 {room_id}", sender)
        return True, "订阅成功" if ok else "订阅失败"

    if first_token == "取消斗鱼订阅":
        if len(parts) < 2:
            # The hint must show the real command name ("取消斗鱼订阅").
            await self.bot.send_text_message(reply_to, "请提供房间号,例如:取消斗鱼订阅 7718843", sender)
            return True, "命令格式错误"
        room_id = parts[1].strip()
        if not room_id.isdigit():
            await self.bot.send_text_message(reply_to, "房间号必须是数字,例如:取消斗鱼订阅 52876", sender)
            return True, "命令格式错误"
        ok = self.redis_manager.remove_group_room(reply_to, room_id)
        await self.bot.send_text_message(reply_to, f"✅ 已取消订阅斗鱼房间 {room_id}", sender)
        return True, "取消成功" if ok else "取消失败"

    if content == "鱼吧订阅列表":
        yubas = self.redis_manager.list_group_yubas(reply_to)
        if not yubas:
            await self.bot.send_text_message(reply_to, "暂无鱼吧订阅", sender)
            return True, "暂无鱼吧订阅"
        text = "当前订阅的斗鱼鱼吧:\n" + "\n".join(yubas)
        await self.bot.send_text_message(reply_to, text, sender)
        return True, "列表已发送"

    if first_token == "订阅鱼吧":
        if len(parts) < 2:
            await self.bot.send_text_message(reply_to, "请提供鱼吧 hash_id例如订阅鱼吧 PDAP2zEk3nwx",
                                             sender)
            return True, "命令格式错误"
        hash_id = parts[1].strip()
        ok = self.redis_manager.add_group_yuba(reply_to, hash_id)
        await self.bot.send_text_message(reply_to, f"✅ 已订阅斗鱼鱼吧 {hash_id}", sender)
        return True, "订阅成功" if ok else "订阅失败"

    if first_token == "取消订阅鱼吧":
        if len(parts) < 2:
            await self.bot.send_text_message(reply_to, "请提供鱼吧 hash_id例如取消订阅鱼吧 PDAP2zEk3nwx",
                                             sender)
            return True, "命令格式错误"
        hash_id = parts[1].strip()
        ok = self.redis_manager.remove_group_yuba(reply_to, hash_id)
        await self.bot.send_text_message(reply_to, f"✅ 已取消订阅斗鱼鱼吧 {hash_id}", sender)
        return True, "取消成功" if ok else "取消失败"

    # Manual report commands. Each accepts one optional leading "#".
    # Value: (is fans report, uses the "forced regenerate" wording).
    report_commands = {
        "斗鱼弹幕日报": (False, False),
        "强制斗鱼弹幕日报": (False, True),
        "斗鱼粉丝日报": (True, False),
        "强制斗鱼粉丝日报": (True, True),
    }
    report_name = first_token[1:] if first_token.startswith("#") else first_token
    if report_name in report_commands:
        is_fans, is_forced = report_commands[report_name]
        if not roomid:
            await self.bot.send_text_message(sender, "请在群聊中使用该命令", sender)
            return True, "仅支持群聊"
        ok, anchor_day = self._parse_anchor_day_from_command(parts)
        if not ok:
            await self.bot.send_text_message(
                roomid, f"日期格式错误,请使用:#{report_name} 2026-04-07", sender
            )
            return True, "日期格式错误"

        label = "斗鱼粉丝日报" if is_fans else "斗鱼弹幕日报"
        icon = "🎉" if is_fans else "⏳"
        # The "forced" wording makes regression-test runs distinguishable
        # from ordinary requests in the group chat.
        action = "强制重生成" if is_forced else "生成"
        await self.bot.send_text_message(roomid, f"{icon} 正在{action}{label}:{anchor_day}", sender)

        # Manual commands always regenerate (plain and forced variants
        # alike), so a cache hit never hides the latest output; only the
        # scheduled path keeps the cache strategy.
        if is_fans:
            delivered = await self._send_fans_daily_reports(
                anchor_day,
                target_group_id=roomid,
                force_regenerate=True,
            )
        else:
            delivered = await self._send_daily_reports(
                anchor_day,
                target_group_id=roomid,
                force=True,
                force_regenerate=True,
            )
        if delivered:
            outcome = "已强制重生成并发送" if is_forced else "已发送"
            return True, f"{label}{outcome}:{anchor_day}"
        await self.bot.send_text_message(roomid, f"暂无可发送的{label}:{anchor_day}", sender)
        return True, "暂无日报"

    return False, None
async def _scheduled_check_job(self):
    """Poll every subscribed room once and react to live-status transitions.

    Per room: fetch the room info, store the fresh status, then
    - unknown -> offline: nothing to announce;
    - unknown / offline -> live: send the live notification;
    - live -> offline: send the offline notification;
    - live -> live without a recorder: resume the session and start the
      danmu recorder again (compensation).
    A failure in one room is logged and does not stop the other rooms.
    """
    try:
        rooms = self.redis_manager.all_subscribed_rooms()
        if not rooms:
            return
        async with aiohttp.ClientSession() as session:
            for room_id in rooms:
                try:
                    payload = await self._fetch_json_with_retries(
                        session,
                        self._api_template.format(room_id=room_id),
                        {
                            "User-Agent": self._user_agent,
                            "Referer": f"https://www.douyu.com/{room_id}"
                        },
                        context=f"斗鱼在线检查(room_id={room_id})"
                    )
                    room_info = payload.get("room", {}) if isinstance(payload, dict) else {}
                    show_status = room_info.get("show_status")
                    nickname = room_info.get("nickname", "")
                    room_name = room_info.get("room_name", "")
                    room_context = self._extract_room_runtime_context(room_info)
                    avatar = room_info.get("avatar", {}) or {}
                    thumb_url = str(avatar.get("small", "") or "").strip().strip("`").strip()
                    # An unparsable loop flag is treated as "not looping".
                    try:
                        video_loop = int(str(room_info.get("videoLoop", 0)))
                    except Exception:
                        video_loop = 0

                    previous = self.redis_manager.get_room_status(room_id) or {}
                    was_live = previous.get("is_live")
                    # Live means show_status 1 and not a loop (replay) stream.
                    is_live = bool(show_status == 1 and video_loop == 0)
                    self.redis_manager.set_room_status(room_id, {
                        "is_live": is_live,
                        "nickname": nickname,
                        "room_name": room_name,
                        "is_loop": video_loop == 1,
                        # Keep the latest probed room context; the daily
                        # report uses it to make sense of in-group memes.
                        "room_context": room_context,
                    })

                    if was_live is None and not is_live:
                        continue
                    if is_live and (was_live is None or was_live is False):
                        await self._notify_groups_live(room_id, nickname, room_name, thumb_url, room_context)
                        continue
                    if was_live is True and not is_live:
                        await self._notify_groups_offline(room_id, nickname, room_name, video_loop == 1, room_context)
                        continue
                    if was_live is True and is_live and room_id not in self._danmu_recorders:
                        try:
                            room_session = self._open_or_resume_session(room_id, nickname, room_name, room_context)
                            if room_session:
                                logger.info(
                                    f"检测到持续直播状态,续接斗鱼直播会话({room_id}): "
                                    f"session={room_session.get('session_id')}"
                                )
                            logger.info(f"检测到持续直播状态,补偿启动斗鱼弹幕记录({room_id})")
                            self._start_danmu_record(room_id)
                        except Exception as e:
                            logger.exception(
                                f"补偿启动斗鱼弹幕记录失败({room_id}): {self._format_exception(e)}"
                            )
                            continue
                    # Short pause; only reached on the paths that did not
                    # `continue` above.
                    await asyncio.sleep(0.1)
                except Exception as e:
                    logger.exception(
                        f"斗鱼检查失败(room_id={room_id}): {self._format_exception(e)}"
                    )
    except Exception as e:
        logger.exception(f"斗鱼定时任务异常: {self._format_exception(e)}")
async def _notify_groups_live(
    self,
    room_id: str,
    nickname: str,
    room_name: str,
    thumb_url: str,
    room_context: Optional[Dict[str, Any]] = None,
):
    """Announce a stream start to all permitted subscriber groups, then open the session and start recording.

    Groups with reminder subscribers receive an @-message, other groups a
    plain text; both are followed by the link card. Send failures are
    logged per group. Session/recorder start-up runs afterwards regardless
    of how the sends went.
    """
    text = f"🚀 斗鱼开播通知 \n🎤 {nickname} 正在直播中!\n 📌 房间标题:{room_name} \n 👉 点击观看https://www.douyu.com/{room_id}"
    groups = self.redis_manager.groups_for_room(room_id)
    xml_content = DOUYU_MESSAGE_XML.format(title=room_name, liver=nickname, roomid=room_id, thumburl=thumb_url)

    for gid in groups:
        allowed = GroupBotManager.get_group_permission(gid, self.feature) == PermissionStatus.ENABLED
        if not allowed:
            continue
        try:
            subscribers = self.redis_manager.list_group_subscribers(gid)
            if subscribers:
                await self.bot.send_at_message(gid, text, subscribers)
            else:
                await self.bot.send_text_message(gid, text)
            await self.bot.send_link_xml_message(xml_content, gid)
        except Exception as e:
            logger.error(f"发送斗鱼开播提醒失败: {e}")

    try:
        session = self._open_or_resume_session(room_id, nickname, room_name, room_context or {})
        if session:
            logger.info(
                f"斗鱼直播会话开启/续接: room={room_id}, session={session.get('session_id')}, "
                f"segments={len(session.get('segments', []))}, anchor_day={session.get('anchor_day')}"
            )
        logger.info(f"启动斗鱼弹幕记录({room_id})")
        self._start_danmu_record(room_id)
    except Exception as e:
        logger.error(f"启动斗鱼弹幕记录失败({room_id}): {e}")
async def _notify_groups_offline(
    self,
    room_id: str,
    nickname: str,
    room_name: str,
    is_loop: bool = False,
    room_context: Optional[Dict[str, Any]] = None,
):
    """Announce a stream end to all permitted subscriber groups, then close the session segment and stop recording.

    Send failures are logged per group; closing the session and stopping
    the recorder runs afterwards regardless of how the sends went.
    """
    loop_note = "(当前为轮播)" if is_loop else ""
    text = f"🔔 斗鱼提醒:{nickname} 下播啦~\n 🏷️ {room_name}" + loop_note
    groups = self.redis_manager.groups_for_room(room_id)

    for gid in groups:
        allowed = GroupBotManager.get_group_permission(gid, self.feature) == PermissionStatus.ENABLED
        if not allowed:
            continue
        try:
            await self.bot.send_text_message(gid, text)
        except Exception as e:
            logger.error(f"发送斗鱼下播提醒失败: {e}")

    try:
        session = self._close_active_session(room_id, nickname, room_name, room_context or {})
        if session:
            logger.info(
                f"斗鱼直播会话关闭片段: room={room_id}, session={session.get('session_id')}, "
                f"segments={len(session.get('segments', []))}, is_live={session.get('is_live')}"
            )
        logger.info(f"停止斗鱼弹幕记录({room_id})")
        self._stop_danmu_record(room_id)
    except Exception as e:
        logger.error(f"停止斗鱼弹幕记录失败({room_id}): {e}")
async def _scheduled_yuba_check_job(self):
    """Poll every subscribed Yuba once and push the newest non-pinned post when it changed.

    Per Yuba: fetch the first page of the feed, pick the first entry that
    is not pinned, compare its id with the last id stored in Redis, and on
    a difference notify the subscriber groups and store the new id.
    A failure in one Yuba is logged and does not stop the other Yubas.
    """
    try:
        yubas = self.redis_manager.all_subscribed_yubas()
        if not yubas:
            return
        async with aiohttp.ClientSession() as session:
            for hash_id in yubas:
                try:
                    params = {
                        "filter_type": 1,
                        "hash_id": hash_id,
                        "limit": 10,
                        "offset": 0
                    }
                    headers = {
                        "User-Agent": self._user_agent,
                        "Referer": f"https://yuba.douyu.com/member/{hash_id}/main/news",
                    }
                    data = await self._fetch_json_with_retries(
                        session,
                        self._yuba_api,
                        headers,
                        context=f"斗鱼鱼吧检查(hash_id={hash_id})",
                        params=params
                    )
                    if data.get("error") != 0:
                        logger.error(f"斗鱼鱼吧 API 错误 ({hash_id}): {data.get('msg')}")
                        continue

                    # Tolerate explicit nulls for "data" / "feed_list".
                    feed_list = (data.get("data") or {}).get("feed_list") or []
                    # First post that is NOT pinned to the top.
                    target_feed = next(
                        (feed for feed in feed_list if feed.get("home_feed_top") != 1),
                        None,
                    )
                    if not target_feed:
                        continue

                    raw_feed_id = target_feed.get("feed_id")
                    if raw_feed_id is None or not str(raw_feed_id).strip():
                        # Without an id there is nothing to de-duplicate on
                        # and no valid link; never announce ".../feed/None".
                        continue
                    feed_id = str(raw_feed_id)
                    last_id = self.redis_manager.get_yuba_last_id(hash_id)
                    if last_id and feed_id == last_id:
                        continue

                    # New post found. Null fields degrade to placeholders
                    # instead of raising, otherwise the id would never be
                    # stored and the same post would fail on every cycle.
                    nickname = (target_feed.get("publisher") or {}).get("nickname", "未知主播")
                    content = target_feed.get("text") or ""
                    ctime = target_feed.get("ctime")
                    publish_time = "未知时间"
                    if ctime:
                        try:
                            publish_time = datetime.fromtimestamp(int(ctime)).strftime('%Y-%m-%d %H:%M:%S')
                        except (TypeError, ValueError, OverflowError, OSError):
                            publish_time = "未知时间"

                    # Cap the quoted content length.
                    if len(content) > 200:
                        content = content[:200] + "..."
                    full_url = f"https://yuba.douyu.com/feed/{feed_id}"
                    await self._notify_groups_yuba(hash_id, nickname, content, full_url, publish_time)
                    # Remember the announced post.
                    self.redis_manager.set_yuba_last_id(hash_id, feed_id)
                    await asyncio.sleep(0.5)
                except Exception as e:
                    logger.exception(
                        f"检查斗鱼鱼吧({hash_id})失败: {self._format_exception(e)}"
                    )
                    continue
    except Exception as e:
        logger.exception(f"斗鱼鱼吧定时任务异常: {self._format_exception(e)}")
async def _notify_groups_yuba(self, hash_id: str, nickname: str, content: str, url: str,
                              publish_time: str = "未知时间"):
    """Send a Yuba post notification to every subscriber group that has the feature enabled.

    A failed send is logged and the remaining groups are still served.
    """
    text = f"🌟 斗鱼鱼吧动态提醒 \n👤 主播:{nickname}\n⏰ 时间:{publish_time}\n📝 内容:{content}\n🔗 链接:{url}"
    for gid in self.redis_manager.groups_for_yuba(hash_id):
        allowed = GroupBotManager.get_group_permission(gid, self.feature) == PermissionStatus.ENABLED
        if not allowed:
            continue
        try:
            await self.bot.send_text_message(gid, text)
        except Exception as e:
            logger.error(f"发送斗鱼鱼吧动态提醒失败: {e}")
def _get_danmu_recorder(self, room_id: str) -> DouyuDanmuRecorder:
    """Return the recorder registered for ``room_id``, creating and registering one when absent."""
    existing = self._danmu_recorders.get(room_id)
    if existing:
        return existing

    # New recorders report audience samples back through
    # _record_room_audience_point at the configured interval.
    created = DouyuDanmuRecorder(
        room_id,
        self._user_agent,
        stats_callback=self._record_room_audience_point,
        stats_sample_interval_seconds=self._audience_stats_sample_interval_seconds,
    )
    self._danmu_recorders[room_id] = created
    return created
@classmethod
def _normalize_audience_points(cls, points: List[Dict[str, Any]], limit: int = 720) -> List[Dict[str, Any]]:
minute_map: Dict[str, Dict[str, Any]] = {}
for item in points or []:
timestamp = str(item.get("timestamp") or "").strip()
point_dt = cls._parse_session_time(timestamp)
if not point_dt:
continue
minute_key = point_dt.strftime("%Y-%m-%d %H:%M")
minute_map[minute_key] = {
"timestamp": timestamp,
"vip_count": int(item.get("vip_count", 0) or 0),
"diamond_count": int(item.get("diamond_count", 0) or 0),
}
normalized = list(minute_map.values())
normalized.sort(key=lambda row: row.get("timestamp", ""))
if len(normalized) <= limit:
return normalized
bucket_size_minutes = max((len(normalized) + limit - 1) // limit, 1)
bucket_map: Dict[str, Dict[str, Any]] = {}
for item in normalized:
point_dt = cls._parse_session_time(str(item.get("timestamp") or ""))
if not point_dt:
continue
total_minutes = int(point_dt.timestamp() // 60)
bucket_start_minutes = total_minutes - (total_minutes % bucket_size_minutes)
bucket_key = str(bucket_start_minutes)
bucket_map[bucket_key] = item
compressed = list(bucket_map.values())
compressed.sort(key=lambda row: row.get("timestamp", ""))
if len(compressed) > limit:
compressed = compressed[-limit:]
return compressed
def _record_room_audience_point(self, room_id: str, point: Dict[str, Any]) -> None:
if not self.redis_manager or not room_id:
return
session = self.redis_manager.get_latest_room_session(room_id)
if not session or not bool(session.get("is_live")):
return
current_points = self._normalize_audience_points(list(session.get("audience_points", []) or []))
merged_points = self._normalize_audience_points(current_points + [point])
if merged_points == current_points:
return
session["audience_points"] = merged_points
session["updated_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
self.redis_manager.save_room_session(room_id, session)
def _resolve_anchor_day(self, target_dt: datetime) -> str:
if target_dt.hour < self._session_cutoff_hour:
target_dt = target_dt - timedelta(days=1)
return target_dt.strftime("%Y-%m-%d")
@staticmethod
def _parse_session_time(value: str) -> Optional[datetime]:
if not value:
return None
try:
return datetime.strptime(str(value), "%Y-%m-%d %H:%M:%S")
except Exception:
return None
@staticmethod
def _find_open_segment(session: Dict[str, Any]) -> Optional[Dict[str, Any]]:
for segment in reversed(session.get("segments", []) or []):
if not str(segment.get("end_time") or "").strip():
return segment
return None
def _should_merge_with_latest_session(self, latest_session: Optional[Dict[str, Any]], now_dt: datetime) -> bool:
if not latest_session:
return False
if latest_session.get("is_live"):
return True
segments = latest_session.get("segments", []) or []
if not segments:
return False
last_segment = segments[-1]
end_dt = self._parse_session_time(last_segment.get("end_time", ""))
if not end_dt:
return False
gap_seconds = (now_dt - end_dt).total_seconds()
return 0 <= gap_seconds <= self._merge_gap_hours * 3600
def _open_or_resume_session(
self,
room_id: str,
nickname: str,
room_name: str,
room_context: Optional[Dict[str, Any]] = None,
) -> Optional[Dict[str, Any]]:
if not self.redis_manager:
return None
now_dt = datetime.now()
now_str = now_dt.strftime("%Y-%m-%d %H:%M:%S")
latest_session = self.redis_manager.get_latest_room_session(room_id) or {}
if self._should_merge_with_latest_session(latest_session, now_dt):
session = dict(latest_session)
open_segment = self._find_open_segment(session)
if not open_segment:
segments = list(session.get("segments", []) or [])
segments.append({"start_time": now_str, "end_time": ""})
session["segments"] = segments
else:
anchor_day = self._resolve_anchor_day(now_dt)
session = {
"session_id": f"{room_id}_{anchor_day.replace('-', '')}_{now_dt.strftime('%H%M%S')}",
"room_id": room_id,
"anchor_day": anchor_day,
"nickname": nickname,
"room_name": room_name,
"room_context": dict(room_context or {}),
"segments": [{"start_time": now_str, "end_time": ""}],
"audience_points": [],
"is_live": True,
"summary_status": "pending",
"summary_generated_at": "",
"created_at": now_str,
}
session["nickname"] = nickname or session.get("nickname", "")
session["room_name"] = room_name or session.get("room_name", "")
if room_context:
session["room_context"] = dict(room_context)
session["audience_points"] = self._normalize_audience_points(list(session.get("audience_points", []) or []))
session["is_live"] = True
session["updated_at"] = now_str
session["last_live_at"] = now_str
self.redis_manager.save_room_session(room_id, session)
return session
def _close_active_session(
self,
room_id: str,
nickname: str,
room_name: str,
room_context: Optional[Dict[str, Any]] = None,
) -> Optional[Dict[str, Any]]:
if not self.redis_manager:
return None
session = self.redis_manager.get_latest_room_session(room_id)
if not session:
return None
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
open_segment = self._find_open_segment(session)
if open_segment:
open_segment["end_time"] = now_str
session["nickname"] = nickname or session.get("nickname", "")
session["room_name"] = room_name or session.get("room_name", "")
if room_context:
session["room_context"] = dict(room_context)
session["is_live"] = False
session["updated_at"] = now_str
session["last_offline_at"] = now_str
self.redis_manager.save_room_session(room_id, session)
return session
def get_room_session(self, room_id: str, session_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """Fetch one session of a room: the given ``session_id`` if provided, otherwise the latest one.

    Returns None when Redis is unavailable or ``room_id`` is empty.
    """
    store = self.redis_manager
    if not store or not room_id:
        return None
    if not session_id:
        return store.get_latest_room_session(room_id)
    return store.get_room_session(room_id, session_id)
def build_session_danmu_material(self, room_id: str, session_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """
    Side-channel helper: load the danmu messages belonging to one live
    session and condense them into summary material for later use.

    It only reads the session and builds the material; a short ``session``
    header is attached to the result. Returns None when the session cannot
    be found.
    """
    session = self.get_room_session(room_id, session_id=session_id)
    if not session:
        return None

    session_header = {
        "session_id": session.get("session_id", ""),
        "anchor_day": session.get("anchor_day", ""),
        "nickname": session.get("nickname", ""),
        "room_name": session.get("room_name", ""),
        "is_live": bool(session.get("is_live")),
    }
    messages = DouyuDanmuSummaryHelper.load_session_messages(room_id, session)
    material = DouyuDanmuSummaryHelper.build_summary_material(room_id, session, messages)
    material["session"] = session_header
    return material
def build_session_llm_payload(self, room_id: str, session_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """
    Side-channel helper: build the danmu-summary payload for one live
    session in the shape produced by ``DouyuDanmuSummaryHelper.build_llm_payload``.

    It only reads the session and its messages. Returns None when the
    session cannot be found.
    """
    session = self.get_room_session(room_id, session_id=session_id)
    if session:
        session_messages = DouyuDanmuSummaryHelper.load_session_messages(room_id, session)
        return DouyuDanmuSummaryHelper.build_llm_payload(room_id, session, session_messages)
    return None
def _daily_report_job_key(self, day_key: str) -> str:
return f"{self.redis_manager.prefix}daily_report_job:{day_key}"
def _daily_report_room_key(self, room_id: str, anchor_day: str, group_id: Optional[str] = None) -> str:
"""
日报发送标记支持按群粒度区分。
兼容说明:
1. 不传 group_id 时,沿用“房间 + 日期”粒度,适合一次性向所有订阅群统一推送;
2. 传入 group_id 时,改成“房间 + 日期 + 群”粒度,适合插件调度逐群执行;
3. 这样可以避免同一房间被多个群订阅时,前一个群的发送记录误伤后一个群。
"""
if str(group_id or "").strip():
return f"{self.redis_manager.prefix}daily_report:{room_id}:{anchor_day}:{str(group_id).strip()}"
return f"{self.redis_manager.prefix}daily_report:{room_id}:{anchor_day}"
def _fans_daily_report_room_key(self, room_id: str, anchor_day: str, group_id: Optional[str] = None) -> str:
"""
粉丝日报使用独立发送标记。
这样做的原因:
1. 不和运营版日报共用去重状态,避免两种内容互相影响;
2. 定时任务补偿/重载时可以避免同一天重复发粉丝日报;
3. 同样支持按群粒度区分,避免多群订阅同房间时互相误伤;
4. 手工命令仍可选择无视这个标记,保留“重复召回”的灵活性。
"""
if str(group_id or "").strip():
return f"{self.redis_manager.prefix}fans_daily_report:{room_id}:{anchor_day}:{str(group_id).strip()}"
return f"{self.redis_manager.prefix}fans_daily_report:{room_id}:{anchor_day}"
@staticmethod
def _daily_report_cache_dir() -> str:
path = os.path.join("temp", "douyu_materials")
os.makedirs(path, exist_ok=True)
return path
def _daily_report_cache_path(self, room_id: str, anchor_day: str, report_kind: str = "operator") -> str:
# 把不同风格的日报结果拆到独立缓存文件中:
# 1. 运营版继续使用 operator
# 2. 粉丝向恶搞版使用 fans
# 3. 这样两套模板和文本互不覆盖,便于分别调试和回归。
safe_kind = str(report_kind or "operator").strip().lower() or "operator"
return os.path.join(
self._daily_report_cache_dir(),
f"{room_id}_{anchor_day.replace('-', '')}_{safe_kind}_daily_report_result.json",
)
def _load_daily_report_cache(self, room_id: str, anchor_day: str, report_kind: str = "operator") -> Optional[Dict[str, Any]]:
cache_path = self._daily_report_cache_path(room_id, anchor_day, report_kind=report_kind)
if not os.path.exists(cache_path):
return None
try:
with open(cache_path, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, dict):
return data
except Exception as e:
logger.warning(
f"读取斗鱼每日报告缓存失败(room={room_id}, day={anchor_day}, kind={report_kind}): {e}"
)
return None
def _save_daily_report_cache(
self,
room_id: str,
anchor_day: str,
data: Dict[str, Any],
report_kind: str = "operator",
) -> None:
cache_path = self._daily_report_cache_path(room_id, anchor_day, report_kind=report_kind)
try:
with open(cache_path, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
except Exception as e:
logger.warning(
f"保存斗鱼每日报告缓存失败(room={room_id}, day={anchor_day}, kind={report_kind}): {e}"
)
@staticmethod
def _resolve_existing_report_image(image_path: Optional[str]) -> Optional[str]:
path = str(image_path or "").strip()
if not path:
return None
return path if os.path.exists(path) else None
def _should_run_daily_report(self, now_dt: datetime) -> bool:
time_text = str(self._daily_report_time or "").strip()
try:
target_hour, target_minute = [int(part) for part in time_text.split(":", 1)]
except Exception:
return False
target_dt = now_dt.replace(hour=target_hour, minute=target_minute, second=0, microsecond=0)
if now_dt < target_dt or now_dt > target_dt + timedelta(minutes=4, seconds=59):
return False
last_run = self.redis_manager.get_text_value(self._daily_report_job_key(now_dt.strftime("%Y-%m-%d")))
return not last_run
def _load_sessions_for_anchor_day(self, room_id: str, anchor_day: str) -> List[Dict[str, Any]]:
if not self.redis_manager:
return []
sessions = []
for session_id in self.redis_manager.list_room_session_ids(room_id, limit=30):
session = self.redis_manager.get_room_session(room_id, session_id)
if not session:
continue
if str(session.get("anchor_day") or "") != anchor_day:
continue
sessions.append(session)
sessions.sort(
key=lambda item: str(((item.get("segments") or [{}])[0]).get("start_time", "")),
)
if sessions:
return sessions[:self._daily_report_max_sessions]
inferred_sessions = self._infer_sessions_for_anchor_day(room_id, anchor_day)
if inferred_sessions:
logger.info(
f"斗鱼每日报告使用弹幕文件回推 session: room={room_id}, day={anchor_day}, "
f"count={len(inferred_sessions)}"
)
return inferred_sessions[:self._daily_report_max_sessions]
return []
def _infer_sessions_for_anchor_day(self, room_id: str, anchor_day: str) -> List[Dict[str, Any]]:
    """Reconstruct sessions for ``anchor_day`` from the recorded danmu messages of that date.

    Order of preference:
    1. sessions inferred by ``DouyuDanmuSummaryHelper`` whose anchor day
       matches;
    2. otherwise, if the day holds at least ``_daily_report_min_messages``
       messages, one synthetic "fallback" session spanning the first to the
       last message;
    3. otherwise an empty list.
    """
    date_key = anchor_day.replace("-", "")
    day_messages = DouyuDanmuSummaryHelper.load_day_messages(room_id, date_key)
    if not day_messages:
        return []

    candidates = DouyuDanmuSummaryHelper.infer_sessions_from_messages(
        room_id,
        day_messages,
        session_cutoff_hour=self._session_cutoff_hour,
        merge_gap_hours=self._merge_gap_hours,
        min_session_messages=min(50, self._daily_report_min_messages),
    )
    matching = [
        candidate for candidate in candidates
        if str(candidate.get("anchor_day") or "") == anchor_day
    ]
    if matching:
        return matching

    # Too few messages to justify a synthetic whole-day session.
    if len(day_messages) < self._daily_report_min_messages:
        return []

    # Messages without a timestamp sort first (datetime.min).
    chronological = sorted(day_messages, key=lambda item: item.get("timestamp") or datetime.min)
    first_ts = chronological[0].get("timestamp")
    last_ts = chronological[-1].get("timestamp")
    if not (isinstance(first_ts, datetime) and isinstance(last_ts, datetime)):
        return []

    whole_day_segment = {
        "start_time": first_ts.strftime("%Y-%m-%d %H:%M:%S"),
        "end_time": last_ts.strftime("%Y-%m-%d %H:%M:%S"),
    }
    return [{
        "session_id": f"{room_id}_{date_key}_fallback",
        "room_id": room_id,
        "anchor_day": anchor_day,
        "nickname": "",
        "room_name": "",
        "segments": [whole_day_segment],
        "is_live": False,
        "source": "fallback_full_day",
    }]
def _build_audience_trend(self, sessions: List[Dict[str, Any]]) -> Dict[str, Any]:
points: List[Dict[str, Any]] = []
segment_start_times: List[str] = []
segment_end_times: List[str] = []
for session in sessions:
for segment in session.get("segments", []) or []:
start_time = str(segment.get("start_time") or "").strip()
end_time = str(segment.get("end_time") or "").strip()
if start_time:
segment_start_times.append(start_time)
if end_time:
segment_end_times.append(end_time)
for item in session.get("audience_points", []) or []:
point = {
"timestamp": str(item.get("timestamp") or "").strip(),
"vip_count": int(item.get("vip_count", 0) or 0),
"diamond_count": int(item.get("diamond_count", 0) or 0),
}
if point["timestamp"]:
points.append(point)
points = self._normalize_audience_points(points, limit=1440)
if not points:
return {"points": [], "summary": {}}
vip_values = [int(item.get("vip_count", 0) or 0) for item in points]
diamond_values = [int(item.get("diamond_count", 0) or 0) for item in points]
labels = [str(item.get("timestamp") or "")[-8:-3] for item in points]
session_start = min(segment_start_times) if segment_start_times else ""
session_end = max(segment_end_times) if segment_end_times else str(points[-1].get("timestamp") or "")
first_point_time = str(points[0].get("timestamp") or "")
last_point_time = str(points[-1].get("timestamp") or "")
leading_gap_minutes = 0
if session_start and first_point_time:
start_dt = self._parse_session_time(session_start)
point_dt = self._parse_session_time(first_point_time)
if start_dt and point_dt:
leading_gap_minutes = max(int((point_dt - start_dt).total_seconds() // 60), 0)
return {
"points": points,
"summary": {
"point_count": len(points),
"vip_min": min(vip_values),
"vip_max": max(vip_values),
"vip_latest": vip_values[-1],
"diamond_min": min(diamond_values),
"diamond_max": max(diamond_values),
"diamond_latest": diamond_values[-1],
"labels": labels,
"session_start": session_start,
"session_end": session_end,
"first_point_time": first_point_time,
"last_point_time": last_point_time,
"leading_gap_minutes": leading_gap_minutes,
},
}
def _build_daily_report_payload(self, room_id: str, anchor_day: str, sessions: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Aggregate the sessions of one anchor day into a single daily-report payload.

    Each session's messages are loaded through ``DouyuDanmuSummaryHelper``;
    sessions with fewer than ``self._daily_report_min_messages`` messages are
    skipped. Per-session payloads are merged into day-level counters, ranked
    lists and operator metrics, and a compact LLM asset bundle is built from the
    combined message set.

    Args:
        room_id: Room the report belongs to.
        anchor_day: Day in ``YYYY-MM-DD`` form.
        sessions: Session dicts for that day.

    Returns:
        The aggregated payload, or ``None`` when ``sessions`` is empty or no
        session reaches the minimum message count.

    Side effects:
        Best-effort dump of the payload as JSON under ``temp/douyu_materials``.
        A failed dump is logged and does not abort report generation.
    """
    if not sessions:
        return None
    # List-valued sections that are simply concatenated across sessions.
    list_sections = (
        "merged_templates",
        "repeated_messages",
        "peak_buckets",
        "representative_messages",
        "raw_window_samples",
        "chronological_samples",
    )
    collected: Dict[str, List[Dict[str, Any]]] = {name: [] for name in list_sections}
    session_payloads: List[Dict[str, Any]] = []
    all_messages: List[Dict[str, Any]] = []
    total_message_count = 0
    total_noise_filtered_count = 0
    total_organized_message_count = 0
    total_unique_users: Set[str] = set()
    session_storylines: List[Dict[str, Any]] = []
    top_terms_counter = Counter()
    burst_terms_counter = Counter()
    operator_totals = {
        "fans_badge_user_count": 0,
        "fans_badge_message_count": 0,
        "high_room_level_user_count": 0,
        "high_fans_level_user_count": 0,
        "noble_user_count": 0,
        "noble_message_count": 0,
        "active_users_5plus": 0,
        "active_users_10plus": 0,
    }
    top_badge_counter = Counter()
    top_badge_message_counter = Counter()
    top_active_user_map: Dict[str, Dict[str, Any]] = {}
    nickname = ""
    room_name = ""
    for session in sessions:
        messages = DouyuDanmuSummaryHelper.load_session_messages(room_id, session)
        if len(messages) < self._daily_report_min_messages:
            continue
        # Keep a day-level copy of every message so the LLM-oriented compaction
        # runs once over the whole day instead of compressing per session and
        # stitching the pieces back together afterwards.
        all_messages.extend(messages)
        session_payload = DouyuDanmuSummaryHelper.build_llm_payload(room_id, session, messages)
        session_payloads.append(session_payload)
        meta = session_payload.get("session_meta", {}) or {}
        operator_metrics = session_payload.get("operator_metrics", {}) or {}
        total_message_count += int(meta.get("message_count", 0) or 0)
        total_noise_filtered_count += int(meta.get("noise_filtered_count", 0) or 0)
        total_organized_message_count += int(meta.get("organized_message_count", 0) or 0)
        # The first session that knows the anchor / room name wins.
        nickname = nickname or str(meta.get("nickname") or session.get("nickname") or "")
        room_name = room_name or str(meta.get("room_name") or session.get("room_name") or "")
        for key in operator_totals:
            operator_totals[key] += int(operator_metrics.get(key, 0) or 0)
        for name in list_sections:
            collected[name].extend(dict(item) for item in session_payload.get(name, []) or [])
        storyline = session_payload.get("session_storyline") or {}
        if storyline:
            session_storylines.append(dict(storyline))
        for item in session_payload.get("top_terms", []) or []:
            term = str(item.get("term") or "").strip()
            if term:
                top_terms_counter[term] += int(item.get("count", 0) or 0)
        for item in session_payload.get("burst_terms", []) or []:
            term = str(item.get("text") or "").strip()
            if term:
                burst_terms_counter[term] += int(item.get("count", 0) or 0)
        for item in operator_metrics.get("top_badges", []) or []:
            badge_name = str(item.get("badge_name") or "").strip()
            if badge_name:
                top_badge_counter[badge_name] += int(item.get("user_count", 0) or 0)
                top_badge_message_counter[badge_name] += int(item.get("message_count", 0) or 0)
        for item in operator_metrics.get("top_active_users", []) or []:
            uid = str(item.get("uid") or "").strip()
            if not uid:
                continue
            existing = top_active_user_map.get(uid)
            message_count = int(item.get("message_count", 0) or 0)
            organized_message_count = int(item.get("organized_message_count", 0) or 0)
            room_level = int(item.get("room_level", 0) or 0)
            fans_level = int(item.get("fans_level", 0) or 0)
            if not existing:
                top_active_user_map[uid] = {
                    "uid": uid,
                    "nickname": str(item.get("nickname") or "").strip(),
                    "message_count": message_count,
                    "organized_message_count": organized_message_count,
                    "room_level": room_level,
                    "fans_name": str(item.get("fans_name") or "").strip(),
                    "fans_level": fans_level,
                    "noble_name": str(item.get("noble_name") or "").strip(),
                }
                continue
            # Same user seen in another session: add up the counts, keep the
            # highest levels and fill in any text field that is still blank.
            existing["message_count"] = int(existing.get("message_count", 0) or 0) + message_count
            existing["organized_message_count"] = (
                int(existing.get("organized_message_count", 0) or 0) + organized_message_count
            )
            if room_level > int(existing.get("room_level", 0) or 0):
                existing["room_level"] = room_level
            if fans_level > int(existing.get("fans_level", 0) or 0):
                existing["fans_level"] = fans_level
            for field in ("nickname", "fans_name", "noble_name"):
                if not str(existing.get(field) or "").strip():
                    existing[field] = str(item.get(field) or "").strip()
        for session_message in messages:
            uid = str(session_message.get("uid") or "").strip()
            if uid:
                total_unique_users.add(uid)
    if not session_payloads:
        return None
    merged_templates = collected["merged_templates"]
    repeated_messages = collected["repeated_messages"]
    peak_buckets = collected["peak_buckets"]
    representative_messages = collected["representative_messages"]
    raw_window_samples = collected["raw_window_samples"]
    chronological_samples = collected["chronological_samples"]
    merged_templates.sort(key=lambda item: int(item.get("count", 0) or 0), reverse=True)
    repeated_messages.sort(key=lambda item: int(item.get("count", 0) or 0), reverse=True)
    peak_buckets.sort(key=lambda item: int(item.get("message_count", 0) or 0), reverse=True)
    chronological_samples.sort(key=lambda item: str(item.get("time") or ""))
    session_storylines.sort(key=lambda item: str(item.get("start_time") or ""))
    audience_trend = self._build_audience_trend(sessions)
    room_context = self._build_room_semantic_context(room_id, nickname, room_name, sessions)
    prepared_all_messages = DouyuDanmuSummaryHelper._prepare_messages(all_messages)
    compact_source_messages = prepared_all_messages.get("organized_messages", []) or all_messages
    llm_compact = DouyuDanmuSummaryHelper.build_compact_prompt_assets(
        compact_source_messages,
        bucket_minutes=5,
        speaker_limit=80,
        timeline_limit=24,
        samples_per_bucket=6,
        cue_limit=18,
    )
    report_payload = {
        "report_meta": {
            "room_id": room_id,
            "anchor_day": anchor_day,
            "nickname": nickname,
            "room_name": room_name,
            "session_count": len(session_payloads),
            "message_count": total_message_count,
            "noise_filtered_count": total_noise_filtered_count,
            "organized_message_count": total_organized_message_count,
            "unique_user_count": len(total_unique_users),
        },
        "operator_metrics": {
            **operator_totals,
            "fans_badge_user_ratio": round(
                operator_totals["fans_badge_user_count"] / max(len(total_unique_users), 1), 4
            ),
            "top_badges": [
                {
                    "badge_name": badge_name,
                    "user_count": user_count,
                    "message_count": int(top_badge_message_counter.get(badge_name, 0) or 0),
                }
                for badge_name, user_count in top_badge_counter.most_common(10)
            ],
            "top_active_users": sorted(
                top_active_user_map.values(),
                key=lambda item: (
                    int(item.get("message_count", 0) or 0),
                    int(item.get("organized_message_count", 0) or 0),
                ),
                reverse=True,
            )[:12],
        },
        "sessions": [
            {
                "session_id": (item.get("session_meta", {}) or {}).get("session_id", ""),
                "segments": (item.get("session_meta", {}) or {}).get("segments", []),
                "message_count": (item.get("session_meta", {}) or {}).get("message_count", 0),
                "organized_message_count": (item.get("session_meta", {}) or {}).get("organized_message_count", 0),
            }
            for item in session_payloads
        ],
        # Semantic context of the room:
        # 1. gives the LLM a prior about which community this is;
        # 2. mainly for rooms such as Dota2 that lean on personal relations and
        #    career-history memes;
        # 3. never replaces real danmu, only helps explain slang and references.
        "room_context": room_context,
        "audience_trend": audience_trend,
        "merged_templates": merged_templates[:24],
        "repeated_messages": repeated_messages[:24],
        "top_terms": [{"term": term, "count": count} for term, count in top_terms_counter.most_common(24)],
        "burst_terms": [{"text": term, "count": count} for term, count in burst_terms_counter.most_common(16)],
        "peak_buckets": peak_buckets[:10],
        "representative_messages": representative_messages[:24],
        "raw_window_samples": raw_window_samples[:10],
        # Chronological samples restore the start-to-finish context so the LLM
        # does not only see scattered hot spots.
        "chronological_samples": chronological_samples[:40],
        # With several streams per day, keep a light storyline for each one.
        "session_storylines": session_storylines[:6],
        # Compact material dedicated to the LLM:
        # 1. speaker_index pulls UID / badge / level out of the single messages;
        # 2. timeline_digest replays the stream as time blocks with original
        #    samples and repetition cues;
        # 3. content_cues skips Chinese word segmentation and keeps whole
        #    high-frequency sentences.
        "llm_compact": llm_compact,
    }
    # The dump is only a debugging artifact: a full disk, a permission problem or
    # a non-serializable value must not discard the payload that was just built.
    try:
        artifact_dir = os.path.join("temp", "douyu_materials")
        os.makedirs(artifact_dir, exist_ok=True)
        artifact_path = os.path.join(
            artifact_dir, f"{room_id}_{anchor_day.replace('-', '')}_daily_report_payload.json"
        )
        with open(artifact_path, "w", encoding="utf-8") as f:
            json.dump(report_payload, f, ensure_ascii=False, indent=2)
    except (OSError, TypeError, ValueError) as exc:
        logger.warning(
            f"斗鱼每日报告素材落盘失败: room={room_id}, day={anchor_day}, error={exc}"
        )
    return report_payload
def _build_daily_report_prompt(self, payload: Dict[str, Any]) -> Tuple[str, str]:
meta = payload.get("report_meta", {}) or {}
room_context_prompt = self._build_room_context_prompt_block(payload)
prompt_material = self._build_llm_prompt_material(payload, include_operator=True)
system_prompt = (
"你是斗鱼直播日报助手。请基于给定的结构化弹幕材料,输出一份适合发群的中文日报。"
"要求简洁、自然、信息密度高,不要编造,不要使用代码块。"
"如果材料显示这是 Dota2 / 电竞语境,请优先按该圈层理解弹幕中的人物、黑话、历史梗和职业生涯梗。"
)
user_prompt = (
"请输出一份斗鱼每日报告,格式要求:\n"
"1. 第一行写标题,包含主播名和日期。\n"
"2. 用 3-5 条概括直播主线、弹幕情绪、观众关注点。\n"
"3. 单独补充运营视角观察,比如带牌活跃用户、高等级用户、核心发言用户、活跃牌子分布。\n"
"4. 单独列出高频梗/复读内容(不超过 5 条)。\n"
"5. 单独列出 2-3 个热点时段。\n"
"6. 整体控制在 600 字以内。\n\n"
f"{room_context_prompt}"
"下面是已经提纯给 LLM 的材料,其中 `topic_evidence_clusters` 和 `compact_scene_material.semantic_fact_hints` 是主阅读区:\n"
"请优先依据其中的事实证据簇、用户索引、时间线块、整句复读线索和原声样本来写,不要被大段统计信息带偏。\n"
f"材料如下:\n{json.dumps(prompt_material, ensure_ascii=False, indent=2)}"
)
return system_prompt, user_prompt
def _build_danmu_summary_prompt(self, payload: Dict[str, Any]) -> Tuple[str, str]:
meta = payload.get("report_meta", {}) or {}
room_context_prompt = self._build_room_context_prompt_block(payload)
prompt_material = self._build_llm_prompt_material(payload, include_operator=False)
system_prompt = (
"你是直播弹幕总结助手。请只根据给定材料,总结这场直播的弹幕内容与氛围。"
"不要输出运营数据,不要编造,不要写空话套话。"
"如果材料表明这是 Dota2 / 电竞直播间,请优先把梗理解为圈内人物、职业经历、赛事记忆和主播关系梗。"
)
user_prompt = (
"请输出一段适合放在日报图片上半部分的弹幕总结,要求:\n"
"1. 先用 1 段总述直播氛围与主线。\n"
"2. 再用 5 条要点总结观众关注点、情绪变化、反复出现的梗、节奏变化和额外反馈,每条只写一句。\n"
"3. 另起一行固定写标题:`【粉丝向弹幕萃取】`。\n"
"4. 在该标题下输出 4-6 条短句,尽量保留弹幕原话风格(可以保留口头语、玩梗、情绪词)。\n"
"5. 整体语气要像“直播间现场记录”,不要写成运营复盘。\n"
"6. 不要写“根据数据”“建议”“策略”等词。\n\n"
f"主播:{meta.get('nickname') or meta.get('room_name') or meta.get('room_id')}\n"
f"日期:{meta.get('anchor_day', '')}\n"
f"{room_context_prompt}"
"下面是已经提纯给 LLM 的现场材料,请优先阅读 `topic_evidence_clusters` 以及 `compact_scene_material` 中的 `semantic_fact_hints`、时间线块、整句复读线索和原声样本,"
"不要写成词频复述。\n"
f"材料:\n{json.dumps(prompt_material, ensure_ascii=False, indent=2)}"
)
return system_prompt, user_prompt
def _build_fans_daily_report_prompt(self, payload: Dict[str, Any]) -> Tuple[str, str]:
"""
粉丝版日报提示词设计目标:
1. 和运营版彻底区分开,不再强调“策略、复盘、活跃质量”;
2. 先提炼高价值信息,再保留粉丝向乐子感,避免报告只剩几条段子;
3. 允许轻微恶搞和夸张,但不能编造未出现的事件,也不能攻击主播或观众。
"""
meta = payload.get("report_meta", {}) or {}
room_context_prompt = self._build_room_context_prompt_block(payload)
prompt_material = self._build_llm_prompt_material(payload, include_operator=False)
system_prompt = (
"你是斗鱼直播间的粉丝向信息日报编辑。"
"请只根据提供的真实弹幕材料,输出一份既有信息量、又保留直播间欢乐气氛的中文总结。"
"语气要像群友在复盘直播名场面,但第一优先级是提炼有效信息,不要写成运营分析,不要编造剧情,不要使用代码块。"
"如果这是 Dota2 / 电竞语境直播间,请优先按刀圈/电竞圈人物关系、职业生涯、老比赛和主播互动梗去理解笑点。"
)
user_prompt = (
"请输出一份适合给粉丝看的《斗鱼弹幕信息日报》,严格按下面结构输出:\n"
"1. 开头先写 1 段总述,概括今天直播间的整体节目效果和气氛。\n"
"2. 另起一行写标题:`【今日重点信息】`,下面写 4-6 条 bullet优先提炼真正有效的信息。重点看赛事预告、具体日期、位置讨论、人物关系、主播近况、是否开摄像头、场外话题等。\n"
"3. 另起一行写标题:`【核心讨论话题】`,下面写 3-4 条 bullet概括今天弹幕主要围绕哪些话题打转每条都要带具体内容不要空泛。\n"
"4. 另起一行写标题:`【英雄与对局焦点】`,下面写 3-4 条 bullet提炼今天重点英雄、关键对局走势、翻盘/崩盘点、观众对操作和出装的主要反馈。\n"
"5. 另起一行写标题:`【今日笑点】`,下面写 3-4 条 bullet每条一句突出最有节目效果的地方。\n"
"6. 另起一行写标题:`【弹幕名场面】`,下面写 4-6 条 bullet尽量保留弹幕原话风格像现场回放。\n"
"7. 另起一行写标题:`【梗王榜】`,下面写 3 条 bullet把今天最刷屏、最有共识的梗排出来。\n"
"8. 另起一行写标题:`【收尾播报】`,下面只写 1 句收尾,轻松一点,像群里发图后的总结句。\n"
"9. 出现时间信息时尽量写清楚绝对日期或明确时间比如“4月30日”“18:45 前后”,不要只写“最近”“那天”。\n"
"10. 不要写“建议、策略、转化、数据表现”等运营词也不要只复述哈哈哈、gg 这种已经能由本地统计完成的噪声。\n\n"
f"主播:{meta.get('nickname') or meta.get('room_name') or meta.get('room_id')}\n"
f"日期:{meta.get('anchor_day', '')}\n"
f"{room_context_prompt}"
"下面是已经提纯给 LLM 的现场材料,请优先抓 `topic_evidence_clusters` 和 `compact_scene_material` 里的 `semantic_fact_hints`、原声弹幕、时间线块和集体起哄片段,"
"尤其留意赛事预告、位置讨论、英雄选择、关键对局、镜头调侃和团播人物关系,"
"少写空泛概括。若材料无法支持某个判断,就不要写。\n"
f"材料:\n{json.dumps(prompt_material, ensure_ascii=False, indent=2)}"
)
return system_prompt, user_prompt
def _build_llm_prompt_material(
self,
payload: Dict[str, Any],
*,
include_operator: bool = False,
) -> Dict[str, Any]:
"""
为日报 LLM 单独构建“提纯后的材料”。
设计目标:
1. 把真正有现场感的弹幕内容提到前面,降低大体量统计 JSON 对模型注意力的干扰;
2. 保留足够的梗、热点时段和原声样本,方便模型写出更像直播间回放的内容;
3. 运营指标只在确实需要的日报正文场景里保留精简版,不再无差别塞给所有任务。
"""
meta = payload.get("report_meta", {}) or {}
room_context = payload.get("room_context", {}) or {}
sessions = payload.get("sessions", []) or []
representative_messages = payload.get("representative_messages", []) or []
raw_window_samples = payload.get("raw_window_samples", []) or []
merged_templates = payload.get("merged_templates", []) or []
repeated_messages = payload.get("repeated_messages", []) or []
top_terms = payload.get("top_terms", []) or []
burst_terms = payload.get("burst_terms", []) or []
peak_buckets = payload.get("peak_buckets", []) or []
chronological_samples = payload.get("chronological_samples", []) or []
session_storylines = payload.get("session_storylines", []) or []
llm_compact = payload.get("llm_compact", {}) or {}
speaker_index = llm_compact.get("speaker_index", []) or []
timeline_digest = llm_compact.get("timeline_digest", []) or []
content_cues = llm_compact.get("content_cues", []) or []
semantic_fact_hints = llm_compact.get("semantic_fact_hints", {}) or {}
fact_topic_clusters = semantic_fact_hints.get("topic_clusters", []) or []
hero_mentions = semantic_fact_hints.get("hero_mentions", []) or []
material: Dict[str, Any] = {
"report_meta": {
"room_id": str(meta.get("room_id") or "").strip(),
"anchor_day": str(meta.get("anchor_day") or "").strip(),
"nickname": str(meta.get("nickname") or "").strip(),
"room_name": str(meta.get("room_name") or "").strip(),
"session_count": int(meta.get("session_count", 0) or 0),
"message_count": int(meta.get("message_count", 0) or 0),
"unique_user_count": int(meta.get("unique_user_count", 0) or 0),
},
"room_context": {
"domain": str(room_context.get("domain") or "").strip(),
"inferred_domains": self._normalize_text_list(room_context.get("inferred_domains"))[:6],
"identity_summary": str(room_context.get("identity_summary") or "").strip(),
"career_background": str(room_context.get("career_background") or "").strip(),
"related_people": self._normalize_text_list(room_context.get("related_people"))[:10],
"storyline_keywords": self._normalize_text_list(room_context.get("storyline_keywords"))[:10],
"style_hints": self._normalize_text_list(room_context.get("style_hints"))[:6],
},
# 本地统计层:
# 1. 这里只放“本地就能确定”的结果;
# 2. 让 LLM 只把这些统计当作背景,不再浪费能力去数哈哈哈和复读次数。
"local_stats": {
"message_count": int(meta.get("message_count", 0) or 0),
"unique_user_count": int(meta.get("unique_user_count", 0) or 0),
"top_emotion_bursts": [
{
"text": str(item.get("text") or "").strip(),
"count": int(item.get("count", 0) or 0),
}
for item in content_cues[:12]
if str(item.get("kind") or "").strip() == "emotion" and str(item.get("text") or "").strip()
][:8],
"top_repeated_messages": [
{
"text": str(item.get("text") or "").strip()[:90],
"count": int(item.get("count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
}
for item in (repeated_messages[:12] if repeated_messages else content_cues[:12])
if str(item.get("text") or "").strip()
][:8],
"peak_windows": [
{
"start_time": str(item.get("start_time") or "").strip(),
"message_count": int(item.get("message_count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
}
for item in peak_buckets[:6]
],
},
# 这是后续给 LLM 的主工作区:
# 1. 每个簇都代表“今天弹幕里正在讨论的一件事”;
# 2. 本地只做聚类和保留证据,不替模型写结论;
# 3. LLM 负责从这些簇里提炼赛事、位置、英雄、背景和场外互动信息。
"topic_evidence_clusters": [
{
"label": str(item.get("label") or "").strip(),
"count": int(item.get("match_count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
"time_range": (
f"{str(item.get('first_hm') or '').strip()}-{str(item.get('last_hm') or '').strip()}"
).strip("-"),
"keywords": [
str(keyword).strip()
for keyword in (item.get("keywords", []) or [])[:8]
if str(keyword).strip()
],
"samples": [
{
"date": str(sample.get("date") or "").strip(),
"hm": str(sample.get("hm") or "").strip(),
"nickname": str(sample.get("nickname") or "").strip(),
"content": str(sample.get("content") or "").strip()[:100],
}
for sample in (item.get("samples", []) or [])[:5]
if str(sample.get("content") or "").strip()
],
}
for item in fact_topic_clusters[:6]
if str(item.get("label") or "").strip()
],
# 这是新的主材料层,优先级高于传统的 top_terms
# 1. speaker_index 负责承接用户画像,避免在每条样本里重复塞 UUID/牌子/等级;
# 2. timeline_digest 让模型按时间推进理解“哪一段开始起哄、哪一段反复刷屏”;
# 3. content_cues 保留整句/短句级复读内容,不再依赖中文切词。
"compact_scene_material": {
"speaker_index": [
{
"speaker_id": str(item.get("speaker_id") or "").strip(),
"nickname": str(item.get("nickname") or "").strip(),
"uid_tail": str(item.get("uid_tail") or "").strip(),
"badge_name": str(item.get("badge_name") or "").strip(),
"badge_level": int(item.get("badge_level", 0) or 0),
"room_level": int(item.get("room_level", 0) or 0),
"noble_name": str(item.get("noble_name") or "").strip(),
"message_count": int(item.get("message_count", 0) or 0),
}
for item in speaker_index[:40]
if str(item.get("speaker_id") or "").strip()
],
"content_cues": [
{
"kind": str(item.get("kind") or "").strip(),
"text": str(item.get("text") or "").strip()[:90],
"count": int(item.get("count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
}
for item in content_cues[:18]
if str(item.get("text") or "").strip()
],
"semantic_fact_hints": {
"topic_clusters": [
{
"label": str(item.get("label") or "").strip(),
"match_count": int(item.get("match_count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
"first_hm": str(item.get("first_hm") or "").strip(),
"last_hm": str(item.get("last_hm") or "").strip(),
"keywords": [
str(keyword).strip()
for keyword in (item.get("keywords", []) or [])[:8]
if str(keyword).strip()
],
"samples": [
{
"date": str(sample.get("date") or "").strip(),
"hm": str(sample.get("hm") or "").strip(),
"nickname": str(sample.get("nickname") or "").strip(),
"content": str(sample.get("content") or "").strip()[:100],
}
for sample in (item.get("samples", []) or [])[:5]
if str(sample.get("content") or "").strip()
],
}
for item in fact_topic_clusters[:6]
if str(item.get("label") or "").strip()
],
"hero_mentions": [
{
"hero": str(item.get("hero") or "").strip(),
"mention_count": int(item.get("mention_count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
"samples": [
{
"hm": str(sample.get("hm") or "").strip(),
"nickname": str(sample.get("nickname") or "").strip(),
"content": str(sample.get("content") or "").strip()[:100],
}
for sample in (item.get("samples", []) or [])[:4]
if str(sample.get("content") or "").strip()
],
}
for item in hero_mentions[:6]
if str(item.get("hero") or "").strip()
],
},
"timeline_digest": [
{
"date": str(item.get("date") or "").strip(),
"start_hm": str(item.get("start_hm") or "").strip(),
"message_count": int(item.get("message_count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
"repeated_cues": [
{
"text": str(cue.get("text") or "").strip()[:80],
"count": int(cue.get("count", 0) or 0),
"user_count": int(cue.get("user_count", 0) or 0),
}
for cue in (item.get("repeated_cues", []) or [])[:3]
if str(cue.get("text") or "").strip()
],
"samples": [
{
"speaker_id": str(sample.get("speaker_id") or "").strip(),
"hm": str(sample.get("hm") or "").strip(),
"content": str(sample.get("content") or "").strip()[:90],
}
for sample in (item.get("samples", []) or [])[:6]
if str(sample.get("content") or "").strip()
],
}
for item in timeline_digest[:20]
if (item.get("samples") or item.get("repeated_cues"))
],
},
"session_overview": [
{
"session_id": str(item.get("session_id") or "").strip(),
"segments": item.get("segments", []) or [],
"message_count": int(item.get("message_count", 0) or 0),
"organized_message_count": int(item.get("organized_message_count", 0) or 0),
}
for item in sessions[:4]
],
"high_frequency_topics": {
# 这里刻意不再把中文分词结果作为主字段喂给 LLM
# 避免模型把碎词误当成主线;真正的内容理解优先走 compact_scene_material。
"top_terms_legacy": [
{"term": str(item.get("term") or "").strip(), "count": int(item.get("count", 0) or 0)}
for item in top_terms[:8]
if str(item.get("term") or "").strip()
],
"burst_terms": [
{"text": str(item.get("text") or "").strip(), "count": int(item.get("count", 0) or 0)}
for item in burst_terms[:12]
if str(item.get("text") or "").strip()
],
"merged_templates": [
{
"text": str(item.get("text") or "").strip()[:80],
"count": int(item.get("count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
}
for item in merged_templates[:12]
if str(item.get("text") or "").strip()
],
"repeated_messages": [
{
"text": str(item.get("text") or "").strip()[:80],
"count": int(item.get("count", 0) or 0),
"user_count": int(item.get("user_count", 0) or 0),
}
for item in repeated_messages[:12]
if str(item.get("text") or "").strip()
],
},
# 这里把“现场材料”单独抽成一大块,方便模型先看原声,再看统计摘要。
"live_scene_material": {
"representative_messages": [
{
"time": str(item.get("time") or "").strip(),
"nickname": str(item.get("nickname") or "").strip(),
"content": str(item.get("content") or "").strip()[:90],
}
for item in representative_messages[:24]
if str(item.get("content") or "").strip()
],
"hot_window_samples": [
{
"start_time": str(window.get("start_time") or "").strip(),
"message_count": int(window.get("message_count", 0) or 0),
"user_count": int(window.get("user_count", 0) or 0),
"top_terms": [
str(term.get("term") or "").strip()
for term in (peak_buckets[index].get("top_terms", []) or [])[:5]
if str(term.get("term") or "").strip()
] if index < len(peak_buckets) else [],
"samples": [
{
"time": str(sample.get("time") or "").strip(),
"nickname": str(sample.get("nickname") or "").strip(),
"content": str(sample.get("content") or "").strip()[:90],
}
for sample in (window.get("samples", []) or [])[:8]
if str(sample.get("content") or "").strip()
],
}
for index, window in enumerate(raw_window_samples[:8])
],
"chronological_samples": [
{
"time": str(item.get("time") or "").strip(),
"nickname": str(item.get("nickname") or "").strip(),
"content": str(item.get("content") or "").strip()[:90],
}
for item in chronological_samples[:24]
if str(item.get("content") or "").strip()
],
"session_storylines": [
{
"start_time": str(item.get("start_time") or "").strip(),
"end_time": str(item.get("end_time") or "").strip(),
"top_terms": [
{
"term": str(term.get("term") or "").strip(),
"count": int(term.get("count", 0) or 0),
}
for term in (item.get("top_terms", []) or [])[:8]
if str(term.get("term") or "").strip()
],
"burst_terms": [
{
"text": str(term.get("text") or "").strip(),
"count": int(term.get("count", 0) or 0),
}
for term in (item.get("burst_terms", []) or [])[:6]
if str(term.get("text") or "").strip()
],
"hottest_moment": {
"start_time": str(((item.get("hottest_moment") or {}).get("start_time")) or "").strip(),
"message_count": int(((item.get("hottest_moment") or {}).get("message_count", 0)) or 0),
"user_count": int(((item.get("hottest_moment") or {}).get("user_count", 0)) or 0),
"top_terms": [
str(term.get("term") or "").strip()
for term in (((item.get("hottest_moment") or {}).get("top_terms", [])) or [])[:6]
if str(term.get("term") or "").strip()
],
},
"chronological_samples": [
{
"time": str(sample.get("time") or "").strip(),
"nickname": str(sample.get("nickname") or "").strip(),
"content": str(sample.get("content") or "").strip()[:90],
}
for sample in (item.get("chronological_samples", []) or [])[:8]
if str(sample.get("content") or "").strip()
],
}
for item in session_storylines[:4]
],
},
}
if include_operator:
operator = payload.get("operator_metrics", {}) or {}
material["operator_focus"] = {
"fans_badge_user_count": int(operator.get("fans_badge_user_count", 0) or 0),
"high_room_level_user_count": int(operator.get("high_room_level_user_count", 0) or 0),
"high_fans_level_user_count": int(operator.get("high_fans_level_user_count", 0) or 0),
"active_users_5plus": int(operator.get("active_users_5plus", 0) or 0),
"active_users_10plus": int(operator.get("active_users_10plus", 0) or 0),
"top_badges": [
{
"badge_name": str(item.get("badge_name") or "").strip(),
"user_count": int(item.get("user_count", 0) or 0),
"message_count": int(item.get("message_count", 0) or 0),
}
for item in (operator.get("top_badges", []) or [])[:6]
if str(item.get("badge_name") or "").strip()
],
"top_active_users": [
{
"nickname": str(item.get("nickname") or item.get("uid") or "").strip(),
"message_count": int(item.get("message_count", 0) or 0),
"fans_name": str(item.get("fans_name") or "").strip(),
"fans_level": int(item.get("fans_level", 0) or 0),
"room_level": int(item.get("room_level", 0) or 0),
}
for item in (operator.get("top_active_users", []) or [])[:8]
],
}
return material
@staticmethod
def _clean_daily_report_llm_text(text: str) -> str:
    """
    Clean the raw LLM output of a daily report before it is rendered.

    Steps:
    1. Stringify and strip the input (``None`` becomes an empty string).
    2. Hand the text to ``remove_reasoning_content`` to drop reasoning output.
    3. Collapse every run of three or more newlines into one blank line and
       strip the result, so the image template shows no large empty gaps.
    """
    raw_text = str(text or "").strip()
    without_reasoning = remove_reasoning_content(raw_text)
    collapsed = re.sub(r"\n{3,}", "\n\n", without_reasoning)
    return collapsed.strip()
def _build_funny_scene_lines(self, payload: Dict[str, Any], limit: int = 5) -> List[str]:
"""
组装“弹幕名场面”兜底素材。
优先级:
1. 代表性原始弹幕,保证现场感;
2. 重复刷屏梗,保证“今天大家到底在笑什么”能被看出来。
"""
lines: List[str] = []
seen = set()
def push(text: str) -> None:
value = str(text or "").strip()
if not value:
return
normalized = value.lower()
if normalized in seen:
return
seen.add(normalized)
lines.append(value)
for item in (payload.get("representative_messages", []) or [])[:12]:
nickname = str(item.get("nickname") or "").strip() or "观众"
content = str(item.get("content") or "").strip()
if content:
push(f"{nickname}{content[:48]}")
if len(lines) >= limit:
return lines[:limit]
for item in (payload.get("repeated_messages", []) or [])[:6]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
push(f"复读现场:{text[:40]}(今天被刷了 {count} 次)")
if len(lines) >= limit:
return lines[:limit]
return lines[:limit]
def _build_fans_extract_lines(self, payload: Dict[str, Any], limit: int = 6) -> List[str]:
# 粉丝向萃取强调“可读、像现场弹幕”,优先取代表发言,再补充重复梗与情绪短词。
representative_messages = payload.get("representative_messages", []) or []
repeated_messages = payload.get("repeated_messages", []) or []
merged_templates = payload.get("merged_templates", []) or []
burst_terms = payload.get("burst_terms", []) or []
lines: List[str] = []
seen = set()
def push(text: str) -> None:
value = str(text or "").strip()
if not value:
return
key = value.lower()
if key in seen:
return
seen.add(key)
lines.append(value)
for item in representative_messages[:10]:
nickname = str(item.get("nickname") or "").strip() or "观众"
content = str(item.get("content") or "").strip()
if content:
push(f"{nickname}{content[:56]}")
if len(lines) >= limit:
return lines[:limit]
for item in repeated_messages[:6]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
push(f"复读梗「{text[:36]}」刷了 {count} 次。")
if len(lines) >= limit:
return lines[:limit]
for item in merged_templates[:6]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
push(f"共识弹幕「{text[:36]}」出现 {count} 次。")
if len(lines) >= limit:
return lines[:limit]
for item in burst_terms[:4]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
push(f"情绪短词「{text}」集中出现 {count} 次。")
if len(lines) >= limit:
return lines[:limit]
return lines[:limit]
def _build_fallback_fans_daily_report(self, payload: Dict[str, Any]) -> str:
"""
当 LLM 不可用或返回空内容时,仍然生成一份可直接发群的粉丝版日报。
兜底文本保持“有梗但不胡编”的原则,所有句子都只从真实弹幕统计结果里取材。
"""
meta = payload.get("report_meta", {}) or {}
topic_clusters = payload.get("topic_evidence_clusters", []) or []
hero_mentions = (
payload.get("compact_scene_material", {})
.get("semantic_fact_hints", {})
.get("hero_mentions", [])
or []
)
top_terms = [
str(item.get("term") or "").strip()
for item in (payload.get("top_terms", []) or [])[:5]
if str(item.get("term") or "").strip()
]
merged_templates = payload.get("merged_templates", []) or []
burst_terms = payload.get("burst_terms", []) or []
peak_buckets = payload.get("peak_buckets", []) or []
repeated_messages = payload.get("repeated_messages", []) or []
anchor_day = str(meta.get("anchor_day", "") or "")
lead_parts = [
f"{anchor_day} 这场直播,弹幕区整体处于高能围观状态,大家一边盯着直播内容,一边围着"
f"{''.join(top_terms[:4]) or '节目效果'}疯狂接梗。"
]
if merged_templates:
lead_parts.append(
f"尤其是「{str(merged_templates[0].get('text') or '').strip()[:26]}」这类共识弹幕,一看就是全场默认会背。"
)
lines = [" ".join(lead_parts).strip(), "【今日重点信息】"]
for item in topic_clusters[:3]:
label = str(item.get("label") or "").strip()
time_range = str(item.get("time_range") or "").strip()
count = int(item.get("count", 0) or 0)
samples = item.get("samples", []) or []
sample_text = ""
if samples:
sample_text = str(samples[0].get("content") or "").strip()[:38]
if label and sample_text:
lines.append(f"- {label}{time_range or '全场'} 一直有人聊,约 {count} 条相关弹幕,代表说法是「{sample_text}」。")
elif label:
lines.append(f"- {label}是今天的重点主线之一,相关弹幕约 {count} 条。")
lines.append("【核心讨论话题】")
for item in topic_clusters[:3]:
label = str(item.get("label") or "").strip()
keywords = [str(keyword).strip() for keyword in (item.get("keywords", []) or [])[:5] if str(keyword).strip()]
if label and keywords:
lines.append(f"- 大家围着 {label} 打转,关键词主要是 {''.join(keywords)}")
lines.append("【英雄与对局焦点】")
for item in hero_mentions[:3]:
hero_name = str(item.get("hero") or "").strip()
mention_count = int(item.get("mention_count", 0) or 0)
samples = item.get("samples", []) or []
sample_text = ""
if samples:
sample_text = str(samples[0].get("content") or "").strip()[:36]
if hero_name and sample_text:
lines.append(f"- {hero_name}被点名 {mention_count} 次,弹幕现场直接说到「{sample_text}」。")
elif hero_name:
lines.append(f"- {hero_name}是今天的主要英雄话题之一,被提到 {mention_count} 次。")
lines.append("【今日笑点】")
if peak_buckets:
top_bucket = peak_buckets[0]
lines.append(
f"- {str(top_bucket.get('start_time') or '')[-8:-3]} 前后弹幕密度冲高,直播间像突然集体抢到麦,乐子值直接拉满。"
)
if repeated_messages:
first_repeat = repeated_messages[0]
lines.append(
f"- 复读冠军是「{str(first_repeat.get('text') or '').strip()[:32]}」,光这句就被来回刷了 {int(first_repeat.get('count', 0) or 0)} 次。"
)
if burst_terms:
first_burst = burst_terms[0]
lines.append(
f"- 情绪词「{str(first_burst.get('text') or '').strip()}」反复出现 {int(first_burst.get('count', 0) or 0)} 次,说明那一段大家已经彻底上头。"
)
if top_terms:
lines.append(f"- 今天的集体关注点基本围着 {''.join(top_terms[:4])} 打转,谁路过都会被梗吸进去。")
lines.append("【弹幕名场面】")
for item in self._build_funny_scene_lines(payload, limit=5):
lines.append(f"- {item}")
lines.append("【梗王榜】")
rank_items: List[str] = []
for item in merged_templates[:2]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
rank_items.append(f"{text[:30]}|全场 {count}")
for item in burst_terms[:2]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
rank_items.append(f"{text}|情绪爆发 {count}")
for item in repeated_messages[:3]:
if len(rank_items) >= 3:
break
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
candidate = f"{text[:30]}|复读 {count}"
if candidate not in rank_items:
rank_items.append(candidate)
for item in rank_items[:3]:
lines.append(f"- {item}")
lines.append("【收尾播报】")
if peak_buckets:
lines.append(
f"- 今天的直播内容未必全部记住了,但 {str(peak_buckets[0].get('start_time') or '')[-8:-3]} 那波弹幕起哄,已经足够做成群内经典片段。"
)
else:
lines.append("- 今天的直播总结成一句话就是:画面会结束,梗不会下播。")
return "\n".join(lines).strip()
def _build_fallback_daily_report(self, payload: Dict[str, Any]) -> str:
    """
    Assemble the plain-text daily report directly from the payload.

    Optional sections (session time ranges, focus terms, operator metrics,
    active fan badges, high-frequency memes, peak buckets, representative
    danmu) are appended only when the payload carries data for them.

    Args:
        payload: Daily report payload; missing or ``None`` entries are
            replaced by empty containers before use.

    Returns:
        The report text. When it is longer than
        ``self._daily_report_max_length`` it is cut and a truncation marker
        is appended.
    """
    meta = payload.get("report_meta", {}) or {}
    title_name = str(meta.get("nickname") or meta.get("room_name") or meta.get("room_id") or "主播")
    report_lines = [
        f"斗鱼每日报告 | {title_name} | {meta.get('anchor_day', '')}",
        f"{meta.get('session_count', 0)} 场,弹幕 {meta.get('message_count', 0)} 条,参与用户 {meta.get('unique_user_count', 0)} 人。",
    ]

    # Session time ranges: first segment start .. last segment end, at most four sessions.
    # The [-8:-3] slice keeps "HH:MM" if the value ends in "HH:MM:SS" (assumed format — TODO confirm).
    time_ranges = []
    for session in (payload.get("sessions", []) or [])[:4]:
        segments = session.get("segments", []) or []
        if segments:
            start_time = str(segments[0].get("start_time", ""))[-8:-3]
            end_time = str(segments[-1].get("end_time", ""))[-8:-3]
            time_ranges.append(f"{start_time}-{end_time}")
    if time_ranges:
        report_lines.append("场次时间:" + " / ".join(time_ranges))

    # Focus terms: the heading is emitted whenever any term entry exists, even if all are blank.
    focus_entries = payload.get("top_terms", []) or []
    if focus_entries:
        kept_terms = []
        for entry in focus_entries[:8]:
            term_text = str(entry.get("term") or "")
            if term_text.strip():
                kept_terms.append(term_text)
        # NOTE(review): terms are concatenated with an empty separator — confirm a delimiter was not intended.
        report_lines.append("关注焦点:" + "".join(kept_terms))

    operator_metrics = payload.get("operator_metrics", {}) or {}
    if operator_metrics:
        fans_badge_user_count = int(operator_metrics.get("fans_badge_user_count", 0) or 0)
        high_room_level_user_count = int(operator_metrics.get("high_room_level_user_count", 0) or 0)
        high_fans_level_user_count = int(operator_metrics.get("high_fans_level_user_count", 0) or 0)
        active_users_10plus = int(operator_metrics.get("active_users_10plus", 0) or 0)
        labelled_counts = (
            (fans_badge_user_count, f"带牌活跃用户 {fans_badge_user_count}"),
            (high_room_level_user_count, f"30级+活跃用户 {high_room_level_user_count}"),
            (high_fans_level_user_count, f"10级+粉丝牌用户 {high_fans_level_user_count}"),
            (active_users_10plus, f"高活跃核心用户 {active_users_10plus}"),
        )
        # Only non-zero counters are reported.
        op_parts = [label for value, label in labelled_counts if value]
        if op_parts:
            report_lines.append("运营侧:" + "".join(op_parts))

        badge_rows = operator_metrics.get("top_badges", []) or []
        if badge_rows:
            report_lines.append("活跃粉丝牌:")
        for row in badge_rows[:5]:
            badge_name = str(row.get("badge_name") or "").strip()
            user_count = int(row.get("user_count", 0) or 0)
            message_count = int(row.get("message_count", 0) or 0)
            if badge_name:
                report_lines.append(f"- {badge_name}{user_count}人,{message_count}")

    meme_rows = payload.get("merged_templates", []) or []
    if meme_rows:
        report_lines.append("高频梗:")
    for row in meme_rows[:5]:
        text = str(row.get("text") or "").strip()
        count = int(row.get("count", 0) or 0)
        if text:
            report_lines.append(f"- {text[:42]}{count}次)")

    peak_rows = payload.get("peak_buckets", []) or []
    if peak_rows:
        report_lines.append("热点时段:")
    for bucket in peak_rows[:3]:
        start_time = str(bucket.get("start_time") or "")[-8:-3]
        message_count = int(bucket.get("message_count", 0) or 0)
        terms = "".join(
            str(term.get("term") or "")
            for term in (bucket.get("top_terms", []) or [])[:4]
            if str(term.get("term") or "").strip()
        )
        report_lines.append(f"- {start_time}{message_count}条,关键词:{terms}")

    sample_rows = payload.get("representative_messages", []) or []
    if sample_rows:
        report_lines.append("代表弹幕:")
    for row in sample_rows[:4]:
        nickname = str(row.get("nickname") or "").strip()
        content = str(row.get("content") or "").strip()
        if content:
            report_lines.append(f"- {nickname}{content[:60]}")

    report = "\n".join(report_lines).strip()
    if len(report) > self._daily_report_max_length:
        # Leave 20 characters of headroom for the truncation marker.
        report = report[: self._daily_report_max_length - 20].rstrip() + "\n...(已截断)"
    return report
def _build_fallback_danmu_summary(self, payload: Dict[str, Any]) -> str:
    """
    Build the rule-based danmu summary used when no LLM summary is returned.

    The first line is an overall statement. Bullet lines are then added for
    shared memes, the top two peak buckets, representative messages and top
    terms, each only when the payload has data for it. The lines returned by
    ``self._build_fans_extract_lines`` are appended last under their own
    heading.

    Args:
        payload: Daily report payload; missing or ``None`` entries are
            replaced by empty containers before use.

    Returns:
        The summary as newline-joined text.
    """

    def _bucket_terms(bucket: Dict[str, Any]) -> str:
        # Up to four non-blank terms of one peak bucket, concatenated.
        return "".join(
            [str(term.get("term") or "") for term in (bucket.get("top_terms", []) or [])[:4] if str(term.get("term") or "").strip()]
        )

    meta = payload.get("report_meta", {}) or {}
    top_terms = [
        str(item.get("term") or "").strip()
        for item in (payload.get("top_terms", []) or [])[:6]
        if str(item.get("term") or "").strip()
    ]
    merged_templates = payload.get("merged_templates", []) or []
    peak_buckets = payload.get("peak_buckets", []) or []
    representative_messages = payload.get("representative_messages", []) or []

    lines = [
        f"{meta.get('anchor_day', '')} 这场直播弹幕整体比较密集,讨论重心主要围绕 {''.join(top_terms[:4]) or '对局过程'} 展开,观众互动意愿较强,梗和复读内容持续出现。"
    ]

    if merged_templates:
        sample_templates = "".join(
            [str(item.get("text") or "").strip()[:26] for item in merged_templates[:3] if str(item.get("text") or "").strip()]
        )
        if sample_templates:
            lines.append(f"- 主线观察:直播间共识梗很强,重复刷屏内容主要集中在 {sample_templates}")
        # A no-op loop over merged_templates[:4] that broke on its first
        # iteration used to follow here; it had no effect and was removed.

    if peak_buckets:
        top_bucket = peak_buckets[0]
        terms = _bucket_terms(top_bucket)
        lines.append(
            f"- 节奏变化:高峰集中在 {str(top_bucket.get('start_time') or '')[-8:-3]} 前后,单时段弹幕 {int(top_bucket.get('message_count', 0) or 0)} 条,关键词偏向 {terms}"
        )
        if len(peak_buckets) > 1:
            second_bucket = peak_buckets[1]
            second_terms = _bucket_terms(second_bucket)
            lines.append(
                f"- 热点补充:{str(second_bucket.get('start_time') or '')[-8:-3]} 也出现明显抬升,弹幕讨论继续围绕 {second_terms} 展开。"
            )

    if representative_messages:
        lines.append("- 情绪特点:代表性发言里既有对操作和决策的即时反馈,也有大量玩梗、调侃和情绪宣泄。")
    if top_terms:
        lines.append(f"- 关注焦点:高频词主要落在 {''.join(top_terms[:6])},说明观众注意力相对集中。")

    # Even in fallback mode, force the fan-oriented extract section so the
    # image template does not end up with an empty block.
    fans_extract_lines = self._build_fans_extract_lines(payload, limit=6)
    if fans_extract_lines:
        lines.append("【粉丝向弹幕萃取】")
        lines.extend(f"- {item}" for item in fans_extract_lines)
    return "\n".join(lines).strip()
def _build_operator_summary_text(self, payload: Dict[str, Any]) -> str:
    """
    Render the operator-facing metrics summary as "- " bullet lines.

    Three bullets (active user scale, fan stickiness, user quality) are
    always emitted. Further bullets for the audience trend, badge
    distribution and core speakers are added when the payload has data for
    them.

    Args:
        payload: Daily report payload. ``operator_metrics`` may be missing
            or ``None``; it is normalised to an empty dict once and that
            normalised dict is used for every lookup.

    Returns:
        Newline-joined bullet text.
    """
    meta = payload.get("report_meta", {}) or {}
    operator_metrics = payload.get("operator_metrics", {}) or {}
    total_users = int(meta.get("unique_user_count", 0) or 0)
    fans_badge_users = int(operator_metrics.get("fans_badge_user_count", 0) or 0)
    high_room_users = int(operator_metrics.get("high_room_level_user_count", 0) or 0)
    high_fans_users = int(operator_metrics.get("high_fans_level_user_count", 0) or 0)
    active_users_5plus = int(operator_metrics.get("active_users_5plus", 0) or 0)
    active_users_10plus = int(operator_metrics.get("active_users_10plus", 0) or 0)
    fans_badge_ratio = float(operator_metrics.get("fans_badge_user_ratio", 0) or 0)
    lines = [
        f"- 活跃用户规模:{total_users} 人,其中发言 5 次以上 {active_users_5plus}10 次以上 {active_users_10plus} 人。",
        f"- 粉丝粘性:带粉丝牌活跃用户 {fans_badge_users} 人,占活跃用户 {fans_badge_ratio * 100:.1f}%10 级以上粉丝牌用户 {high_fans_users} 人。",
        f"- 用户质量:房间等级 30 级以上活跃用户 {high_room_users} 人,说明高等级老观众参与度不低。",
    ]

    audience_summary = (payload.get("audience_trend", {}) or {}).get("summary", {}) or {}
    if audience_summary:
        vip_min = int(audience_summary.get("vip_min", 0) or 0)
        vip_max = int(audience_summary.get("vip_max", 0) or 0)
        diamond_latest = int(audience_summary.get("diamond_latest", 0) or 0)
        point_count = int(audience_summary.get("point_count", 0) or 0)
        lines.append(
            f"- 人数走势WS 侧共采样 {point_count} 个时间点,贵宾在 {vip_min}-{vip_max} 区间波动,钻粉收盘约 {diamond_latest}"
        )

    # Read from the normalised dict: payload["operator_metrics"] may be None,
    # in which case chaining .get() on the raw value would raise.
    top_badges = operator_metrics.get("top_badges", []) or []
    if top_badges:
        badge_parts = []
        for item in top_badges[:5]:
            badge_name = str(item.get("badge_name") or "").strip()
            if not badge_name:
                continue
            badge_parts.append(f"{badge_name} {int(item.get('user_count', 0) or 0)}人/{int(item.get('message_count', 0) or 0)}")
        if badge_parts:
            lines.append(f"- 活跃牌子分布:{''.join(badge_parts)}")

    top_active_users = operator_metrics.get("top_active_users", []) or []
    if top_active_users:
        core_parts = []
        for item in top_active_users[:5]:
            nickname = str(item.get("nickname") or item.get("uid") or "").strip()
            msg_count = int(item.get("message_count", 0) or 0)
            fans_name = str(item.get("fans_name") or "").strip()
            fans_level = int(item.get("fans_level", 0) or 0)
            room_level = int(item.get("room_level", 0) or 0)
            tags = []
            if fans_name:
                # Show the badge level only when it is positive.
                tags.append(f"{fans_name} Lv{fans_level}" if fans_level > 0 else fans_name)
            if room_level > 0:
                tags.append(f"平台 Lv{room_level}")
            tags.append(f"{msg_count}")
            core_parts.append(f"{nickname}{''.join(tags)}")
        if core_parts:
            lines.append(f"- 核心发言用户:{''.join(core_parts)}")
    return "\n".join(lines).strip()
def _build_operator_summary_lines(self, payload: Dict[str, Any]) -> List[str]:
    """
    Return the operator summary as a list of bullet texts without the "- " prefix.

    Lines of ``self._build_operator_summary_text(payload)`` that do not start
    with "- " (after stripping) are dropped.
    """
    bullets: List[str] = []
    for raw_line in self._build_operator_summary_text(payload).splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("- "):
            # Drop the two-character bullet marker, then trim again.
            bullets.append(stripped[2:].strip())
    return bullets
def _build_dify_daily_report_inputs(
    self,
    *,
    task_type: str,
    system_prompt: str,
    user_prompt: str,
    payload: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Assemble the input parameters for the Douyu daily report Dify Workflow.

    Design goals:
    1. Give the workflow the final natural-language prompts and, optionally,
       the structured raw payload;
    2. Let one workflow handle several task kinds via ``task_type``;
    3. Keep key metadata for branching, logging and fallback in the workflow.

    Args:
        task_type: Routing value consumed by the workflow.
        system_prompt: System prompt text.
        user_prompt: User prompt text; also passed as ``query``.
        payload: Daily report payload; only ``report_meta`` is read here
            unless structured inputs are enabled.

    Returns:
        A dict of string-valued workflow inputs.
    """
    meta = payload.get("report_meta", {}) or {}

    # Notes:
    # 1. Some Dify Workflows validate input variable types strictly, and
    #    complex objects (dict/list) easily trigger a 400;
    # 2. By default only compact string fields are submitted so the call works;
    # 3. The full payload is added only when _daily_report_include_structured_inputs is set.
    inputs: Dict[str, Any] = {}
    # Task routing field, used by conditional branches in Dify.
    inputs["task_type"] = task_type
    # For workflows that read `query` directly.
    inputs["query"] = user_prompt
    # Both prompts are kept so the workflow can recombine or debug them.
    inputs["system_prompt"] = system_prompt
    inputs["user_prompt"] = user_prompt
    # Key metadata for logs, titles, dashboards or error tracing.
    inputs["room_id"] = str(meta.get("room_id") or "").strip()
    inputs["anchor_day"] = str(meta.get("anchor_day") or "").strip()
    inputs["nickname"] = str(meta.get("nickname") or meta.get("room_name") or "").strip()
    # Output length control: this variable is configured as a paragraph
    # (string) type in some workflows, so it is always sent as a string to
    # avoid a "max_length must be a string" 400 validation error.
    inputs["max_length"] = str(int(self._daily_report_max_length or 1800))

    if self._daily_report_include_structured_inputs:
        inputs["report_payload_json"] = json.dumps(payload, ensure_ascii=False)
    return inputs
def _build_room_background_profile_seed(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract a compact seed from the daily report payload for the background-profile model.

    Benefits:
    1. The whole payload does not need to be sent to the model again,
       which reduces tokens and noise;
    2. Even without web access the model can make conservative inferences
       from room tags, representative danmu and high-frequency terms.

    Args:
        payload: Daily report payload.

    Returns:
        A dict with room metadata, runtime context, inferred domains, top
        terms, trimmed meme/repeat/representative samples and a snapshot of
        the manually configured profile.
    """

    def _clean(value: Any) -> str:
        # Same normalisation used for every text field: None/falsy -> "", then strip.
        return str(value or "").strip()

    meta = payload.get("report_meta", {}) or {}
    room_context = payload.get("room_context", {}) or {}
    runtime_context = room_context.get("runtime_context", {}) or {}
    room_id = _clean(meta.get("room_id"))

    # Entries with blank text are skipped; texts are capped to keep the seed small.
    representative_messages = [
        {"nickname": _clean(entry.get("nickname")), "content": _clean(entry.get("content"))[:90]}
        for entry in (payload.get("representative_messages", []) or [])[:6]
        if _clean(entry.get("content"))
    ]
    merged_templates = [
        {"text": _clean(entry.get("text"))[:48], "count": int(entry.get("count", 0) or 0)}
        for entry in (payload.get("merged_templates", []) or [])[:8]
        if _clean(entry.get("text"))
    ]
    repeated_messages = [
        {
            "text": _clean(entry.get("text") or entry.get("content"))[:48],
            "count": int(entry.get("count", 0) or 0),
        }
        for entry in (payload.get("repeated_messages", []) or [])[:6]
        if _clean(entry.get("text") or entry.get("content"))
    ]

    manual_profile = self._match_room_context_profile(room_id)

    room_meta = {
        "room_id": room_id,
        "nickname": _clean(meta.get("nickname")),
        "room_name": _clean(meta.get("room_name")),
        "anchor_day": _clean(meta.get("anchor_day")),
    }
    runtime_snapshot = {
        "primary_category": _clean(runtime_context.get("primary_category")),
        "secondary_category": _clean(runtime_context.get("secondary_category")),
        "game_name": _clean(runtime_context.get("game_name")),
        "tags": self._normalize_text_list(runtime_context.get("tags"))[:10],
    }
    inferred_domains = self._normalize_text_list(room_context.get("inferred_domains"))[:6]
    top_terms = [
        _clean(entry.get("term"))
        for entry in (payload.get("top_terms", []) or [])[:12]
        if _clean(entry.get("term"))
    ]
    # The manual profile snapshot is passed along so the model only fills
    # gaps and does not overturn manually configured settings.
    manual_profile_hint = {
        "domain": _clean(manual_profile.get("domain")),
        "identity_summary": _clean(manual_profile.get("identity_summary")),
        "career_background": _clean(manual_profile.get("career_background")),
        "related_people": self._normalize_text_list(manual_profile.get("related_people"))[:10],
        "storyline_keywords": self._normalize_text_list(manual_profile.get("storyline_keywords"))[:10],
    }
    return {
        "room_meta": room_meta,
        "runtime_context": runtime_snapshot,
        "inferred_domains": inferred_domains,
        "top_terms": top_terms,
        "merged_templates": merged_templates,
        "repeated_messages": repeated_messages,
        "representative_messages": representative_messages,
        "manual_profile_hint": manual_profile_hint,
    }
def _build_room_background_profile_prompt(self, payload: Dict[str, Any]) -> Tuple[str, str, Dict[str, Any]]:
    """
    Build the prompts for the "streamer background profile" task.

    Design principles:
    1. Prefer looking up public information; if the model cannot retrieve
       anything, fall back to conservative inference;
    2. Require JSON output so the result can be parsed directly;
    3. When unsure, leave fields empty rather than inventing career history
       or relationships.

    Args:
        payload: Daily report payload, reduced to a seed via
            ``self._build_room_background_profile_seed``.

    Returns:
        ``(system_prompt, user_prompt, seed)``; the seed is embedded in the
        user prompt as indented JSON.
    """
    seed = self._build_room_background_profile_seed(payload)

    system_parts = [
        "你是斗鱼直播间背景画像整理助手。",
        "请根据给定房间信息,整理一份给日报模型使用的主播背景 JSON。",
        "如果你具备联网、搜索、知识库或检索能力,请优先检索公开资料再整理;",
        "如果你不具备检索能力,只能根据输入材料做保守判断,不确定的字段必须留空。",
        "输出必须是 JSON 对象,不要输出代码块,不要补充额外解释。",
    ]
    system_prompt = "".join(system_parts)

    user_parts = [
        "请只输出一个 JSON 对象,字段固定为:\n",
        "{\n",
        "  \"domain\": \"\",\n",
        "  \"domain_keywords\": [],\n",
        "  \"identity_summary\": \"\",\n",
        "  \"career_background\": \"\",\n",
        "  \"related_people\": [],\n",
        "  \"storyline_keywords\": [],\n",
        "  \"meme_explanations\": [],\n",
        "  \"style_hints\": []\n",
        "}\n\n",
        "规则:\n",
        "1. identity_summary 要像“这是什么类型主播、观众通常围绕什么背景接梗”的一句话。\n",
        "2. career_background 只写公开且较稳定的职业经历、圈层身份、转型轨迹;不确定就留空。\n",
        "3. related_people 只保留和该主播强相关的人物;不确定不要硬猜。\n",
        "4. meme_explanations 和 style_hints 要服务日报理解,不要写百科长文。\n",
        "5. 如果主播不是 Dota2 主播,也要按其真实领域整理,不要强行往 Dota2 上靠。\n",
        "6. 如果资料存在歧义、重名或证据不足,直接留空,不要为了凑字段而硬编。\n\n",
        f"输入材料:\n{json.dumps(seed, ensure_ascii=False, indent=2)}",
    ]
    user_prompt = "".join(user_parts)
    return system_prompt, user_prompt, seed
def _build_dify_room_background_inputs(
    self,
    *,
    system_prompt: str,
    user_prompt: str,
    seed: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Assemble the Dify Workflow inputs for the "room background profile" task.

    The task is routed through its own ``task_type`` value so the Dify side
    can branch on it, while the plugin-side call shape stays the same.

    Args:
        system_prompt: System prompt text.
        user_prompt: User prompt text; also passed as ``query``.
        seed: Profile seed; its ``room_meta`` supplies the metadata fields
            and the whole seed is serialised into ``report_payload_json``.

    Returns:
        A dict of string-valued workflow inputs.
    """
    room_meta = seed.get("room_meta", {}) or {}
    room_id = str(room_meta.get("room_id") or "").strip()
    anchor_day = str(room_meta.get("anchor_day") or "").strip()
    # Fall back to the room name when no nickname is present.
    nickname = str(room_meta.get("nickname") or room_meta.get("room_name") or "").strip()
    seed_json = json.dumps(seed, ensure_ascii=False)
    return dict(
        task_type="room_background_profile",
        query=user_prompt,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        room_id=room_id,
        anchor_day=anchor_day,
        nickname=nickname,
        max_length="1200",
        report_payload_json=seed_json,
    )
def _call_room_background_profile_llm(
    self,
    *,
    system_prompt: str,
    user_prompt: str,
    seed: Dict[str, Any],
) -> str:
    """
    Call the shared LLM client to produce the background profile text.

    Provider handling mirrors the daily report path:
    1. The "dify" provider goes through ``run(...)`` with workflow inputs;
    2. Any other provider goes through ``chat(system, user)``.

    Args:
        system_prompt: System prompt text.
        user_prompt: User prompt text.
        seed: Profile seed; its room id is used to build the user id and tag.

    Returns:
        The stripped response text, or "" when no client is configured.
    """
    client = self._daily_report_llm_client
    if not client:
        return ""

    room_meta = seed.get("room_meta", {}) or {}
    room_id = str(room_meta.get("room_id") or "").strip()
    user_id = f"douyu_room_background_{room_id or 'unknown'}"

    if client.provider != "dify":
        # NOTE(review): assumes chat() returns a str — confirm it never returns None.
        return client.chat(
            system_prompt,
            user_prompt,
            user_id=user_id,
        ).strip()

    workflow_inputs = self._build_dify_room_background_inputs(
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        seed=seed,
    )
    result = client.run(
        prompt=user_prompt,
        user=user_id,
        inputs=workflow_inputs,
        tag=f"douyu_room_background_{room_id or 'unknown'}",
    )
    return str((result or {}).get("text", "") or "").strip()
def _generate_room_background_profile(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Synchronously generate a background profile dict from the payload.

    The method builds the prompts, calls the LLM, extracts a JSON object
    from the response and normalises it. Every failure step logs a warning
    (where applicable) and yields an empty dict.

    Args:
        payload: Daily report payload.

    Returns:
        The normalised profile, or ``{}`` when no client is configured, the
        response is empty, it is not JSON, or normalisation yields nothing.
    """
    if not self._daily_report_llm_client:
        return {}

    system_prompt, user_prompt, seed = self._build_room_background_profile_prompt(payload)
    response_text = self._call_room_background_profile_llm(
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        seed=seed,
    )
    if not response_text:
        logger.warning(
            f"斗鱼房间背景画像生成失败: room={((seed.get('room_meta', {}) or {}).get('room_id', ''))}, "
            f"last_error={self._daily_report_llm_client.last_error}"
        )
        return {}

    parsed = self._extract_json_object_from_text(response_text)
    if not parsed:
        logger.warning(
            f"斗鱼房间背景画像返回非 JSON已忽略: room={((seed.get('room_meta', {}) or {}).get('room_id', ''))}, "
            f"preview={response_text[:180]}"
        )
        return {}

    # Any falsy normalisation result collapses to an empty dict.
    return self._normalize_auto_room_background_profile(parsed) or {}
async def _ensure_room_background_profile(
    self,
    room_id: str,
    nickname: str,
    room_name: str,
    sessions: List[Dict[str, Any]],
    payload: Dict[str, Any],
    *,
    force_refresh: bool = False,
) -> Dict[str, Any]:
    """
    Make sure the room background profile is ready before a report is generated.

    Flow:
    1. Read the cached profile from Redis, if a Redis manager exists;
    2. When auto-profiling is enabled and the cache has no meaningful
       content (or a refresh is forced), generate a profile with the LLM in
       a worker thread and store it in Redis;
    3. Whether or not a profile was generated, rebuild ``room_context`` so
       the payload uses the latest data.

    Args:
        room_id: Room id; falls back to ``report_meta.room_id`` when empty.
        nickname: Streamer nickname; falls back to ``report_meta.nickname``.
        room_name: Room name; falls back to ``report_meta.room_name``.
        sessions: Sessions forwarded to ``_build_room_semantic_context``.
        payload: Daily report payload; mutated in place.
        force_refresh: Regenerate the profile even if a cached one exists.

    Returns:
        The same payload object (unchanged when it is empty or no room id
        can be determined).
    """
    if not payload:
        return payload

    meta = payload.get("report_meta", {}) or {}
    room_id = str(room_id or meta.get("room_id") or "").strip()
    nickname = str(nickname or meta.get("nickname") or "").strip()
    room_name = str(room_name or meta.get("room_name") or "").strip()
    if not room_id:
        return payload

    if self.redis_manager:
        cached_profile = self.redis_manager.get_room_background_profile(room_id) or {}
    else:
        cached_profile = {}

    # Short-circuit order matters: the cache content check runs only when all
    # prerequisites hold and no refresh is forced.
    should_build = (
        self._auto_room_background_profile_enable
        and self._daily_report_use_llm
        and self._daily_report_llm_client is not None
        and self.redis_manager is not None
        and (force_refresh or not self._profile_has_meaningful_content(cached_profile))
    )

    generated_profile = None
    if should_build:
        # The generator is synchronous, so it runs in a worker thread.
        generated_profile = await asyncio.to_thread(
            self._generate_room_background_profile,
            payload,
        )
    if generated_profile:
        # The cache TTL never drops below one hour.
        ttl_seconds = max(int(self._auto_room_background_profile_ttl_seconds or 0), 3600)
        self.redis_manager.set_room_background_profile(
            room_id,
            generated_profile,
            ttl_seconds=ttl_seconds,
        )
        logger.info(
            f"斗鱼房间背景画像已刷新并缓存到 Redis: room={room_id}, ttl={ttl_seconds}s"
        )

    # room_context is rebuilt regardless of whether auto-profiling ran:
    # 1. A profile just written to Redis is reflected in the payload at once;
    # 2. Without a new profile, the payload still goes through the same
    #    merge of manual profile, Redis cache and live room info.
    payload["room_context"] = self._build_room_semantic_context(room_id, nickname, room_name, sessions)
    return payload
def _call_daily_report_llm(
    self,
    *,
    task_type: str,
    system_prompt: str,
    user_prompt: str,
    payload: Dict[str, Any],
    tag: str,
) -> str:
    """
    Single entry point for Douyu daily report LLM calls.

    - "dify" provider: calls ``run(...)`` with inputs built by
      ``_build_dify_daily_report_inputs``;
    - other providers: calls ``chat(system, user)``.

    Args:
        task_type: Workflow routing value (used only on the Dify path).
        system_prompt: System prompt text.
        user_prompt: User prompt text.
        payload: Daily report payload; its room id forms the user id.
        tag: Tag forwarded to ``run`` on the Dify path.

    Returns:
        The stripped response text, or "" when no client is configured.
    """
    client = self._daily_report_llm_client
    if not client:
        return ""

    meta = payload.get("report_meta", {}) or {}
    room_id = str(meta.get("room_id") or "").strip()
    user_id = f"douyu_daily_report_{room_id or 'unknown'}"

    if client.provider != "dify":
        # NOTE(review): assumes chat() returns a str — confirm it never returns None.
        return client.chat(
            system_prompt,
            user_prompt,
            user_id=user_id,
        ).strip()

    inputs = self._build_dify_daily_report_inputs(
        task_type=task_type,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        payload=payload,
    )
    result = client.run(
        prompt=user_prompt,
        user=user_id,
        inputs=inputs,
        tag=tag,
    )
    return str((result or {}).get("text", "") or "").strip()
async def _generate_danmu_summary_text(self, payload: Dict[str, Any]) -> str:
    """
    Produce the danmu summary text, preferring the LLM and falling back to rules.

    The LLM call runs in a worker thread. If it yields no text, or the
    cleaned text is empty, a warning is logged and the rule-based fallback
    summary is returned.

    Args:
        payload: Daily report payload.

    Returns:
        The cleaned LLM summary, or the fallback summary.
    """
    if not (self._daily_report_use_llm and self._daily_report_llm_client):
        return self._build_fallback_danmu_summary(payload)

    system_prompt, user_prompt = self._build_danmu_summary_prompt(payload)
    result = await asyncio.to_thread(
        self._call_daily_report_llm,
        task_type="danmu_summary",
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        payload=payload,
        tag=f"douyu_danmu_summary_{(payload.get('report_meta', {}) or {}).get('room_id', '')}",
    )
    if result:
        cleaned = self._clean_daily_report_llm_text(result)
        if cleaned:
            return cleaned

    logger.warning(
        f"斗鱼弹幕总结 LLM 生成失败: model={self._daily_report_llm_client.model}, "
        f"last_error={self._daily_report_llm_client.last_error}"
    )
    return self._build_fallback_danmu_summary(payload)
async def _generate_fans_daily_report_text(self, payload: Dict[str, Any]) -> str:
    """
    Generate the body of the standalone fan-oriented parody daily report.

    The shared LLM client is reused; the style is separated through a
    dedicated ``task_type`` and prompt. Over-long LLM text is cut to
    ``self._daily_report_max_length`` with a truncation marker. When the LLM
    yields nothing usable, a warning is logged and the rule-based fallback
    report is returned.

    Args:
        payload: Daily report payload.

    Returns:
        The LLM text (possibly truncated) or the fallback report.
    """
    if not (self._daily_report_use_llm and self._daily_report_llm_client):
        return self._build_fallback_fans_daily_report(payload)

    system_prompt, user_prompt = self._build_fans_daily_report_prompt(payload)
    result = await asyncio.to_thread(
        self._call_daily_report_llm,
        task_type="fans_daily_report",
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        payload=payload,
        tag=f"douyu_fans_daily_report_{(payload.get('report_meta', {}) or {}).get('room_id', '')}",
    )
    if result:
        text = self._clean_daily_report_llm_text(result)
        if len(text) > self._daily_report_max_length:
            # Leave 20 characters of headroom for the truncation marker.
            return text[: self._daily_report_max_length - 20].rstrip() + "\n...(已截断)"
        if text:
            return text

    logger.warning(
        f"斗鱼粉丝日报 LLM 生成失败: model={self._daily_report_llm_client.model}, "
        f"last_error={self._daily_report_llm_client.last_error}"
    )
    return self._build_fallback_fans_daily_report(payload)
async def _build_fans_daily_report_markdown(self, payload: Dict[str, Any]) -> str:
    """
    Build the Markdown form of the fan-oriented report.

    The Markdown form mainly serves as the fallback when the dedicated image
    template fails to render; it keeps a title line, a stats line and the
    fan report body.

    Args:
        payload: Daily report payload.

    Returns:
        The Markdown document as a single string.
    """
    meta = payload.get("report_meta", {}) or {}
    title_name = str(meta.get("nickname") or meta.get("room_name") or meta.get("room_id") or "主播")
    fans_report_text = await self._generate_fans_daily_report_text(payload)

    heading = f"# {title_name} 的弹幕乐子日报"
    stats_line = f"{meta.get('anchor_day', '')}|弹幕 {meta.get('message_count', 0)}|围观群众 {meta.get('unique_user_count', 0)}"
    # An empty entry separates the header lines from the body.
    return "\n".join((heading, stats_line, "", fans_report_text)).strip()
async def _render_fans_daily_report_image(self, payload: Dict[str, Any]) -> Optional[str]:
    """
    Render the fan-oriented daily report to a PNG and return its path.

    The dedicated HTML template is tried first. Only if that fails is the
    Markdown fallback built and rendered. The Markdown document is built
    lazily because building it generates the report text again, which is
    wasted work whenever the HTML template succeeds.

    Args:
        payload: Daily report payload; ``report_meta`` supplies the room id
            and anchor day used in the file name.

    Returns:
        The image path on success (absolute for the template path), or
        ``None`` when both rendering attempts fail.
    """
    meta = payload.get("report_meta", {}) or {}
    room_id = str(meta.get("room_id", "") or "room")
    anchor_day = str(meta.get("anchor_day", "") or "").replace("-", "")
    filename = f"douyu_fans_daily_report_{room_id}_{anchor_day}.png"

    try:
        fans_report_text = await self._generate_fans_daily_report_text(payload)
        html_content = render_fans_daily_report_html(
            payload=payload,
            fans_report_text=fans_report_text,
        )
        output_dir = os.path.join(os.getcwd(), "temp", "md2image")
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, filename)
        await html_to_image(html_content, output_path)
        return str(Path(output_path).resolve())
    except Exception as e:
        logger.error(f"斗鱼粉丝日报专用模板图片生成失败(room={room_id}, day={anchor_day}): {e}")

    try:
        # Built only on the fallback path, and inside the guarded section so
        # a failure here degrades to "no image" instead of propagating.
        markdown = await self._build_fans_daily_report_markdown(payload)
        return await convert_md_str_to_image(markdown, filename)
    except Exception as e:
        logger.error(f"斗鱼粉丝日报图片生成失败(room={room_id}, day={anchor_day}): {e}")
        return None
async def _build_daily_report_markdown(self, payload: Dict[str, Any]) -> str:
    """
    Build the Markdown form of the operator daily report.

    The document has a title, a stats line, the danmu summary and the
    operator metrics summary, followed by optional "peak buckets" and
    "high-frequency memes" sections when the payload has data for them.

    Args:
        payload: Daily report payload.

    Returns:
        The Markdown document as a single string.
    """
    meta = payload.get("report_meta", {}) or {}
    title_name = str(meta.get("nickname") or meta.get("room_name") or meta.get("room_id") or "主播")
    danmu_summary = await self._generate_danmu_summary_text(payload)
    operator_summary = self._build_operator_summary_text(payload)

    doc = [
        f"# {title_name} 直播每日报告",
        f"{meta.get('anchor_day', '')}|场次 {meta.get('session_count', 0)}|弹幕 {meta.get('message_count', 0)}|活跃用户 {meta.get('unique_user_count', 0)}",
        "",
        "## 弹幕总结",
        danmu_summary,
        "",
        "## 运营数据总结",
        operator_summary,
    ]

    peak_rows = payload.get("peak_buckets", []) or []
    if peak_rows:
        doc += ["", "## 热点时段"]
        for item in peak_rows[:3]:
            # Up to four non-blank terms per bucket, concatenated.
            terms = "".join(
                str(term.get("term") or "")
                for term in (item.get("top_terms", []) or [])[:4]
                if str(term.get("term") or "").strip()
            )
            doc.append(
                f"- `{str(item.get('start_time') or '')[-8:-3]}` 弹幕 {int(item.get('message_count', 0) or 0)} 条,关键词:{terms}"
            )

    meme_rows = payload.get("merged_templates", []) or []
    if meme_rows:
        doc += ["", "## 高频梗"]
        for row in meme_rows[:5]:
            text = str(row.get("text") or "").strip()
            count = int(row.get("count", 0) or 0)
            if text:
                doc.append(f"- {text[:72]}{count}次)")

    return "\n".join(doc).strip()
async def _render_daily_report_image(self, payload: Dict[str, Any]) -> Optional[str]:
    """
    Render the operator daily report to a PNG and return its path.

    The dedicated HTML template is tried first. Only if that fails is the
    Markdown fallback built and rendered. The Markdown document is built
    lazily because building it generates the danmu summary again, which is
    wasted work whenever the HTML template succeeds.

    Args:
        payload: Daily report payload; ``report_meta`` supplies the room id
            and anchor day used in the file name.

    Returns:
        The image path on success (absolute for the template path), or
        ``None`` when both rendering attempts fail.
    """
    meta = payload.get("report_meta", {}) or {}
    room_id = str(meta.get("room_id", "") or "room")
    anchor_day = str(meta.get("anchor_day", "") or "").replace("-", "")
    filename = f"douyu_daily_report_{room_id}_{anchor_day}.png"

    try:
        danmu_summary = await self._generate_danmu_summary_text(payload)
        html_content = render_daily_report_html(
            payload=payload,
            danmu_summary=danmu_summary,
            operator_summary_lines=self._build_operator_summary_lines(payload),
        )
        output_dir = os.path.join(os.getcwd(), "temp", "md2image")
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, filename)
        await html_to_image(html_content, output_path)
        return str(Path(output_path).resolve())
    except Exception as e:
        logger.error(f"斗鱼专用模板图片生成失败(room={room_id}, day={anchor_day}): {e}")

    try:
        # Built only on the fallback path, and inside the guarded section so
        # a failure here degrades to "no image" instead of propagating.
        markdown = await self._build_daily_report_markdown(payload)
        return await convert_md_str_to_image(markdown, filename)
    except Exception as e:
        logger.error(f"斗鱼每日报告图片生成失败(room={room_id}, day={anchor_day}): {e}")
        return None
async def _generate_daily_report_text(self, payload: Dict[str, Any]) -> str:
    """
    Generate the operator daily report text, preferring the LLM.

    Over-long LLM text is cut to ``self._daily_report_max_length`` with a
    truncation marker. When the LLM yields nothing usable, a warning is
    logged and the rule-based fallback report is returned.

    Args:
        payload: Daily report payload.

    Returns:
        The LLM text (possibly truncated) or the fallback report.
    """
    if not (self._daily_report_use_llm and self._daily_report_llm_client):
        return self._build_fallback_daily_report(payload)

    system_prompt, user_prompt = self._build_daily_report_prompt(payload)
    result = await asyncio.to_thread(
        self._call_daily_report_llm,
        task_type="daily_report",
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        payload=payload,
        tag=f"douyu_daily_report_{(payload.get('report_meta', {}) or {}).get('room_id', '')}",
    )
    if result:
        text = self._clean_daily_report_llm_text(result)
        if len(text) > self._daily_report_max_length:
            # Leave 20 characters of headroom for the truncation marker.
            return text[: self._daily_report_max_length - 20].rstrip() + "\n...(已截断)"
        if text:
            return text

    logger.warning(
        f"斗鱼每日报告 LLM 生成失败: model={self._daily_report_llm_client.model}, "
        f"last_error={self._daily_report_llm_client.last_error}"
    )
    return self._build_fallback_daily_report(payload)
async def _get_or_create_daily_report_result(
    self,
    room_id: str,
    anchor_day: str,
    payload: Dict[str, Any],
    *,
    force_regenerate: bool = False,
) -> Dict[str, Any]:
    """
    Return the operator daily report for a room/day, from cache or freshly generated.

    With ``force_regenerate=True`` the local cache read is skipped and the
    text/image are regenerated and written over the cache, so prompt or
    template changes can be verified immediately.

    Args:
        room_id: Room id.
        anchor_day: Report day.
        payload: Daily report payload used for generation.
        force_regenerate: Skip the cache read.

    Returns:
        On a cache hit: ``report_text``, ``report_image`` and
        ``cached=True``. Otherwise the full freshly generated record with
        ``cached=False``.
    """
    if force_regenerate:
        cached: Dict[str, Any] = {}
    else:
        cached = self._load_daily_report_cache(room_id, anchor_day, report_kind="operator") or {}

    cached_image = self._resolve_existing_report_image(cached.get("report_image"))
    cached_text = str(cached.get("report_text") or "").strip()
    cached_version = int(cached.get("cache_version", 0) or 0)
    # A cache entry is usable only if it is current and carries an image or text.
    if cached_version >= self._DAILY_REPORT_CACHE_VERSION and (cached_image or cached_text):
        return {
            "report_text": cached_text,
            "report_image": cached_image,
            "cached": True,
        }

    report_text = await self._generate_daily_report_text(payload)
    if self._daily_report_send_image:
        report_image = await self._render_daily_report_image(payload)
    else:
        report_image = None

    result = {
        "room_id": room_id,
        "anchor_day": anchor_day,
        "cache_version": self._DAILY_REPORT_CACHE_VERSION,
        "report_text": report_text,
        "report_image": report_image,
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    self._save_daily_report_cache(room_id, anchor_day, result, report_kind="operator")
    # The "cached" flag is added only after the record has been saved.
    result["cached"] = False
    return result
async def _get_or_create_fans_daily_report_result(
    self,
    room_id: str,
    anchor_day: str,
    payload: Dict[str, Any],
    *,
    force_regenerate: bool = False,
) -> Dict[str, Any]:
    """
    Return the fan-oriented daily report for a room/day, from cache or freshly generated.

    The fan report uses its own cache kind ("fans"):
    1. It cannot overwrite, or be overwritten by, the operator report;
    2. Style, template and prompt can be upgraded independently later;
    3. During manual debugging it is clear which kind of result was hit.

    Args:
        room_id: Room id.
        anchor_day: Report day.
        payload: Daily report payload used for generation.
        force_regenerate: Skip the cache read.

    Returns:
        On a cache hit: ``report_text``, ``report_image`` and
        ``cached=True``. Otherwise the full freshly generated record with
        ``cached=False``.
    """
    if force_regenerate:
        cached: Dict[str, Any] = {}
    else:
        cached = self._load_daily_report_cache(room_id, anchor_day, report_kind="fans") or {}

    cached_image = self._resolve_existing_report_image(cached.get("report_image"))
    cached_text = str(cached.get("report_text") or "").strip()
    cached_version = int(cached.get("cache_version", 0) or 0)
    # A cache entry is usable only if it is current and carries an image or text.
    if cached_version >= self._DAILY_REPORT_CACHE_VERSION and (cached_image or cached_text):
        return {
            "report_text": cached_text,
            "report_image": cached_image,
            "cached": True,
        }

    report_text = await self._generate_fans_daily_report_text(payload)
    if self._daily_report_send_image:
        report_image = await self._render_fans_daily_report_image(payload)
    else:
        report_image = None

    result = {
        "room_id": room_id,
        "anchor_day": anchor_day,
        "cache_version": self._DAILY_REPORT_CACHE_VERSION,
        "report_text": report_text,
        "report_image": report_image,
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    self._save_daily_report_cache(room_id, anchor_day, result, report_kind="fans")
    # The "cached" flag is added only after the record has been saved.
    result["cached"] = False
    return result
async def _send_daily_reports(
    self,
    anchor_day: str,
    target_group_id: Optional[str] = None,
    force: bool = False,
    force_regenerate: bool = False,
) -> bool:
    """
    Generate and send the operator daily report for every applicable room.

    Rooms come from the target group's subscriptions, or from all
    subscribed rooms when no group is given. A room is skipped when its
    report was already sent (unless ``force``), when it has no sessions,
    when any session is still live, or when its payload is empty.

    Args:
        anchor_day: Report day.
        target_group_id: Restrict sending to this group; all groups
            subscribed to each room otherwise.
        force: Ignore the "already sent" marker.
        force_regenerate: Regenerate the background profile and report
            instead of using cached results.

    Returns:
        True when at least one group delivery succeeded.
    """
    if target_group_id:
        rooms = set(self.redis_manager.list_group_rooms(target_group_id))
    else:
        rooms = self.redis_manager.all_subscribed_rooms()
    if not rooms:
        logger.info(
            f"斗鱼每日报告无可处理房间: day={anchor_day}, target_group={target_group_id or 'ALL'}"
        )
        return False

    delivered_any = False
    for room_id in rooms:
        sent_key = self._daily_report_room_key(room_id, anchor_day, group_id=target_group_id)
        already_sent = (not force) and self.redis_manager.get_text_value(sent_key)
        if already_sent:
            logger.info(f"斗鱼每日报告已发送过,跳过: room={room_id}, day={anchor_day}")
            continue

        sessions = self._load_sessions_for_anchor_day(room_id, anchor_day)
        if not sessions:
            logger.info(f"斗鱼每日报告无 session: room={room_id}, day={anchor_day}")
            continue
        if any(bool(session.get("is_live")) for session in sessions):
            logger.info(f"斗鱼每日报告存在直播中场次,跳过: room={room_id}, day={anchor_day}")
            continue

        payload = self._build_daily_report_payload(room_id, anchor_day, sessions)
        if not payload:
            logger.info(
                f"斗鱼每日报告 payload 为空: room={room_id}, day={anchor_day}, "
                f"sessions={len(sessions)}, min_messages={self._daily_report_min_messages}"
            )
            continue

        # Warm up the background profile before generating the report:
        # 1. On the first hit for a room, try to fill in the streamer background;
        # 2. The result is cached in Redis for later reports of the same room;
        # 3. The payload's room_context is refreshed here.
        payload = await self._ensure_room_background_profile(
            room_id,
            "",
            "",
            sessions,
            payload,
            force_refresh=force_regenerate,
        )
        report_result = await self._get_or_create_daily_report_result(
            room_id,
            anchor_day,
            payload,
            force_regenerate=force_regenerate,
        )
        report_text = str(report_result.get("report_text") or "").strip()
        report_image = self._resolve_existing_report_image(report_result.get("report_image"))

        if target_group_id:
            groups = [target_group_id]
        else:
            groups = self.redis_manager.groups_for_room(room_id)

        delivered = False
        for gid in groups:
            # Skip empty ids and groups where the feature is not enabled.
            if not gid or GroupBotManager.get_group_permission(gid, self.feature) != PermissionStatus.ENABLED:
                continue
            try:
                # Prefer the image; fall back to plain text when there is none.
                if report_image:
                    await self.bot.send_image_message(gid, Path(report_image))
                else:
                    await self.bot.send_text_message(gid, report_text)
                delivered = True
                delivered_any = True
            except Exception as e:
                logger.error(f"发送斗鱼每日报告失败(room={room_id}, group={gid}): {e}")

        # Record the send time once per room when any group received it.
        if delivered:
            self.redis_manager.set_text_value(
                sent_key,
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            )
    return delivered_any
async def _send_fans_daily_reports(
    self,
    anchor_day: str,
    target_group_id: Optional[str] = None,
    *,
    force_regenerate: bool = False,
    force: bool = False,
    respect_sent_flag: bool = False,
) -> bool:
    """
    Send the fan-oriented parody daily report.

    Sending policy:
    1. By default the "already sent" marker is neither checked nor written,
       so the report can be requested repeatedly;
    2. ``respect_sent_flag`` turns on per-room, per-day de-duplication
       (intended for scheduled runs);
    3. ``force`` only means "ignore the sent marker"; it does not change
       the regeneration semantics of ``force_regenerate``.

    Args:
        anchor_day: Report day.
        target_group_id: Restrict sending to this group; all groups
            subscribed to each room otherwise.
        force_regenerate: Regenerate the background profile and report
            instead of using cached results.
        force: Ignore the sent marker when ``respect_sent_flag`` is set.
        respect_sent_flag: Check and write the sent marker.

    Returns:
        True when at least one group delivery succeeded.
    """
    if target_group_id:
        rooms = set(self.redis_manager.list_group_rooms(target_group_id))
    else:
        rooms = self.redis_manager.all_subscribed_rooms()
    if not rooms:
        logger.info(
            f"斗鱼粉丝日报无可处理房间: day={anchor_day}, target_group={target_group_id or 'ALL'}"
        )
        return False

    delivered_any = False
    for room_id in rooms:
        sent_key = self._fans_daily_report_room_key(room_id, anchor_day, group_id=target_group_id)
        # The marker is consulted only when de-duplication is on and not forced.
        check_marker = respect_sent_flag and not force
        if check_marker and self.redis_manager.get_text_value(sent_key):
            continue

        sessions = self._load_sessions_for_anchor_day(room_id, anchor_day)
        if not sessions:
            logger.info(f"斗鱼粉丝日报无 session: room={room_id}, day={anchor_day}")
            continue
        if any(bool(session.get("is_live")) for session in sessions):
            logger.info(f"斗鱼粉丝日报存在直播中场次,跳过: room={room_id}, day={anchor_day}")
            continue

        payload = self._build_daily_report_payload(room_id, anchor_day, sessions)
        if not payload:
            logger.info(
                f"斗鱼粉丝日报 payload 为空: room={room_id}, day={anchor_day}, "
                f"sessions={len(sessions)}, min_messages={self._daily_report_min_messages}"
            )
            continue

        # The fan report needs the same background profile so career memes,
        # related people and classic moments are understood more accurately.
        payload = await self._ensure_room_background_profile(
            room_id,
            "",
            "",
            sessions,
            payload,
            force_refresh=force_regenerate,
        )
        report_result = await self._get_or_create_fans_daily_report_result(
            room_id,
            anchor_day,
            payload,
            force_regenerate=force_regenerate,
        )
        report_text = str(report_result.get("report_text") or "").strip()
        report_image = self._resolve_existing_report_image(report_result.get("report_image"))

        if target_group_id:
            groups = [target_group_id]
        else:
            groups = self.redis_manager.groups_for_room(room_id)

        for gid in groups:
            # Skip empty ids and groups where the feature is not enabled.
            if not gid or GroupBotManager.get_group_permission(gid, self.feature) != PermissionStatus.ENABLED:
                continue
            try:
                # Prefer the image; fall back to plain text when there is none.
                if report_image:
                    await self.bot.send_image_message(gid, Path(report_image))
                else:
                    await self.bot.send_text_message(gid, report_text)
                delivered_any = True
                if respect_sent_flag:
                    self.redis_manager.set_text_value(
                        sent_key,
                        datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    )
            except Exception as e:
                logger.error(f"发送斗鱼粉丝日报失败(room={room_id}, group={gid}): {e}")
    return delivered_any
def _start_danmu_record(self, room_id: str):
    """Fetch the danmu recorder for ``room_id`` via ``_get_danmu_recorder`` and start it."""
    self._get_danmu_recorder(room_id).start()
def _stop_danmu_record(self, room_id: str):
recorder = self._danmu_recorders.get(room_id)
if recorder:
recorder.stop()