Files
abot/plugins/douyu/main.py
liuwei 1eef1b5bf2 修复斗鱼日报Dify max_length字段类型错误
变更项:\n1. 将斗鱼日报传给 Dify 的 max_length 入参由 int 改为 string。\n2. 增加中文注释说明原因:对应 Workflow 输入表单字段为 paragraph 类型,必须传字符串,否则触发 invalid_param 400。\n3. 保持其他字段与逻辑不变,最小化改动范围,确保快速恢复日报链路。
2026-04-20 13:39:12 +08:00

2157 lines
101 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import json
from collections import Counter
from datetime import datetime, timedelta
import os
from pathlib import Path
import threading
import time
from typing import Dict, Any, List, Optional, Tuple, Set
import aiohttp
from loguru import logger
import ssl
import zlib
try:
import websocket
except ImportError:
websocket = None
from base.plugin_common.message_plugin_interface import MessagePluginInterface
from base.plugin_common.plugin_interface import PluginStatus
from db.connection import DBConnectionManager
from utils.ai.unified_llm import UnifiedLLMClient
from plugins.douyu.danmu_summary import DouyuDanmuSummaryHelper
from plugins.douyu.report_template import render_daily_report_html
from utils.decorator.async_job import async_job
from utils.decorator.plugin_decorators import plugin_stats_decorator
from utils.decorator.points_decorator import plugin_points_cost
from utils.markdown_to_image import convert_md_str_to_image, html_to_image
from utils.robot_cmd.robot_command import Feature, PermissionStatus, GroupBotManager
from wechat_ipad import WechatAPIClient
from wechat_ipad.models.appmsg_xml import DOUYU_MESSAGE_XML
class DouyuDanmuRecorder:
def __init__(self, room_id: str, user_agent: str, stats_callback=None, stats_sample_interval_seconds: int = 60):
self.room_id = room_id
self.user_agent = user_agent
self.stats_callback = stats_callback
self.stats_sample_interval_seconds = max(0, int(stats_sample_interval_seconds or 0))
self._thread: Optional[threading.Thread] = None
self._stop_event = threading.Event()
self._ws: Optional[websocket.WebSocketApp] = None
self._buffer: List[str] = []
self._buffer_limit = 10
self._buffer_date: Optional[str] = None
self._lock = threading.Lock()
self._websocket_available = websocket is not None
self._latest_vip_count: Optional[int] = None
self._latest_diamond_count: Optional[int] = None
self._last_stats_signature: Tuple[Optional[int], Optional[int]] = (None, None)
self._connect_retry_count = 3
self._connect_retry_delay_seconds = 1
def _encode(self, msg: str) -> bytes:
content = msg.encode("utf-8") + b"\x00"
length = len(content) + 8
head = length.to_bytes(4, "little") * 2
head += (689).to_bytes(2, "little")
head += b"\x00\x00"
return head + content
@staticmethod
def _parse_parts(line: str) -> Dict[str, Any]:
parts: Dict[str, Any] = {}
for pair in line.split("/"):
if "@=" in pair:
key, value = pair.split("@=", 1)
parts[key] = value
return parts
@staticmethod
def _safe_int(value: Any, default: Optional[int] = None) -> Optional[int]:
try:
return int(str(value))
except Exception:
return default
def _maybe_emit_stats(self, force: bool = False) -> None:
if not self.stats_callback:
return
if self._latest_vip_count is None and self._latest_diamond_count is None:
return
signature = (self._latest_vip_count, self._latest_diamond_count)
if not force:
if signature == self._last_stats_signature:
return
point = {
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"vip_count": self._latest_vip_count,
"diamond_count": self._latest_diamond_count,
}
try:
self.stats_callback(self.room_id, point)
self._last_stats_signature = signature
except Exception as e:
logger.warning(f"斗鱼人数采样回调失败({self.room_id}): {e}")
def _on_message(self, ws, message):
try:
decompressed = zlib.decompress(message, -zlib.MAX_WBITS)
data = decompressed.decode("utf-8", errors="ignore")
except Exception:
data = message.decode("utf-8", errors="ignore")
for line in data.split("\x00"):
line = line.strip()
if not line:
continue
parts = self._parse_parts(line)
msg_type = str(parts.get("type") or "").strip()
if msg_type == "oni":
vip_count = self._safe_int(parts.get("vn"))
if vip_count is not None:
self._latest_vip_count = vip_count
self._maybe_emit_stats()
continue
if msg_type == "dfnum":
diamond_count = self._safe_int(parts.get("dfc"))
if diamond_count is not None:
self._latest_diamond_count = diamond_count
self._maybe_emit_stats()
continue
if msg_type != "chatmsg":
continue
nick = parts.get("nn", "未知")
txt = parts.get("txt", "")
uid = parts.get("uid", "未知")
level = parts.get("level", "0")
fan_group = parts.get("bnn", "")
fan_level = parts.get("bl", "0")
time_stamp = parts.get("cst", "")
if time_stamp:
try:
if time_stamp.isdigit():
ts = int(time_stamp)
if ts > 10 ** 12:
ts = ts / 1000
dt = datetime.fromtimestamp(ts)
time_str = dt.strftime("%Y-%m-%d %H:%M:%S")
else:
dt = datetime.strptime(time_stamp, "%Y-%m-%d %H:%M:%S")
time_str = dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception:
time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
else:
time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
output = f"[{time_str}] {nick} (UID: {uid}, Lv{level}"
if fan_group:
output += f" / {fan_group} Lv{fan_level}"
output += f"){txt}"
self._append_and_maybe_flush(output)
def _flush_locked(self):
if not self._buffer or self._buffer_date is None:
return
dir_path = os.path.join("temp", "douyu_danmu", self._buffer_date)
os.makedirs(dir_path, exist_ok=True)
file_name = os.path.join(dir_path, f"{self.room_id}_{self._buffer_date}.txt")
data = "\n".join(self._buffer) + "\n"
with open(file_name, "a", encoding="utf-8") as f:
f.write(data)
self._buffer.clear()
def _append_and_maybe_flush(self, line: str):
now = datetime.now()
date_str = now.strftime("%Y%m%d")
with self._lock:
if self._buffer_date is None:
self._buffer_date = date_str
elif date_str != self._buffer_date:
self._flush_locked()
self._buffer_date = date_str
self._buffer.append(line)
if len(self._buffer) >= self._buffer_limit:
self._flush_locked()
def _flush(self):
with self._lock:
self._flush_locked()
def _on_open(self, ws):
ws.send(self._encode(f"type@=loginreq/roomid@={self.room_id}/dmbt@=chrome/dmbv@=0/"))
ws.send(self._encode(f"type@=joingroup/rid@={self.room_id}/gid@=-9999/"))
def heartbeat():
while ws.sock and ws.sock.connected and not self._stop_event.is_set():
try:
ws.send(self._encode("type@=mrkl/"))
except Exception:
break
time.sleep(38)
threading.Thread(target=heartbeat, daemon=True).start()
def _on_error(self, ws, error):
logger.error(f"斗鱼弹幕错误({self.room_id}): {error}")
def _on_close(self, ws, code, msg):
logger.info(f"斗鱼弹幕连接关闭({self.room_id}): {code} {msg}")
def _run(self):
if not self._websocket_available:
logger.error(f"websocket-client 未安装,无法记录弹幕({self.room_id})")
return
try:
websocket.enableTrace(False)
ws_urls = [
"wss://danmuproxy.douyu.com:8501/",
"wss://danmuproxy.douyu.com:8502/",
"wss://danmuproxy.douyu.com:8503/",
"wss://danmuproxy.douyu.com:8504/",
"wss://danmuproxy.douyu.com:8505/",
"wss://danmuproxy.douyu.com:8506/",
]
sslopt = {
"cert_reqs": ssl.CERT_NONE,
"ssl_version": ssl.PROTOCOL_TLS_CLIENT,
"ciphers": "DEFAULT@SECLEVEL=1",
}
headers = {"User-Agent": self.user_agent}
for url in ws_urls:
if self._stop_event.is_set():
break
for attempt in range(1, self._connect_retry_count + 1):
if self._stop_event.is_set():
break
reconnect_needed = False
try:
self._ws = websocket.WebSocketApp(
url,
on_open=self._on_open,
on_message=self._on_message,
on_error=self._on_error,
on_close=self._on_close,
header=headers,
)
self._ws.run_forever(sslopt=sslopt, ping_interval=30, ping_timeout=10)
if self._stop_event.is_set():
break
reconnect_needed = True
except Exception as e:
if attempt < self._connect_retry_count:
logger.warning(
f"斗鱼弹幕连接失败({self.room_id}),第{attempt}/{self._connect_retry_count}次重试: "
f"url={url} err={e}"
)
time.sleep(self._connect_retry_delay_seconds)
continue
logger.error(
f"斗鱼弹幕连接失败({self.room_id}),已重试{self._connect_retry_count}次: "
f"url={url} err={e}"
)
finally:
self._ws = None
if reconnect_needed and attempt < self._connect_retry_count:
logger.warning(
f"斗鱼弹幕连接中断({self.room_id}),第{attempt}/{self._connect_retry_count}次重试: url={url}"
)
time.sleep(self._connect_retry_delay_seconds)
continue
if reconnect_needed and attempt >= self._connect_retry_count:
logger.error(
f"斗鱼弹幕连接中断({self.room_id}),已重试{self._connect_retry_count}次: url={url}"
)
break
if self._stop_event.is_set():
break
time.sleep(self._connect_retry_delay_seconds)
finally:
self._ws = None
def start(self):
if self._thread and self._thread.is_alive():
return
self._stop_event.clear()
self._thread = threading.Thread(target=self._run, daemon=True)
self._thread.start()
def stop(self):
self._maybe_emit_stats(force=True)
self._flush()
self._stop_event.set()
if self._ws:
try:
self._ws.close()
except Exception:
pass
class DouyuRedisManager:
def __init__(self, db_manager: DBConnectionManager):
self.redis = db_manager.get_redis_connection()
self.prefix = "bot:douyu:"
def add_group_room(self, group_id: str, room_id: str) -> bool:
key = f"{self.prefix}group:{group_id}:rooms"
return self.redis.sadd(key, room_id) >= 0
def remove_group_room(self, group_id: str, room_id: str) -> bool:
key = f"{self.prefix}group:{group_id}:rooms"
return self.redis.srem(key, room_id) >= 0
def list_group_rooms(self, group_id: str) -> List[str]:
key = f"{self.prefix}group:{group_id}:rooms"
rooms = self.redis.smembers(key) or set()
result = []
for r in rooms:
result.append(r.decode("utf-8") if isinstance(r, bytes) else r)
return sorted(result)
def all_subscribed_rooms(self) -> Set[str]:
groups = GroupBotManager.get_group_list()
rooms: Set[str] = set()
for gid in groups:
for r in self.list_group_rooms(gid):
rooms.add(r)
return rooms
def groups_for_room(self, room_id: str) -> List[str]:
groups = GroupBotManager.get_group_list()
res = []
for gid in groups:
if room_id in set(self.list_group_rooms(gid)):
res.append(gid)
return res
# --- 鱼吧相关方法 ---
def add_group_yuba(self, group_id: str, hash_id: str) -> bool:
key = f"{self.prefix}group:{group_id}:yubas"
return self.redis.sadd(key, hash_id) >= 0
def remove_group_yuba(self, group_id: str, hash_id: str) -> bool:
key = f"{self.prefix}group:{group_id}:yubas"
return self.redis.srem(key, hash_id) >= 0
def list_group_yubas(self, group_id: str) -> List[str]:
key = f"{self.prefix}group:{group_id}:yubas"
yubas = self.redis.smembers(key) or set()
result = []
for y in yubas:
result.append(y.decode("utf-8") if isinstance(y, bytes) else y)
return sorted(result)
def all_subscribed_yubas(self) -> Set[str]:
groups = GroupBotManager.get_group_list()
yubas: Set[str] = set()
for gid in groups:
for y in self.list_group_yubas(gid):
yubas.add(y)
return yubas
def groups_for_yuba(self, hash_id: str) -> List[str]:
groups = GroupBotManager.get_group_list()
res = []
for gid in groups:
if hash_id in set(self.list_group_yubas(gid)):
res.append(gid)
return res
def get_yuba_last_id(self, hash_id: str) -> Optional[str]:
key = f"{self.prefix}yuba_last_id:{hash_id}"
data = self.redis.get(key)
if not data:
return None
return data.decode("utf-8") if isinstance(data, bytes) else data
def set_yuba_last_id(self, hash_id: str, feed_id: str) -> bool:
key = f"{self.prefix}yuba_last_id:{hash_id}"
return self.redis.set(key, feed_id)
# --- 提醒名单方法 ---
def add_group_subscriber(self, group_id: str, user_id: str) -> bool:
key = f"{self.prefix}group:{group_id}:subscribers"
return self.redis.sadd(key, user_id) >= 0
def remove_group_subscriber(self, group_id: str, user_id: str) -> bool:
key = f"{self.prefix}group:{group_id}:subscribers"
return self.redis.srem(key, user_id) >= 0
def list_group_subscribers(self, group_id: str) -> List[str]:
key = f"{self.prefix}group:{group_id}:subscribers"
subs = self.redis.smembers(key) or set()
result = []
for s in subs:
result.append(s.decode("utf-8") if isinstance(s, bytes) else s)
return sorted(result)
def get_room_status(self, room_id: str) -> Optional[Dict[str, Any]]:
key = f"{self.prefix}room_status:{room_id}"
data = self.redis.get(key)
if not data:
return None
if isinstance(data, bytes):
data = data.decode("utf-8")
try:
return json.loads(data)
except Exception:
return None
def set_room_status(self, room_id: str, status: Dict[str, Any]) -> bool:
key = f"{self.prefix}room_status:{room_id}"
return self.redis.set(key, json.dumps(status, ensure_ascii=False))
def get_room_session(self, room_id: str, session_id: str) -> Optional[Dict[str, Any]]:
key = f"{self.prefix}room:{room_id}:session:{session_id}"
data = self.redis.get(key)
if not data:
return None
if isinstance(data, bytes):
data = data.decode("utf-8")
try:
return json.loads(data)
except Exception:
return None
def save_room_session(self, room_id: str, session: Dict[str, Any]) -> bool:
session_id = str(session.get("session_id") or "").strip()
if not session_id:
return False
payload = json.dumps(session, ensure_ascii=False)
session_key = f"{self.prefix}room:{room_id}:session:{session_id}"
latest_key = f"{self.prefix}room:{room_id}:latest_session"
index_key = f"{self.prefix}room:{room_id}:session_ids"
pipe = self.redis.pipeline()
pipe.set(session_key, payload)
pipe.set(latest_key, session_id)
pipe.lrem(index_key, 0, session_id)
pipe.lpush(index_key, session_id)
pipe.ltrim(index_key, 0, 29)
result = pipe.execute()
return bool(result)
def get_latest_room_session(self, room_id: str) -> Optional[Dict[str, Any]]:
latest_key = f"{self.prefix}room:{room_id}:latest_session"
session_id = self.redis.get(latest_key)
if not session_id:
return None
if isinstance(session_id, bytes):
session_id = session_id.decode("utf-8")
return self.get_room_session(room_id, str(session_id))
def list_room_session_ids(self, room_id: str, limit: int = 10) -> List[str]:
key = f"{self.prefix}room:{room_id}:session_ids"
rows = self.redis.lrange(key, 0, max(limit - 1, 0)) or []
result = []
for row in rows:
result.append(row.decode("utf-8") if isinstance(row, bytes) else str(row))
return result
def get_text_value(self, key: str) -> Optional[str]:
data = self.redis.get(key)
if not data:
return None
return data.decode("utf-8") if isinstance(data, bytes) else str(data)
def set_text_value(self, key: str, value: str) -> bool:
return bool(self.redis.set(key, value))
class DouyuPlugin(MessagePluginInterface):
# 报告缓存版本号:
# 1. 版本升级后会自动让历史缓存失效,避免继续复用旧文本/旧图片;
# 2. 本次将版本提升到 5用于修复“手动触发日报未走 Dify命中缓存”的问题
# 确保升级后首次执行会重新走 LLM 生成链路。
_DAILY_REPORT_CACHE_VERSION = 5
FEATURE_KEY = "DOUYU_MONITOR"
FEATURE_DESCRIPTION = "🎮 斗鱼开播提醒 [订阅斗鱼 房间号, 取消订阅斗鱼 房间号]"
@property
def name(self) -> str:
return "斗鱼直播"
@property
def version(self) -> str:
return "1.0.0"
@property
def description(self) -> str:
return "斗鱼主播开播下播提醒与群订阅管理"
@property
def author(self) -> str:
return "ABOT Team"
@property
def command_prefix(self) -> Optional[str]:
return ""
@property
def commands(self) -> List[str]:
return self._commands
@property
def feature_key(self) -> Optional[str]:
return self.FEATURE_KEY
@property
def feature_description(self) -> Optional[str]:
return self.FEATURE_DESCRIPTION
def __init__(self):
super().__init__()
self.bot: WechatAPIClient = None
self.feature = self.register_feature()
self.redis_manager: Optional[DouyuRedisManager] = None
self._commands = ["斗鱼订阅", "取消斗鱼订阅", "斗鱼订阅列表", "斗鱼订阅提醒", "取消斗鱼订阅提醒",
"订阅鱼吧", "取消订阅鱼吧", "鱼吧订阅列表",
"#斗鱼弹幕日报", "斗鱼弹幕日报", "#强制斗鱼弹幕日报", "强制斗鱼弹幕日报"]
self._api_template = "https://www.douyu.com/betard/{room_id}"
self._yuba_api = "https://yuba.douyu.com/wgapi/yubanc/api/feed/getUserFeedList"
self._user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
self._check_interval = 5
self._session_cutoff_hour = 6
self._merge_gap_hours = 4
self._daily_report_enable = True
self._daily_report_time = "10:05"
self._daily_report_min_messages = 120
self._daily_report_use_llm = False
self._daily_report_max_sessions = 4
self._daily_report_max_length = 1800
self._daily_report_send_image = True
# Dify 入参策略:
# 默认发送精简字段,避免某些 Workflow 对复杂对象输入校验严格导致 400。
# 如需在工作流中使用完整结构化 payload可在 report_api 显式开启。
self._daily_report_include_structured_inputs = False
self._audience_stats_sample_interval_seconds = 60
self._status_check_retry_count = 3
self._status_check_retry_delay_seconds = 1
self._daily_report_llm_client: Optional[UnifiedLLMClient] = None
self._danmu_recorders: Dict[str, DouyuDanmuRecorder] = {}
async_job.every_minutes(self._check_interval)(self._scheduled_unified_check_job)
async_job.every_minutes(5)(self._scheduled_daily_report_tick)
@staticmethod
def _format_exception(exc: Exception) -> str:
message = str(exc).strip()
if message:
return f"{type(exc).__name__}: {message}"
return type(exc).__name__
async def _fetch_json_with_retries(self, session: aiohttp.ClientSession, url: str,
headers: Dict[str, str], context: str,
params: Optional[Dict[str, Any]] = None) -> Any:
last_error: Optional[Exception] = None
for attempt in range(1, self._status_check_retry_count + 1):
try:
async with session.get(
url,
headers=headers,
params=params,
timeout=aiohttp.ClientTimeout(total=10)
) as resp:
resp.raise_for_status()
return await resp.json(content_type=None)
except Exception as e:
last_error = e
if attempt < self._status_check_retry_count:
logger.warning(
f"{context}失败,第{attempt}/{self._status_check_retry_count}次重试: "
f"{self._format_exception(e)}"
)
await asyncio.sleep(self._status_check_retry_delay_seconds)
continue
raise
if last_error:
raise last_error
raise RuntimeError(f"{context}失败,未获取到有效响应")
async def _scheduled_unified_check_job(self):
"""统一检查直播和鱼吧动态"""
await self._scheduled_check_job()
await self._scheduled_yuba_check_job()
async def _scheduled_daily_report_tick(self):
"""每 5 分钟检查一次,命中配置时间后发送前一天日报。"""
if not self._daily_report_enable or not self.redis_manager or not self.bot:
return
now_dt = datetime.now()
if not self._should_run_daily_report(now_dt):
return
anchor_day = (now_dt - timedelta(days=1)).strftime("%Y-%m-%d")
try:
await self._send_daily_reports(anchor_day)
self.redis_manager.set_text_value(self._daily_report_job_key(now_dt.strftime("%Y-%m-%d")), now_dt.strftime("%Y-%m-%d %H:%M:%S"))
except Exception as e:
logger.error(f"斗鱼每日报告任务失败(anchor_day={anchor_day}): {e}")
def initialize(self, context: Dict[str, Any]) -> bool:
try:
dbm = DBConnectionManager.get_instance()
self.redis_manager = DouyuRedisManager(dbm)
self.bot = context.get("bot", self.bot)
cfg = self._config.get("Douyu", {})
cfg_cmds = cfg.get("command", [])
if isinstance(cfg_cmds, list) and cfg_cmds:
self._commands = list(dict.fromkeys(cfg_cmds + self._commands))
self._api_template = cfg.get("api_url_template", self._api_template)
self._user_agent = cfg.get("user_agent", self._user_agent)
self._check_interval = int(cfg.get("check_interval_minutes", self._check_interval))
self._session_cutoff_hour = int(cfg.get("session_cutoff_hour", self._session_cutoff_hour))
self._merge_gap_hours = int(cfg.get("merge_gap_hours", self._merge_gap_hours))
self._daily_report_enable = bool(cfg.get("daily_report_enable", self._daily_report_enable))
self._daily_report_time = str(cfg.get("daily_report_time", self._daily_report_time) or self._daily_report_time)
self._daily_report_min_messages = int(
cfg.get("daily_report_min_messages", self._daily_report_min_messages)
)
self._daily_report_use_llm = bool(cfg.get("daily_report_use_llm", self._daily_report_use_llm))
self._daily_report_max_sessions = int(cfg.get("daily_report_max_sessions", self._daily_report_max_sessions))
self._daily_report_max_length = int(cfg.get("daily_report_max_length", self._daily_report_max_length))
self._daily_report_send_image = bool(cfg.get("daily_report_send_image", self._daily_report_send_image))
self._audience_stats_sample_interval_seconds = int(
cfg.get("audience_stats_sample_interval_seconds", self._audience_stats_sample_interval_seconds)
)
report_api_cfg = cfg.get("report_api", {}) or {}
self._daily_report_include_structured_inputs = bool(
report_api_cfg.get(
"include_structured_inputs",
self._daily_report_include_structured_inputs,
)
)
if report_api_cfg:
self._daily_report_llm_client = UnifiedLLMClient(report_api_cfg)
return True
except Exception as e:
logger.error(f"{self.name} 初始化失败: {e}")
return False
def start(self) -> bool:
self.status = PluginStatus.RUNNING
return True
def stop(self) -> bool:
for recorder in getattr(self, "_danmu_recorders", {}).values():
recorder.stop()
self.status = PluginStatus.STOPPED
return True
def can_process(self, message: Dict[str, Any]) -> bool:
content = str(message.get("content", "")).strip()
if not content:
return False
first_token = content.split()[0]
return first_token in self._commands
@plugin_stats_decorator(plugin_name="斗鱼直播")
async def process_message(self, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
content = str(message.get("content", "")).strip()
sender = message.get("sender")
roomid = message.get("roomid", "")
gbm: GroupBotManager = message.get("gbm")
self.bot: WechatAPIClient = message.get("bot")
if roomid and gbm.get_group_permission(roomid, self.feature) == PermissionStatus.DISABLED:
return False, "没有权限"
first_token = content.split()[0]
if content == "斗鱼订阅列表":
rooms = self.redis_manager.list_group_rooms(roomid or sender)
if not rooms:
await self.bot.send_text_message(roomid or sender, "暂无订阅", sender)
return True, "暂无订阅"
text = "当前订阅的斗鱼房间:\n" + "\n".join(rooms)
await self.bot.send_text_message(roomid or sender, text, sender)
return True, "列表已发送"
if first_token == "斗鱼订阅提醒":
if not roomid:
await self.bot.send_text_message(sender, "请在群聊中使用该命令", sender)
return True, "仅支持群聊"
ok = self.redis_manager.add_group_subscriber(roomid, sender)
await self.bot.send_at_message(roomid, "已加入斗鱼订阅提醒名单", [sender])
return True, "加入提醒名单成功" if ok else "加入提醒名单失败"
if first_token == "取消斗鱼订阅提醒":
if not roomid:
await self.bot.send_text_message(sender, "请在群聊中使用该命令", sender)
return True, "仅支持群聊"
ok = self.redis_manager.remove_group_subscriber(roomid, sender)
await self.bot.send_at_message(roomid, "已取消斗鱼订阅提醒", [sender])
return True, "取消提醒成功" if ok else "取消提醒失败"
if first_token == "斗鱼订阅":
parts = content.split()
if len(parts) < 2:
await self.bot.send_text_message(roomid or sender, "请提供房间号,例如:订阅斗鱼 7718843", sender)
return True, "命令格式错误"
room_id = parts[1].strip()
if not room_id.isdigit():
await self.bot.send_text_message(roomid or sender, "房间号必须是数字,例如:斗鱼订阅 52876", sender)
return True, "命令格式错误"
ok = self.redis_manager.add_group_room(roomid or sender, room_id)
await self.bot.send_text_message(roomid or sender, f"✅ 已订阅斗鱼房间 {room_id}", sender)
return True, "订阅成功" if ok else "订阅失败"
if first_token == "取消斗鱼订阅":
parts = content.split()
if len(parts) < 2:
await self.bot.send_text_message(roomid or sender, "请提供房间号,例如:取消订阅斗鱼 7718843", sender)
return True, "命令格式错误"
room_id = parts[1].strip()
if not room_id.isdigit():
await self.bot.send_text_message(roomid or sender, "房间号必须是数字,例如:取消斗鱼订阅 52876", sender)
return True, "命令格式错误"
ok = self.redis_manager.remove_group_room(roomid or sender, room_id)
await self.bot.send_text_message(roomid or sender, f"✅ 已取消订阅斗鱼房间 {room_id}", sender)
return True, "取消成功" if ok else "取消失败"
if content == "鱼吧订阅列表":
yubas = self.redis_manager.list_group_yubas(roomid or sender)
if not yubas:
await self.bot.send_text_message(roomid or sender, "暂无鱼吧订阅", sender)
return True, "暂无鱼吧订阅"
text = "当前订阅的斗鱼鱼吧:\n" + "\n".join(yubas)
await self.bot.send_text_message(roomid or sender, text, sender)
return True, "列表已发送"
if first_token == "订阅鱼吧":
parts = content.split()
if len(parts) < 2:
await self.bot.send_text_message(roomid or sender, "请提供鱼吧 hash_id例如订阅鱼吧 PDAP2zEk3nwx",
sender)
return True, "命令格式错误"
hash_id = parts[1].strip()
ok = self.redis_manager.add_group_yuba(roomid or sender, hash_id)
await self.bot.send_text_message(roomid or sender, f"✅ 已订阅斗鱼鱼吧 {hash_id}", sender)
return True, "订阅成功" if ok else "订阅失败"
if first_token == "取消订阅鱼吧":
parts = content.split()
if len(parts) < 2:
await self.bot.send_text_message(roomid or sender, "请提供鱼吧 hash_id例如取消订阅鱼吧 PDAP2zEk3nwx",
sender)
return True, "命令格式错误"
hash_id = parts[1].strip()
ok = self.redis_manager.remove_group_yuba(roomid or sender, hash_id)
await self.bot.send_text_message(roomid or sender, f"✅ 已取消订阅斗鱼鱼吧 {hash_id}", sender)
return True, "取消成功" if ok else "取消失败"
if first_token in {"#斗鱼弹幕日报", "斗鱼弹幕日报"}:
if not roomid:
await self.bot.send_text_message(sender, "请在群聊中使用该命令", sender)
return True, "仅支持群聊"
parts = content.split()
anchor_day = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
if len(parts) >= 2:
day_text = parts[1].strip()
try:
anchor_day = datetime.strptime(day_text, "%Y-%m-%d").strftime("%Y-%m-%d")
except Exception:
await self.bot.send_text_message(roomid, "日期格式错误,请使用:#斗鱼弹幕日报 2026-04-07", sender)
return True, "日期格式错误"
await self.bot.send_text_message(roomid, f"⏳ 正在生成斗鱼弹幕日报:{anchor_day}", sender)
# 普通手动命令也默认重生成,避免命中缓存后看起来“没有走 Dify”。
# 定时任务仍保留缓存策略,这里只影响人工触发路径。
delivered = await self._send_daily_reports(
anchor_day,
target_group_id=roomid,
force=True,
force_regenerate=True,
)
if delivered:
return True, f"斗鱼弹幕日报已发送:{anchor_day}"
await self.bot.send_text_message(roomid, f"暂无可发送的斗鱼弹幕日报:{anchor_day}", sender)
return True, "暂无日报"
if first_token in {"#强制斗鱼弹幕日报", "强制斗鱼弹幕日报"}:
if not roomid:
await self.bot.send_text_message(sender, "请在群聊中使用该命令", sender)
return True, "仅支持群聊"
parts = content.split()
anchor_day = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
if len(parts) >= 2:
day_text = parts[1].strip()
try:
anchor_day = datetime.strptime(day_text, "%Y-%m-%d").strftime("%Y-%m-%d")
except Exception:
await self.bot.send_text_message(roomid, "日期格式错误,请使用:#强制斗鱼弹幕日报 2026-04-07", sender)
return True, "日期格式错误"
# 这里明确提示“强制重生成”,便于群内区分普通日报和回归测试操作。
await self.bot.send_text_message(roomid, f"⏳ 正在强制重生成斗鱼弹幕日报:{anchor_day}", sender)
delivered = await self._send_daily_reports(
anchor_day,
target_group_id=roomid,
force=True,
force_regenerate=True,
)
if delivered:
return True, f"斗鱼弹幕日报已强制重生成并发送:{anchor_day}"
await self.bot.send_text_message(roomid, f"暂无可发送的斗鱼弹幕日报:{anchor_day}", sender)
return True, "暂无日报"
return False, None
async def _scheduled_check_job(self):
try:
rooms = self.redis_manager.all_subscribed_rooms()
if not rooms:
return
async with aiohttp.ClientSession() as session:
for room_id in rooms:
try:
url = self._api_template.format(room_id=room_id)
headers = {
"User-Agent": self._user_agent,
"Referer": f"https://www.douyu.com/{room_id}"
}
data = await self._fetch_json_with_retries(
session,
url,
headers,
context=f"斗鱼在线检查(room_id={room_id})"
)
room_info = data.get("room", {}) if isinstance(data, dict) else {}
show_status = room_info.get("show_status")
nickname = room_info.get("nickname", "")
room_name = room_info.get("room_name", "")
avatar = room_info.get("avatar", {}) or {}
thumb_url = str(avatar.get("small", "") or "").strip().strip("`").strip()
video_loop_raw = room_info.get("videoLoop", 0)
try:
video_loop = int(str(video_loop_raw))
except Exception:
video_loop = 0
prev = self.redis_manager.get_room_status(room_id) or {}
prev_live = prev.get("is_live")
curr_live = True if show_status == 1 and video_loop == 0 else False
status_obj = {
"is_live": curr_live,
"nickname": nickname,
"room_name": room_name,
"is_loop": True if video_loop == 1 else False
}
self.redis_manager.set_room_status(room_id, status_obj)
if prev_live is None and curr_live is False:
continue
if prev_live is None and curr_live is True:
await self._notify_groups_live(room_id, nickname, room_name, thumb_url)
continue
if prev_live is False and curr_live is True:
await self._notify_groups_live(room_id, nickname, room_name, thumb_url)
continue
if prev_live is True and curr_live is False:
await self._notify_groups_offline(room_id, nickname, room_name, video_loop == 1)
continue
if prev_live is True and curr_live is True and room_id not in self._danmu_recorders:
try:
room_session = self._open_or_resume_session(room_id, nickname, room_name)
if room_session:
logger.info(
f"检测到持续直播状态,续接斗鱼直播会话({room_id}): "
f"session={room_session.get('session_id')}"
)
logger.info(f"检测到持续直播状态,补偿启动斗鱼弹幕记录({room_id})")
self._start_danmu_record(room_id)
except Exception as e:
logger.exception(
f"补偿启动斗鱼弹幕记录失败({room_id}): {self._format_exception(e)}"
)
continue
await asyncio.sleep(0.1)
except Exception as e:
logger.exception(
f"斗鱼检查失败(room_id={room_id}): {self._format_exception(e)}"
)
continue
except Exception as e:
logger.exception(f"斗鱼定时任务异常: {self._format_exception(e)}")
async def _notify_groups_live(self, room_id: str, nickname: str, room_name: str, thumb_url: str):
groups = self.redis_manager.groups_for_room(room_id)
text = f"🚀 斗鱼开播通知 \n🎤 {nickname} 正在直播中!\n 📌 房间标题:{room_name} \n 👉 点击观看https://www.douyu.com/{room_id}"
xml_content = DOUYU_MESSAGE_XML.format(title=room_name, liver=nickname, roomid=room_id, thumburl=thumb_url)
for gid in groups:
if GroupBotManager.get_group_permission(gid, self.feature) == PermissionStatus.ENABLED:
try:
subs = self.redis_manager.list_group_subscribers(gid)
if subs:
await self.bot.send_at_message(gid, text, subs)
else:
await self.bot.send_text_message(gid, text)
await self.bot.send_link_xml_message(xml_content, gid)
except Exception as e:
logger.error(f"发送斗鱼开播提醒失败: {e}")
continue
try:
session = self._open_or_resume_session(room_id, nickname, room_name)
if session:
logger.info(
f"斗鱼直播会话开启/续接: room={room_id}, session={session.get('session_id')}, "
f"segments={len(session.get('segments', []))}, anchor_day={session.get('anchor_day')}"
)
logger.info(f"启动斗鱼弹幕记录({room_id})")
self._start_danmu_record(room_id)
except Exception as e:
logger.error(f"启动斗鱼弹幕记录失败({room_id}): {e}")
async def _notify_groups_offline(self, room_id: str, nickname: str, room_name: str, is_loop: bool = False):
groups = self.redis_manager.groups_for_room(room_id)
text = f"🔔 斗鱼提醒:{nickname} 下播啦~\n 🏷️ {room_name}"
if is_loop:
text += "(当前为轮播)"
for gid in groups:
if GroupBotManager.get_group_permission(gid, self.feature) == PermissionStatus.ENABLED:
try:
await self.bot.send_text_message(gid, text)
except Exception as e:
logger.error(f"发送斗鱼下播提醒失败: {e}")
continue
try:
session = self._close_active_session(room_id, nickname, room_name)
if session:
logger.info(
f"斗鱼直播会话关闭片段: room={room_id}, session={session.get('session_id')}, "
f"segments={len(session.get('segments', []))}, is_live={session.get('is_live')}"
)
logger.info(f"停止斗鱼弹幕记录({room_id})")
self._stop_danmu_record(room_id)
except Exception as e:
logger.error(f"停止斗鱼弹幕记录失败({room_id}): {e}")
async def _scheduled_yuba_check_job(self):
try:
yubas = self.redis_manager.all_subscribed_yubas()
if not yubas:
return
async with aiohttp.ClientSession() as session:
for hash_id in yubas:
try:
params = {
"filter_type": 1,
"hash_id": hash_id,
"limit": 10,
"offset": 0
}
headers = {
"User-Agent": self._user_agent,
"Referer": f"https://yuba.douyu.com/member/{hash_id}/main/news",
}
data = await self._fetch_json_with_retries(
session,
self._yuba_api,
headers,
context=f"斗鱼鱼吧检查(hash_id={hash_id})",
params=params
)
if data.get("error") != 0:
logger.error(f"斗鱼鱼吧 API 错误 ({hash_id}): {data.get('msg')}")
continue
feed_list = data.get("data", {}).get("feed_list", [])
# 查找第一条【非置顶】动态
target_feed = None
for feed in feed_list:
if feed.get("home_feed_top") == 1:
continue
target_feed = feed
break
if not target_feed:
continue
feed_id = str(target_feed.get("feed_id"))
last_id = self.redis_manager.get_yuba_last_id(hash_id)
if last_id and feed_id == last_id:
continue
# 发现新动态
nickname = target_feed.get("publisher", {}).get("nickname", "未知主播")
content = target_feed.get("text", "")
ctime = target_feed.get("ctime")
from datetime import datetime
publish_time = datetime.fromtimestamp(int(ctime)).strftime(
'%Y-%m-%d %H:%M:%S') if ctime else "未知时间"
# 限制内容长度
if len(content) > 200:
content = content[:200] + "..."
full_url = f"https://yuba.douyu.com/feed/{feed_id}"
await self._notify_groups_yuba(hash_id, nickname, content, full_url, publish_time)
# 保存标记
self.redis_manager.set_yuba_last_id(hash_id, feed_id)
await asyncio.sleep(0.5)
except Exception as e:
logger.exception(
f"检查斗鱼鱼吧({hash_id})失败: {self._format_exception(e)}"
)
continue
except Exception as e:
logger.exception(f"斗鱼鱼吧定时任务异常: {self._format_exception(e)}")
async def _notify_groups_yuba(self, hash_id: str, nickname: str, content: str, url: str,
publish_time: str = "未知时间"):
groups = self.redis_manager.groups_for_yuba(hash_id)
text = f"🌟 斗鱼鱼吧动态提醒 \n👤 主播:{nickname}\n⏰ 时间:{publish_time}\n📝 内容:{content}\n🔗 链接:{url}"
for gid in groups:
if GroupBotManager.get_group_permission(gid, self.feature) == PermissionStatus.ENABLED:
try:
await self.bot.send_text_message(gid, text)
except Exception as e:
logger.error(f"发送斗鱼鱼吧动态提醒失败: {e}")
continue
def _get_danmu_recorder(self, room_id: str) -> DouyuDanmuRecorder:
recorder = self._danmu_recorders.get(room_id)
if not recorder:
recorder = DouyuDanmuRecorder(
room_id,
self._user_agent,
stats_callback=self._record_room_audience_point,
stats_sample_interval_seconds=self._audience_stats_sample_interval_seconds,
)
self._danmu_recorders[room_id] = recorder
return recorder
@classmethod
def _normalize_audience_points(cls, points: List[Dict[str, Any]], limit: int = 720) -> List[Dict[str, Any]]:
minute_map: Dict[str, Dict[str, Any]] = {}
for item in points or []:
timestamp = str(item.get("timestamp") or "").strip()
point_dt = cls._parse_session_time(timestamp)
if not point_dt:
continue
minute_key = point_dt.strftime("%Y-%m-%d %H:%M")
minute_map[minute_key] = {
"timestamp": timestamp,
"vip_count": int(item.get("vip_count", 0) or 0),
"diamond_count": int(item.get("diamond_count", 0) or 0),
}
normalized = list(minute_map.values())
normalized.sort(key=lambda row: row.get("timestamp", ""))
if len(normalized) <= limit:
return normalized
bucket_size_minutes = max((len(normalized) + limit - 1) // limit, 1)
bucket_map: Dict[str, Dict[str, Any]] = {}
for item in normalized:
point_dt = cls._parse_session_time(str(item.get("timestamp") or ""))
if not point_dt:
continue
total_minutes = int(point_dt.timestamp() // 60)
bucket_start_minutes = total_minutes - (total_minutes % bucket_size_minutes)
bucket_key = str(bucket_start_minutes)
bucket_map[bucket_key] = item
compressed = list(bucket_map.values())
compressed.sort(key=lambda row: row.get("timestamp", ""))
if len(compressed) > limit:
compressed = compressed[-limit:]
return compressed
def _record_room_audience_point(self, room_id: str, point: Dict[str, Any]) -> None:
if not self.redis_manager or not room_id:
return
session = self.redis_manager.get_latest_room_session(room_id)
if not session or not bool(session.get("is_live")):
return
current_points = self._normalize_audience_points(list(session.get("audience_points", []) or []))
merged_points = self._normalize_audience_points(current_points + [point])
if merged_points == current_points:
return
session["audience_points"] = merged_points
session["updated_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
self.redis_manager.save_room_session(room_id, session)
def _resolve_anchor_day(self, target_dt: datetime) -> str:
if target_dt.hour < self._session_cutoff_hour:
target_dt = target_dt - timedelta(days=1)
return target_dt.strftime("%Y-%m-%d")
@staticmethod
def _parse_session_time(value: str) -> Optional[datetime]:
if not value:
return None
try:
return datetime.strptime(str(value), "%Y-%m-%d %H:%M:%S")
except Exception:
return None
@staticmethod
def _find_open_segment(session: Dict[str, Any]) -> Optional[Dict[str, Any]]:
for segment in reversed(session.get("segments", []) or []):
if not str(segment.get("end_time") or "").strip():
return segment
return None
def _should_merge_with_latest_session(self, latest_session: Optional[Dict[str, Any]], now_dt: datetime) -> bool:
if not latest_session:
return False
if latest_session.get("is_live"):
return True
segments = latest_session.get("segments", []) or []
if not segments:
return False
last_segment = segments[-1]
end_dt = self._parse_session_time(last_segment.get("end_time", ""))
if not end_dt:
return False
gap_seconds = (now_dt - end_dt).total_seconds()
return 0 <= gap_seconds <= self._merge_gap_hours * 3600
def _open_or_resume_session(self, room_id: str, nickname: str, room_name: str) -> Optional[Dict[str, Any]]:
if not self.redis_manager:
return None
now_dt = datetime.now()
now_str = now_dt.strftime("%Y-%m-%d %H:%M:%S")
latest_session = self.redis_manager.get_latest_room_session(room_id) or {}
if self._should_merge_with_latest_session(latest_session, now_dt):
session = dict(latest_session)
open_segment = self._find_open_segment(session)
if not open_segment:
segments = list(session.get("segments", []) or [])
segments.append({"start_time": now_str, "end_time": ""})
session["segments"] = segments
else:
anchor_day = self._resolve_anchor_day(now_dt)
session = {
"session_id": f"{room_id}_{anchor_day.replace('-', '')}_{now_dt.strftime('%H%M%S')}",
"room_id": room_id,
"anchor_day": anchor_day,
"nickname": nickname,
"room_name": room_name,
"segments": [{"start_time": now_str, "end_time": ""}],
"audience_points": [],
"is_live": True,
"summary_status": "pending",
"summary_generated_at": "",
"created_at": now_str,
}
session["nickname"] = nickname or session.get("nickname", "")
session["room_name"] = room_name or session.get("room_name", "")
session["audience_points"] = self._normalize_audience_points(list(session.get("audience_points", []) or []))
session["is_live"] = True
session["updated_at"] = now_str
session["last_live_at"] = now_str
self.redis_manager.save_room_session(room_id, session)
return session
def _close_active_session(self, room_id: str, nickname: str, room_name: str) -> Optional[Dict[str, Any]]:
if not self.redis_manager:
return None
session = self.redis_manager.get_latest_room_session(room_id)
if not session:
return None
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
open_segment = self._find_open_segment(session)
if open_segment:
open_segment["end_time"] = now_str
session["nickname"] = nickname or session.get("nickname", "")
session["room_name"] = room_name or session.get("room_name", "")
session["is_live"] = False
session["updated_at"] = now_str
session["last_offline_at"] = now_str
self.redis_manager.save_room_session(room_id, session)
return session
def get_room_session(self, room_id: str, session_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
if not self.redis_manager or not room_id:
return None
if session_id:
return self.redis_manager.get_room_session(room_id, session_id)
return self.redis_manager.get_latest_room_session(room_id)
def build_session_danmu_material(self, room_id: str, session_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""
旁路能力:从已有日文件中按直播 session 抽取有效弹幕,并压缩成可供后续总结使用的材料。
当前不影响提醒、采集、群消息发送主流程。
"""
session = self.get_room_session(room_id, session_id=session_id)
if not session:
return None
messages = DouyuDanmuSummaryHelper.load_session_messages(room_id, session)
material = DouyuDanmuSummaryHelper.build_summary_material(room_id, session, messages)
material["session"] = {
"session_id": session.get("session_id", ""),
"anchor_day": session.get("anchor_day", ""),
"nickname": session.get("nickname", ""),
"room_name": session.get("room_name", ""),
"is_live": bool(session.get("is_live")),
}
return material
def build_session_llm_payload(self, room_id: str, session_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""
旁路能力:构造可直接发送给 LLM 的弹幕总结载荷。
不改变现有弹幕采集和通知主流程。
"""
session = self.get_room_session(room_id, session_id=session_id)
if not session:
return None
messages = DouyuDanmuSummaryHelper.load_session_messages(room_id, session)
return DouyuDanmuSummaryHelper.build_llm_payload(room_id, session, messages)
def _daily_report_job_key(self, day_key: str) -> str:
return f"{self.redis_manager.prefix}daily_report_job:{day_key}"
def _daily_report_room_key(self, room_id: str, anchor_day: str) -> str:
return f"{self.redis_manager.prefix}daily_report:{room_id}:{anchor_day}"
@staticmethod
def _daily_report_cache_dir() -> str:
path = os.path.join("temp", "douyu_materials")
os.makedirs(path, exist_ok=True)
return path
def _daily_report_cache_path(self, room_id: str, anchor_day: str) -> str:
return os.path.join(
self._daily_report_cache_dir(),
f"{room_id}_{anchor_day.replace('-', '')}_daily_report_result.json",
)
def _load_daily_report_cache(self, room_id: str, anchor_day: str) -> Optional[Dict[str, Any]]:
cache_path = self._daily_report_cache_path(room_id, anchor_day)
if not os.path.exists(cache_path):
return None
try:
with open(cache_path, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, dict):
return data
except Exception as e:
logger.warning(f"读取斗鱼每日报告缓存失败(room={room_id}, day={anchor_day}): {e}")
return None
def _save_daily_report_cache(self, room_id: str, anchor_day: str, data: Dict[str, Any]) -> None:
cache_path = self._daily_report_cache_path(room_id, anchor_day)
try:
with open(cache_path, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
except Exception as e:
logger.warning(f"保存斗鱼每日报告缓存失败(room={room_id}, day={anchor_day}): {e}")
@staticmethod
def _resolve_existing_report_image(image_path: Optional[str]) -> Optional[str]:
path = str(image_path or "").strip()
if not path:
return None
return path if os.path.exists(path) else None
def _should_run_daily_report(self, now_dt: datetime) -> bool:
time_text = str(self._daily_report_time or "").strip()
try:
target_hour, target_minute = [int(part) for part in time_text.split(":", 1)]
except Exception:
return False
target_dt = now_dt.replace(hour=target_hour, minute=target_minute, second=0, microsecond=0)
if now_dt < target_dt or now_dt > target_dt + timedelta(minutes=4, seconds=59):
return False
last_run = self.redis_manager.get_text_value(self._daily_report_job_key(now_dt.strftime("%Y-%m-%d")))
return not last_run
def _load_sessions_for_anchor_day(self, room_id: str, anchor_day: str) -> List[Dict[str, Any]]:
if not self.redis_manager:
return []
sessions = []
for session_id in self.redis_manager.list_room_session_ids(room_id, limit=30):
session = self.redis_manager.get_room_session(room_id, session_id)
if not session:
continue
if str(session.get("anchor_day") or "") != anchor_day:
continue
sessions.append(session)
sessions.sort(
key=lambda item: str(((item.get("segments") or [{}])[0]).get("start_time", "")),
)
if sessions:
return sessions[:self._daily_report_max_sessions]
inferred_sessions = self._infer_sessions_for_anchor_day(room_id, anchor_day)
if inferred_sessions:
logger.info(
f"斗鱼每日报告使用弹幕文件回推 session: room={room_id}, day={anchor_day}, "
f"count={len(inferred_sessions)}"
)
return inferred_sessions[:self._daily_report_max_sessions]
return []
def _infer_sessions_for_anchor_day(self, room_id: str, anchor_day: str) -> List[Dict[str, Any]]:
date_key = anchor_day.replace("-", "")
day_messages = DouyuDanmuSummaryHelper.load_day_messages(room_id, date_key)
if not day_messages:
return []
inferred_sessions = DouyuDanmuSummaryHelper.infer_sessions_from_messages(
room_id,
day_messages,
session_cutoff_hour=self._session_cutoff_hour,
merge_gap_hours=self._merge_gap_hours,
min_session_messages=min(50, self._daily_report_min_messages),
)
inferred_sessions = [
item for item in inferred_sessions
if str(item.get("anchor_day") or "") == anchor_day
]
if inferred_sessions:
return inferred_sessions
if len(day_messages) < self._daily_report_min_messages:
return []
ordered = sorted(day_messages, key=lambda item: item.get("timestamp") or datetime.min)
start_dt = ordered[0].get("timestamp")
end_dt = ordered[-1].get("timestamp")
if not isinstance(start_dt, datetime) or not isinstance(end_dt, datetime):
return []
return [{
"session_id": f"{room_id}_{date_key}_fallback",
"room_id": room_id,
"anchor_day": anchor_day,
"nickname": "",
"room_name": "",
"segments": [{
"start_time": start_dt.strftime("%Y-%m-%d %H:%M:%S"),
"end_time": end_dt.strftime("%Y-%m-%d %H:%M:%S"),
}],
"is_live": False,
"source": "fallback_full_day",
}]
def _build_audience_trend(self, sessions: List[Dict[str, Any]]) -> Dict[str, Any]:
points: List[Dict[str, Any]] = []
segment_start_times: List[str] = []
segment_end_times: List[str] = []
for session in sessions:
for segment in session.get("segments", []) or []:
start_time = str(segment.get("start_time") or "").strip()
end_time = str(segment.get("end_time") or "").strip()
if start_time:
segment_start_times.append(start_time)
if end_time:
segment_end_times.append(end_time)
for item in session.get("audience_points", []) or []:
point = {
"timestamp": str(item.get("timestamp") or "").strip(),
"vip_count": int(item.get("vip_count", 0) or 0),
"diamond_count": int(item.get("diamond_count", 0) or 0),
}
if point["timestamp"]:
points.append(point)
points = self._normalize_audience_points(points, limit=1440)
if not points:
return {"points": [], "summary": {}}
vip_values = [int(item.get("vip_count", 0) or 0) for item in points]
diamond_values = [int(item.get("diamond_count", 0) or 0) for item in points]
labels = [str(item.get("timestamp") or "")[-8:-3] for item in points]
session_start = min(segment_start_times) if segment_start_times else ""
session_end = max(segment_end_times) if segment_end_times else str(points[-1].get("timestamp") or "")
first_point_time = str(points[0].get("timestamp") or "")
last_point_time = str(points[-1].get("timestamp") or "")
leading_gap_minutes = 0
if session_start and first_point_time:
start_dt = self._parse_session_time(session_start)
point_dt = self._parse_session_time(first_point_time)
if start_dt and point_dt:
leading_gap_minutes = max(int((point_dt - start_dt).total_seconds() // 60), 0)
return {
"points": points,
"summary": {
"point_count": len(points),
"vip_min": min(vip_values),
"vip_max": max(vip_values),
"vip_latest": vip_values[-1],
"diamond_min": min(diamond_values),
"diamond_max": max(diamond_values),
"diamond_latest": diamond_values[-1],
"labels": labels,
"session_start": session_start,
"session_end": session_end,
"first_point_time": first_point_time,
"last_point_time": last_point_time,
"leading_gap_minutes": leading_gap_minutes,
},
}
def _build_daily_report_payload(self, room_id: str, anchor_day: str, sessions: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
if not sessions:
return None
session_payloads: List[Dict[str, Any]] = []
total_message_count = 0
total_noise_filtered_count = 0
total_organized_message_count = 0
total_unique_users: Set[str] = set()
merged_templates: List[Dict[str, Any]] = []
repeated_messages: List[Dict[str, Any]] = []
peak_buckets: List[Dict[str, Any]] = []
representative_messages: List[Dict[str, Any]] = []
raw_window_samples: List[Dict[str, Any]] = []
top_terms_counter = Counter()
burst_terms_counter = Counter()
operator_totals = {
"fans_badge_user_count": 0,
"fans_badge_message_count": 0,
"high_room_level_user_count": 0,
"high_fans_level_user_count": 0,
"noble_user_count": 0,
"noble_message_count": 0,
"active_users_5plus": 0,
"active_users_10plus": 0,
}
top_badge_counter = Counter()
top_badge_message_counter = Counter()
top_active_user_map: Dict[str, Dict[str, Any]] = {}
nickname = ""
room_name = ""
for session in sessions:
messages = DouyuDanmuSummaryHelper.load_session_messages(room_id, session)
if len(messages) < self._daily_report_min_messages:
continue
payload = DouyuDanmuSummaryHelper.build_llm_payload(room_id, session, messages)
session_payloads.append(payload)
meta = payload.get("session_meta", {}) or {}
operator_metrics = payload.get("operator_metrics", {}) or {}
total_message_count += int(meta.get("message_count", 0) or 0)
total_noise_filtered_count += int(meta.get("noise_filtered_count", 0) or 0)
total_organized_message_count += int(meta.get("organized_message_count", 0) or 0)
nickname = nickname or str(meta.get("nickname") or session.get("nickname") or "")
room_name = room_name or str(meta.get("room_name") or session.get("room_name") or "")
for key in operator_totals:
operator_totals[key] += int(operator_metrics.get(key, 0) or 0)
for item in payload.get("merged_templates", []) or []:
merged_templates.append(dict(item))
for item in payload.get("repeated_messages", []) or []:
repeated_messages.append(dict(item))
for item in payload.get("peak_buckets", []) or []:
peak_buckets.append(dict(item))
for item in payload.get("representative_messages", []) or []:
representative_messages.append(dict(item))
for item in payload.get("raw_window_samples", []) or []:
raw_window_samples.append(dict(item))
for item in payload.get("top_terms", []) or []:
term = str(item.get("term") or "").strip()
if term:
top_terms_counter[term] += int(item.get("count", 0) or 0)
for item in payload.get("burst_terms", []) or []:
term = str(item.get("text") or "").strip()
if term:
burst_terms_counter[term] += int(item.get("count", 0) or 0)
for item in operator_metrics.get("top_badges", []) or []:
badge_name = str(item.get("badge_name") or "").strip()
if badge_name:
top_badge_counter[badge_name] += int(item.get("user_count", 0) or 0)
top_badge_message_counter[badge_name] += int(item.get("message_count", 0) or 0)
for item in operator_metrics.get("top_active_users", []) or []:
uid = str(item.get("uid") or "").strip()
if not uid:
continue
existing = top_active_user_map.get(uid)
message_count = int(item.get("message_count", 0) or 0)
organized_message_count = int(item.get("organized_message_count", 0) or 0)
room_level = int(item.get("room_level", 0) or 0)
fans_level = int(item.get("fans_level", 0) or 0)
if not existing:
top_active_user_map[uid] = {
"uid": uid,
"nickname": str(item.get("nickname") or "").strip(),
"message_count": message_count,
"organized_message_count": organized_message_count,
"room_level": room_level,
"fans_name": str(item.get("fans_name") or "").strip(),
"fans_level": fans_level,
"noble_name": str(item.get("noble_name") or "").strip(),
}
continue
existing["message_count"] = int(existing.get("message_count", 0) or 0) + message_count
existing["organized_message_count"] = int(existing.get("organized_message_count", 0) or 0) + organized_message_count
if not str(existing.get("nickname") or "").strip():
existing["nickname"] = str(item.get("nickname") or "").strip()
if room_level > int(existing.get("room_level", 0) or 0):
existing["room_level"] = room_level
if fans_level > int(existing.get("fans_level", 0) or 0):
existing["fans_level"] = fans_level
if not str(existing.get("fans_name") or "").strip():
existing["fans_name"] = str(item.get("fans_name") or "").strip()
if not str(existing.get("noble_name") or "").strip():
existing["noble_name"] = str(item.get("noble_name") or "").strip()
for session_message in messages:
uid = str(session_message.get("uid") or "").strip()
if uid:
total_unique_users.add(uid)
if not session_payloads:
return None
merged_templates.sort(key=lambda item: int(item.get("count", 0) or 0), reverse=True)
repeated_messages.sort(key=lambda item: int(item.get("count", 0) or 0), reverse=True)
peak_buckets.sort(key=lambda item: int(item.get("message_count", 0) or 0), reverse=True)
artifact_dir = os.path.join("temp", "douyu_materials")
os.makedirs(artifact_dir, exist_ok=True)
audience_trend = self._build_audience_trend(sessions)
payload = {
"report_meta": {
"room_id": room_id,
"anchor_day": anchor_day,
"nickname": nickname,
"room_name": room_name,
"session_count": len(session_payloads),
"message_count": total_message_count,
"noise_filtered_count": total_noise_filtered_count,
"organized_message_count": total_organized_message_count,
"unique_user_count": len(total_unique_users),
},
"operator_metrics": {
**operator_totals,
"fans_badge_user_ratio": round(operator_totals["fans_badge_user_count"] / max(len(total_unique_users), 1), 4),
"top_badges": [
{
"badge_name": badge_name,
"user_count": user_count,
"message_count": int(top_badge_message_counter.get(badge_name, 0) or 0),
}
for badge_name, user_count in top_badge_counter.most_common(10)
],
"top_active_users": sorted(
top_active_user_map.values(),
key=lambda item: (
int(item.get("message_count", 0) or 0),
int(item.get("organized_message_count", 0) or 0),
),
reverse=True,
)[:12],
},
"sessions": [
{
"session_id": (item.get("session_meta", {}) or {}).get("session_id", ""),
"segments": (item.get("session_meta", {}) or {}).get("segments", []),
"message_count": (item.get("session_meta", {}) or {}).get("message_count", 0),
"organized_message_count": (item.get("session_meta", {}) or {}).get("organized_message_count", 0),
}
for item in session_payloads
],
"audience_trend": audience_trend,
"merged_templates": merged_templates[:24],
"repeated_messages": repeated_messages[:24],
"top_terms": [{"term": term, "count": count} for term, count in top_terms_counter.most_common(24)],
"burst_terms": [{"text": term, "count": count} for term, count in burst_terms_counter.most_common(16)],
"peak_buckets": peak_buckets[:10],
"representative_messages": representative_messages[:24],
"raw_window_samples": raw_window_samples[:10],
}
artifact_path = os.path.join(artifact_dir, f"{room_id}_{anchor_day.replace('-', '')}_daily_report_payload.json")
with open(artifact_path, "w", encoding="utf-8") as f:
json.dump(payload, f, ensure_ascii=False, indent=2)
return payload
def _build_daily_report_prompt(self, payload: Dict[str, Any]) -> Tuple[str, str]:
meta = payload.get("report_meta", {}) or {}
system_prompt = (
"你是斗鱼直播日报助手。请基于给定的结构化弹幕材料,输出一份适合发群的中文日报。"
"要求简洁、自然、信息密度高,不要编造,不要使用代码块。"
)
user_prompt = (
"请输出一份斗鱼每日报告,格式要求:\n"
"1. 第一行写标题,包含主播名和日期。\n"
"2. 用 3-5 条概括直播主线、弹幕情绪、观众关注点。\n"
"3. 单独补充运营视角观察,比如带牌活跃用户、高等级用户、核心发言用户、活跃牌子分布。\n"
"4. 单独列出高频梗/复读内容(不超过 5 条)。\n"
"5. 单独列出 2-3 个热点时段。\n"
"6. 整体控制在 600 字以内。\n\n"
f"材料如下:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
)
return system_prompt, user_prompt
def _build_danmu_summary_prompt(self, payload: Dict[str, Any]) -> Tuple[str, str]:
meta = payload.get("report_meta", {}) or {}
system_prompt = (
"你是直播弹幕总结助手。请只根据给定材料,总结这场直播的弹幕内容与氛围。"
"不要输出运营数据,不要编造,不要写空话套话。"
)
user_prompt = (
"请输出一段适合放在日报图片上半部分的弹幕总结,要求:\n"
"1. 先用 1 段总述直播氛围与主线。\n"
"2. 再用 5 条要点总结观众关注点、情绪变化、反复出现的梗、节奏变化和额外反馈,每条只写一句。\n"
"3. 另起一行固定写标题:`【粉丝向弹幕萃取】`。\n"
"4. 在该标题下输出 4-6 条短句,尽量保留弹幕原话风格(可以保留口头语、玩梗、情绪词)。\n"
"5. 整体语气要像“直播间现场记录”,不要写成运营复盘。\n"
"6. 不要写“根据数据”“建议”“策略”等词。\n\n"
f"主播:{meta.get('nickname') or meta.get('room_name') or meta.get('room_id')}\n"
f"日期:{meta.get('anchor_day', '')}\n"
f"材料:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
)
return system_prompt, user_prompt
def _build_fans_extract_lines(self, payload: Dict[str, Any], limit: int = 6) -> List[str]:
# 粉丝向萃取强调“可读、像现场弹幕”,优先取代表发言,再补充重复梗与情绪短词。
representative_messages = payload.get("representative_messages", []) or []
repeated_messages = payload.get("repeated_messages", []) or []
merged_templates = payload.get("merged_templates", []) or []
burst_terms = payload.get("burst_terms", []) or []
lines: List[str] = []
seen = set()
def push(text: str) -> None:
value = str(text or "").strip()
if not value:
return
key = value.lower()
if key in seen:
return
seen.add(key)
lines.append(value)
for item in representative_messages[:10]:
nickname = str(item.get("nickname") or "").strip() or "观众"
content = str(item.get("content") or "").strip()
if content:
push(f"{nickname}{content[:56]}")
if len(lines) >= limit:
return lines[:limit]
for item in repeated_messages[:6]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
push(f"复读梗「{text[:36]}」刷了 {count} 次。")
if len(lines) >= limit:
return lines[:limit]
for item in merged_templates[:6]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
push(f"共识弹幕「{text[:36]}」出现 {count} 次。")
if len(lines) >= limit:
return lines[:limit]
for item in burst_terms[:4]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
push(f"情绪短词「{text}」集中出现 {count} 次。")
if len(lines) >= limit:
return lines[:limit]
return lines[:limit]
def _build_fallback_daily_report(self, payload: Dict[str, Any]) -> str:
meta = payload.get("report_meta", {}) or {}
title_name = str(meta.get("nickname") or meta.get("room_name") or meta.get("room_id") or "主播")
lines = [
f"斗鱼每日报告 | {title_name} | {meta.get('anchor_day', '')}",
f"{meta.get('session_count', 0)} 场,弹幕 {meta.get('message_count', 0)} 条,参与用户 {meta.get('unique_user_count', 0)} 人。",
]
operator_metrics = payload.get("operator_metrics", {}) or {}
sessions = payload.get("sessions", []) or []
if sessions:
session_parts = []
for item in sessions[:4]:
segments = item.get("segments", []) or []
if not segments:
continue
start_time = str(segments[0].get("start_time", ""))[-8:-3]
end_time = str(segments[-1].get("end_time", ""))[-8:-3]
session_parts.append(f"{start_time}-{end_time}")
if session_parts:
lines.append("场次时间:" + " / ".join(session_parts))
top_terms = payload.get("top_terms", []) or []
if top_terms:
lines.append("关注焦点:" + "".join([str(item.get("term") or "") for item in top_terms[:8] if str(item.get("term") or "").strip()]))
if operator_metrics:
op_parts = []
fans_badge_user_count = int(operator_metrics.get("fans_badge_user_count", 0) or 0)
high_room_level_user_count = int(operator_metrics.get("high_room_level_user_count", 0) or 0)
high_fans_level_user_count = int(operator_metrics.get("high_fans_level_user_count", 0) or 0)
active_users_10plus = int(operator_metrics.get("active_users_10plus", 0) or 0)
if fans_badge_user_count:
op_parts.append(f"带牌活跃用户 {fans_badge_user_count}")
if high_room_level_user_count:
op_parts.append(f"30级+活跃用户 {high_room_level_user_count}")
if high_fans_level_user_count:
op_parts.append(f"10级+粉丝牌用户 {high_fans_level_user_count}")
if active_users_10plus:
op_parts.append(f"高活跃核心用户 {active_users_10plus}")
if op_parts:
lines.append("运营侧:" + "".join(op_parts))
top_badges = operator_metrics.get("top_badges", []) or []
if top_badges:
lines.append("活跃粉丝牌:")
for item in top_badges[:5]:
badge_name = str(item.get("badge_name") or "").strip()
user_count = int(item.get("user_count", 0) or 0)
message_count = int(item.get("message_count", 0) or 0)
if badge_name:
lines.append(f"- {badge_name}{user_count}人,{message_count}")
merged_templates = payload.get("merged_templates", []) or []
if merged_templates:
lines.append("高频梗:")
for item in merged_templates[:5]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
lines.append(f"- {text[:42]}{count}次)")
peak_buckets = payload.get("peak_buckets", []) or []
if peak_buckets:
lines.append("热点时段:")
for item in peak_buckets[:3]:
start_time = str(item.get("start_time") or "")[-8:-3]
message_count = int(item.get("message_count", 0) or 0)
terms = "".join(
[str(term.get("term") or "") for term in (item.get("top_terms", []) or [])[:4] if str(term.get("term") or "").strip()]
)
lines.append(f"- {start_time}{message_count}条,关键词:{terms}")
representative_messages = payload.get("representative_messages", []) or []
if representative_messages:
lines.append("代表弹幕:")
for item in representative_messages[:4]:
nickname = str(item.get("nickname") or "").strip()
content = str(item.get("content") or "").strip()
if content:
lines.append(f"- {nickname}{content[:60]}")
text = "\n".join(lines).strip()
if len(text) > self._daily_report_max_length:
text = text[: self._daily_report_max_length - 20].rstrip() + "\n...(已截断)"
return text
def _build_fallback_danmu_summary(self, payload: Dict[str, Any]) -> str:
meta = payload.get("report_meta", {}) or {}
top_terms = [str(item.get("term") or "").strip() for item in (payload.get("top_terms", []) or [])[:6] if str(item.get("term") or "").strip()]
merged_templates = payload.get("merged_templates", []) or []
peak_buckets = payload.get("peak_buckets", []) or []
representative_messages = payload.get("representative_messages", []) or []
lines = [
f"{meta.get('anchor_day', '')} 这场直播弹幕整体比较密集,讨论重心主要围绕 {''.join(top_terms[:4]) or '对局过程'} 展开,观众互动意愿较强,梗和复读内容持续出现。"
]
if merged_templates:
sample_templates = "".join(
[str(item.get("text") or "").strip()[:26] for item in merged_templates[:3] if str(item.get("text") or "").strip()]
)
if sample_templates:
lines.append(f"- 主线观察:直播间共识梗很强,重复刷屏内容主要集中在 {sample_templates}")
for item in merged_templates[:4]:
break
if peak_buckets:
top_bucket = peak_buckets[0]
terms = "".join(
[str(term.get("term") or "") for term in (top_bucket.get("top_terms", []) or [])[:4] if str(term.get("term") or "").strip()]
)
lines.append(
f"- 节奏变化:高峰集中在 {str(top_bucket.get('start_time') or '')[-8:-3]} 前后,单时段弹幕 {int(top_bucket.get('message_count', 0) or 0)} 条,关键词偏向 {terms}"
)
if len(peak_buckets) > 1:
second_bucket = peak_buckets[1]
second_terms = "".join(
[str(term.get("term") or "") for term in (second_bucket.get("top_terms", []) or [])[:4] if str(term.get("term") or "").strip()]
)
lines.append(
f"- 热点补充:{str(second_bucket.get('start_time') or '')[-8:-3]} 也出现明显抬升,弹幕讨论继续围绕 {second_terms} 展开。"
)
if representative_messages:
lines.append("- 情绪特点:代表性发言里既有对操作和决策的即时反馈,也有大量玩梗、调侃和情绪宣泄。")
if top_terms:
lines.append(f"- 关注焦点:高频词主要落在 {''.join(top_terms[:6])},说明观众注意力相对集中。")
# 在兜底模式下也强制补出“粉丝向弹幕萃取”,避免图片模板出现空区块。
fans_extract_lines = self._build_fans_extract_lines(payload, limit=6)
if fans_extract_lines:
lines.append("【粉丝向弹幕萃取】")
for item in fans_extract_lines:
lines.append(f"- {item}")
return "\n".join(lines).strip()
def _build_operator_summary_text(self, payload: Dict[str, Any]) -> str:
meta = payload.get("report_meta", {}) or {}
operator_metrics = payload.get("operator_metrics", {}) or {}
total_users = int(meta.get("unique_user_count", 0) or 0)
fans_badge_users = int(operator_metrics.get("fans_badge_user_count", 0) or 0)
high_room_users = int(operator_metrics.get("high_room_level_user_count", 0) or 0)
high_fans_users = int(operator_metrics.get("high_fans_level_user_count", 0) or 0)
active_users_5plus = int(operator_metrics.get("active_users_5plus", 0) or 0)
active_users_10plus = int(operator_metrics.get("active_users_10plus", 0) or 0)
fans_badge_ratio = float(operator_metrics.get("fans_badge_user_ratio", 0) or 0)
lines = [
f"- 活跃用户规模:{total_users} 人,其中发言 5 次以上 {active_users_5plus}10 次以上 {active_users_10plus} 人。",
f"- 粉丝粘性:带粉丝牌活跃用户 {fans_badge_users} 人,占活跃用户 {fans_badge_ratio * 100:.1f}%10 级以上粉丝牌用户 {high_fans_users} 人。",
f"- 用户质量:房间等级 30 级以上活跃用户 {high_room_users} 人,说明高等级老观众参与度不低。",
]
audience_summary = (payload.get("audience_trend", {}) or {}).get("summary", {}) or {}
if audience_summary:
vip_min = int(audience_summary.get("vip_min", 0) or 0)
vip_max = int(audience_summary.get("vip_max", 0) or 0)
diamond_latest = int(audience_summary.get("diamond_latest", 0) or 0)
point_count = int(audience_summary.get("point_count", 0) or 0)
lines.append(
f"- 人数走势WS 侧共采样 {point_count} 个时间点,贵宾在 {vip_min}-{vip_max} 区间波动,钻粉收盘约 {diamond_latest}"
)
top_badges = payload.get("operator_metrics", {}).get("top_badges", []) or []
if top_badges:
badge_parts = []
for item in top_badges[:5]:
badge_name = str(item.get("badge_name") or "").strip()
if not badge_name:
continue
badge_parts.append(f"{badge_name} {int(item.get('user_count', 0) or 0)}人/{int(item.get('message_count', 0) or 0)}")
if badge_parts:
lines.append(f"- 活跃牌子分布:{''.join(badge_parts)}")
top_active_users = payload.get("operator_metrics", {}).get("top_active_users", []) or []
if top_active_users:
core_parts = []
for item in top_active_users[:5]:
nickname = str(item.get("nickname") or item.get("uid") or "").strip()
msg_count = int(item.get("message_count", 0) or 0)
fans_name = str(item.get("fans_name") or "").strip()
fans_level = int(item.get("fans_level", 0) or 0)
room_level = int(item.get("room_level", 0) or 0)
tags = []
if fans_name:
if fans_level > 0:
tags.append(f"{fans_name} Lv{fans_level}")
else:
tags.append(fans_name)
if room_level > 0:
tags.append(f"平台 Lv{room_level}")
tags.append(f"{msg_count}")
core_parts.append(f"{nickname}{''.join(tags)}")
if core_parts:
lines.append(f"- 核心发言用户:{''.join(core_parts)}")
return "\n".join(lines).strip()
def _build_operator_summary_lines(self, payload: Dict[str, Any]) -> List[str]:
return [line.strip()[2:].strip() for line in self._build_operator_summary_text(payload).splitlines() if line.strip().startswith("- ")]
def _build_dify_daily_report_inputs(
self,
*,
task_type: str,
system_prompt: str,
user_prompt: str,
payload: Dict[str, Any],
) -> Dict[str, Any]:
"""
组装斗鱼日报在 Dify Workflow 下的输入参数。
设计目标:
1. 让工作流既能拿到“最终自然语言提示词”,也能拿到“结构化原始载荷”;
2. 让一个工作流通过 task_type 同时处理「日报正文」和「弹幕摘要」两类任务;
3. 保留关键元信息,便于在工作流内做分支、日志与降级兜底。
"""
meta = payload.get("report_meta", {}) or {}
room_id = str(meta.get("room_id") or "").strip()
anchor_day = str(meta.get("anchor_day") or "").strip()
nickname = str(meta.get("nickname") or meta.get("room_name") or "").strip()
# 说明:
# 1. 部分 Dify Workflow 对输入变量类型校验较严格,复杂对象(dict/list)容易触发 400
# 2. 默认只提交精简字符串字段,优先保证链路可用;
# 3. 如需在工作流内使用完整载荷,可通过 include_structured_inputs 开关启用。
inputs = {
# 任务路由字段:在 Dify 条件分支里用于区分日报正文/弹幕摘要。
"task_type": task_type,
# 兼容 Workflow 中直接读取 query 的场景。
"query": user_prompt,
# 保留原有两段提示词,便于工作流内部二次拼装或调试。
"system_prompt": system_prompt,
"user_prompt": user_prompt,
# 关键元信息:用于日志、标题拼接、数据看板或异常追踪。
"room_id": room_id,
"anchor_day": anchor_day,
"nickname": nickname,
# 控制输出长度Dify 该变量在部分工作流中配置为 paragraph(字符串)类型,
# 因此这里统一传字符串,避免出现 “max_length must be a string” 的 400 校验错误。
"max_length": str(int(self._daily_report_max_length or 1800)),
}
if self._daily_report_include_structured_inputs:
inputs["report_payload_json"] = json.dumps(payload, ensure_ascii=False)
return inputs
def _call_daily_report_llm(
self,
*,
task_type: str,
system_prompt: str,
user_prompt: str,
payload: Dict[str, Any],
tag: str,
) -> str:
"""
统一封装斗鱼日报 LLM 调用。
- Dify provider走 run(inputs) 进入 Workflow确保输入结构稳定可编排
- 其他 provider保持原 chat(system,user) 行为,兼容现有 OpenAI-compatible 配置。
"""
if not self._daily_report_llm_client:
return ""
meta = payload.get("report_meta", {}) or {}
room_id = str(meta.get("room_id") or "").strip()
user_id = f"douyu_daily_report_{room_id or 'unknown'}"
if self._daily_report_llm_client.provider == "dify":
inputs = self._build_dify_daily_report_inputs(
task_type=task_type,
system_prompt=system_prompt,
user_prompt=user_prompt,
payload=payload,
)
result = self._daily_report_llm_client.run(
prompt=user_prompt,
user=user_id,
inputs=inputs,
tag=tag,
)
return str((result or {}).get("text", "") or "").strip()
return self._daily_report_llm_client.chat(
system_prompt,
user_prompt,
user_id=user_id,
).strip()
async def _generate_danmu_summary_text(self, payload: Dict[str, Any]) -> str:
if self._daily_report_use_llm and self._daily_report_llm_client:
system_prompt, user_prompt = self._build_danmu_summary_prompt(payload)
result = await asyncio.to_thread(
self._call_daily_report_llm,
task_type="danmu_summary",
system_prompt=system_prompt,
user_prompt=user_prompt,
payload=payload,
tag=f"douyu_danmu_summary_{(payload.get('report_meta', {}) or {}).get('room_id', '')}",
)
if result:
return result.strip()
logger.warning(
f"斗鱼弹幕总结 LLM 生成失败: model={self._daily_report_llm_client.model}, "
f"last_error={self._daily_report_llm_client.last_error}"
)
return self._build_fallback_danmu_summary(payload)
async def _build_daily_report_markdown(self, payload: Dict[str, Any]) -> str:
meta = payload.get("report_meta", {}) or {}
title_name = str(meta.get("nickname") or meta.get("room_name") or meta.get("room_id") or "主播")
danmu_summary = await self._generate_danmu_summary_text(payload)
operator_summary = self._build_operator_summary_text(payload)
lines = [
f"# {title_name} 直播每日报告",
f"{meta.get('anchor_day', '')}|场次 {meta.get('session_count', 0)}|弹幕 {meta.get('message_count', 0)}|活跃用户 {meta.get('unique_user_count', 0)}",
"",
"## 弹幕总结",
danmu_summary,
"",
"## 运营数据总结",
operator_summary,
]
peak_buckets = payload.get("peak_buckets", []) or []
if peak_buckets:
lines.extend([
"",
"## 热点时段",
])
for item in peak_buckets[:3]:
terms = "".join(
[str(term.get("term") or "") for term in (item.get("top_terms", []) or [])[:4] if str(term.get("term") or "").strip()]
)
lines.append(
f"- `{str(item.get('start_time') or '')[-8:-3]}` 弹幕 {int(item.get('message_count', 0) or 0)} 条,关键词:{terms}"
)
merged_templates = payload.get("merged_templates", []) or []
if merged_templates:
lines.extend([
"",
"## 高频梗",
])
for item in merged_templates[:5]:
text = str(item.get("text") or "").strip()
count = int(item.get("count", 0) or 0)
if text:
lines.append(f"- {text[:72]}{count}次)")
return "\n".join(lines).strip()
async def _render_daily_report_image(self, payload: Dict[str, Any]) -> Optional[str]:
markdown = await self._build_daily_report_markdown(payload)
room_id = str((payload.get("report_meta", {}) or {}).get("room_id", "") or "room")
anchor_day = str((payload.get("report_meta", {}) or {}).get("anchor_day", "") or "").replace("-", "")
filename = f"douyu_daily_report_{room_id}_{anchor_day}.png"
try:
danmu_summary = await self._generate_danmu_summary_text(payload)
html_content = render_daily_report_html(
payload=payload,
danmu_summary=danmu_summary,
operator_summary_lines=self._build_operator_summary_lines(payload),
)
output_dir = os.path.join(os.getcwd(), "temp", "md2image")
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, filename)
await html_to_image(html_content, output_path)
return str(Path(output_path).resolve())
except Exception as e:
logger.error(f"斗鱼专用模板图片生成失败(room={room_id}, day={anchor_day}): {e}")
try:
return await convert_md_str_to_image(markdown, filename)
except Exception as e:
logger.error(f"斗鱼每日报告图片生成失败(room={room_id}, day={anchor_day}): {e}")
return None
async def _generate_daily_report_text(self, payload: Dict[str, Any]) -> str:
if self._daily_report_use_llm and self._daily_report_llm_client:
system_prompt, user_prompt = self._build_daily_report_prompt(payload)
result = await asyncio.to_thread(
self._call_daily_report_llm,
task_type="daily_report",
system_prompt=system_prompt,
user_prompt=user_prompt,
payload=payload,
tag=f"douyu_daily_report_{(payload.get('report_meta', {}) or {}).get('room_id', '')}",
)
if result:
text = result.strip()
if len(text) > self._daily_report_max_length:
return text[: self._daily_report_max_length - 20].rstrip() + "\n...(已截断)"
return text
logger.warning(
f"斗鱼每日报告 LLM 生成失败: model={self._daily_report_llm_client.model}, "
f"last_error={self._daily_report_llm_client.last_error}"
)
return self._build_fallback_daily_report(payload)
async def _get_or_create_daily_report_result(
self,
room_id: str,
anchor_day: str,
payload: Dict[str, Any],
*,
force_regenerate: bool = False,
) -> Dict[str, Any]:
# force_regenerate=True 时,跳过本地缓存读取,直接重新生成文本/图片并覆盖缓存。
# 这样可以在模型提示词或模板变更后,通过命令立即验证最新效果。
cached = {} if force_regenerate else (self._load_daily_report_cache(room_id, anchor_day) or {})
cached_image = self._resolve_existing_report_image(cached.get("report_image"))
cached_text = str(cached.get("report_text") or "").strip()
cached_version = int(cached.get("cache_version", 0) or 0)
if cached_version >= self._DAILY_REPORT_CACHE_VERSION and (cached_image or cached_text):
return {
"report_text": cached_text,
"report_image": cached_image,
"cached": True,
}
report_text = await self._generate_daily_report_text(payload)
report_image = None
if self._daily_report_send_image:
report_image = await self._render_daily_report_image(payload)
result = {
"room_id": room_id,
"anchor_day": anchor_day,
"cache_version": self._DAILY_REPORT_CACHE_VERSION,
"report_text": report_text,
"report_image": report_image,
"generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
}
self._save_daily_report_cache(room_id, anchor_day, result)
result["cached"] = False
return result
async def _send_daily_reports(
self,
anchor_day: str,
target_group_id: Optional[str] = None,
force: bool = False,
force_regenerate: bool = False,
) -> bool:
rooms = (
set(self.redis_manager.list_group_rooms(target_group_id))
if target_group_id
else self.redis_manager.all_subscribed_rooms()
)
if not rooms:
logger.info(
f"斗鱼每日报告无可处理房间: day={anchor_day}, target_group={target_group_id or 'ALL'}"
)
return False
delivered_any = False
for room_id in rooms:
if not force and self.redis_manager.get_text_value(self._daily_report_room_key(room_id, anchor_day)):
logger.info(f"斗鱼每日报告已发送过,跳过: room={room_id}, day={anchor_day}")
continue
sessions = self._load_sessions_for_anchor_day(room_id, anchor_day)
if not sessions:
logger.info(f"斗鱼每日报告无 session: room={room_id}, day={anchor_day}")
continue
if any(bool(session.get("is_live")) for session in sessions):
logger.info(f"斗鱼每日报告存在直播中场次,跳过: room={room_id}, day={anchor_day}")
continue
payload = self._build_daily_report_payload(room_id, anchor_day, sessions)
if not payload:
logger.info(
f"斗鱼每日报告 payload 为空: room={room_id}, day={anchor_day}, "
f"sessions={len(sessions)}, min_messages={self._daily_report_min_messages}"
)
continue
report_result = await self._get_or_create_daily_report_result(
room_id,
anchor_day,
payload,
force_regenerate=force_regenerate,
)
report_text = str(report_result.get("report_text") or "").strip()
report_image = self._resolve_existing_report_image(report_result.get("report_image"))
groups = [target_group_id] if target_group_id else self.redis_manager.groups_for_room(room_id)
delivered = False
for gid in groups:
if not gid:
continue
if GroupBotManager.get_group_permission(gid, self.feature) != PermissionStatus.ENABLED:
continue
try:
if report_image:
await self.bot.send_image_message(gid, Path(report_image))
else:
await self.bot.send_text_message(gid, report_text)
delivered = True
delivered_any = True
except Exception as e:
logger.error(f"发送斗鱼每日报告失败(room={room_id}, group={gid}): {e}")
if delivered:
self.redis_manager.set_text_value(
self._daily_report_room_key(room_id, anchor_day),
datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
)
return delivered_any
def _start_danmu_record(self, room_id: str):
recorder = self._get_danmu_recorder(room_id)
recorder.start()
def _stop_danmu_record(self, room_id: str):
recorder = self._danmu_recorders.get(room_id)
if recorder:
recorder.stop()