Revert "本地清洗弹幕TXT并压缩重复刷屏内容"

This reverts commit 12a5d89c76.
This commit is contained in:
liuwei
2026-04-29 14:15:26 +08:00
parent 45ec52ce91
commit fd61f17448
2 changed files with 4 additions and 65 deletions

View File

@@ -2251,18 +2251,6 @@ class DouyuPlugin(MessagePluginInterface):
artifact_path = os.path.join(artifact_dir, f"{room_id}_{anchor_day.replace('-', '')}_daily_report_payload.json")
with open(artifact_path, "w", encoding="utf-8") as f:
json.dump(payload, f, ensure_ascii=False, indent=2)
# 额外落一份“专门给 LLM 看”的本地清洗 txt。
# 它和原始弹幕文件的区别在于:
# 1. 已经过滤系统噪音;
# 2. 已移除 UID;
# 3. 已把短刷屏压缩成 `哈哈哈*120` 这类更省上下文的写法。
cleaned_transcript_path = os.path.join(
artifact_dir,
f"{room_id}_{anchor_day.replace('-', '')}_llm_transcript.txt",
)
with open(cleaned_transcript_path, "w", encoding="utf-8") as f:
f.write(str(payload.get("raw_danmu_transcript") or "").strip())
payload["cleaned_transcript_file"] = os.path.abspath(cleaned_transcript_path)
return payload
def _build_daily_report_prompt(self, payload: Dict[str, Any]) -> Tuple[str, str]:
@@ -2576,7 +2564,6 @@ class DouyuPlugin(MessagePluginInterface):
effective_lines = raw_lines[:max_lines]
lines = ["【按时间顺序整理的原始弹幕全文(已过滤系统噪音,仅合并完全相同重复内容)】"]
lines.append("说明:文本已移除 UID短刷屏弹幕会压缩成“哈哈哈*120”这类格式。")
if len(effective_lines) < len(raw_lines):
lines.append(f"以下仅展开前 {len(effective_lines)} 行,剩余内容因长度限制未继续拼接。")
lines.extend(effective_lines)
@@ -3016,23 +3003,14 @@ class DouyuPlugin(MessagePluginInterface):
def _build_dify_daily_report_files(self, payload: Dict[str, Any], user_id: str) -> List[Dict[str, Any]]:
"""
组装斗鱼日报要上传给 Dify 的原始文件列表。
当前优先上传“本地清洗后的 LLM 专用 txt”,让工作流里的 sys.files
拿到的是更适合总结任务的材料,而不是带 UID / 平台噪音的原始源文件。
当前优先上传当天命中的原始弹幕 txt,让工作流里的 sys.files
真正拿到“源文件级”材料,而不是只有摘要 JSON。
"""
if not self._daily_report_llm_client or self._daily_report_llm_client.provider != "dify":
return []
uploaded_files: List[Dict[str, Any]] = []
upload_candidates: List[str] = []
cleaned_transcript_file = os.path.abspath(str(payload.get("cleaned_transcript_file") or "").strip())
if cleaned_transcript_file:
upload_candidates.append(cleaned_transcript_file)
for file_path in (payload.get("source_danmu_files", []) or [])[:2]:
normalized_source_path = os.path.abspath(str(file_path or "").strip())
if normalized_source_path and normalized_source_path not in upload_candidates:
upload_candidates.append(normalized_source_path)
for file_path in upload_candidates[:3]:
normalized_path = os.path.abspath(str(file_path or "").strip())
if not normalized_path or not os.path.exists(normalized_path) or not os.path.isfile(normalized_path):
continue