# abot/scripts/generate_dota2_douyin_images.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
批量生成 Dota2 英雄抖音图片脚本。

设计目标：
1. 这是一个一次性本地脚本，不依赖机器人运行时，直接走 HTTP 调用 OpenAI 兼容图片接口。
2. 脚本会自动从 OpenDota 拉取全部英雄列表，避免手工维护英雄名称。
3. 每个英雄默认生成 4 张图片，其中 2 张中文排版、2 张日文排版，统一使用相同的画面结构与风格模板，尽量保证成片风格一致。
4. 脚本支持断点续跑：如果目标文件已经存在，则默认跳过，避免重复计费。
"""

from __future__ import annotations

import argparse
import base64
from concurrent.futures import ThreadPoolExecutor, as_completed
import json
import os
import re
import sys
import threading
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import requests
import yaml


# The hero roster is fetched from OpenDota's public heroStats endpoint so the
# script does not have to carry a hard-coded hero list.
OPENDOTA_HERO_STATS_URL = "https://api.opendota.com/api/heroStats"

# Default portrait size requested from the image endpoint. Note that
# 1024x1792 is exactly 4:7, i.e. close to (but not precisely) 9:16.
# The value is intended to be accepted by common OpenAI-compatible gateways.
DEFAULT_IMAGE_SIZE = "1024x1792"

# Default output root; keeping everything under one directory makes later
# review, editing and uploading easier.
DEFAULT_OUTPUT_DIR = Path("temp") / "dota2_douyin_images"

# Text language used for each image slot of a hero (two Chinese, then two
# Japanese). The list is cycled when more images per hero are requested.
DEFAULT_LANGUAGE_VARIANTS = ["zh", "zh", "ja", "ja"]

# Default size of the worker thread pool (four concurrent image requests).
DEFAULT_MAX_WORKERS = 4

# Default per-request timeout, in seconds. Rationale for 180:
# 1. 300 seconds delays failure feedback too much for a batch job;
# 2. 60 seconds may be too short for image generation at peak times;
# 3. 180 seconds is the script-level compromise between the two.
DEFAULT_REQUEST_TIMEOUT_SECONDS = 180


def parse_args() -> argparse.Namespace:
    """Define the command-line options and parse the process arguments.

    Returns:
        The namespace produced by ``argparse`` with one attribute per option
        (``config``, ``scene``, ``output_dir``, ``count_per_hero``, ``size``,
        ``quality``, ``timeout``, ``delay``, ``max_retries``, ``hero_limit``,
        ``hero_filter``, ``max_workers``, ``force``).
    """
    cli = argparse.ArgumentParser(
        description="批量为 Dota2 全英雄生成抖音竖版图片。"
    )

    # Each entry is (flag, keyword options for add_argument). The registration
    # order is the order in which the options appear in --help.
    option_table = [
        ("--config", {
            "default": "config.yaml",
            "help": "项目根目录下的配置文件路径，默认读取 config.yaml。",
        }),
        ("--scene", {
            "default": "image.generate",
            "help": "LLM 场景名，默认使用 config.yaml 中的 image.generate。",
        }),
        ("--output-dir", {
            "default": str(DEFAULT_OUTPUT_DIR),
            "help": "图片输出目录。",
        }),
        ("--count-per-hero", {
            "type": int,
            "default": 4,
            "help": "每个英雄生成的图片数量，默认 4 张（2 张中文、2 张日文）。",
        }),
        ("--size", {
            "default": DEFAULT_IMAGE_SIZE,
            "help": "图片尺寸，默认 1024x1792（9:16）。",
        }),
        ("--quality", {
            "default": "high",
            "help": "图片质量参数，默认 high。",
        }),
        ("--timeout", {
            "type": int,
            "default": DEFAULT_REQUEST_TIMEOUT_SECONDS,
            "help": "单次请求超时时间（秒），默认 180 秒。",
        }),
        ("--delay", {
            "type": float,
            "default": 1.5,
            "help": "每次成功生成后的等待时间（秒），默认 1.5 秒，避免打满网关。",
        }),
        ("--max-retries", {
            "type": int,
            "default": 3,
            "help": "单张图片失败后的最大重试次数，默认 3 次。",
        }),
        ("--hero-limit", {
            "type": int,
            "default": 0,
            "help": "仅生成前 N 个英雄，0 表示全部生成，便于先小范围试跑。",
        }),
        ("--hero-filter", {
            "default": "",
            "help": "只生成英雄名中包含该关键字的英雄，便于单独补图。",
        }),
        ("--max-workers", {
            "type": int,
            "default": DEFAULT_MAX_WORKERS,
            "help": "并发线程数，默认 4。",
        }),
        ("--force", {
            "action": "store_true",
            "help": "即使目标文件已存在，也强制重新生成。",
        }),
    ]
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    return cli.parse_args()


def load_yaml_config(config_path: str) -> Dict[str, Any]:
    """Load the YAML document stored at *config_path*.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``. A falsy
    parse result (for example an empty file) is replaced by an empty dict.
    """
    with open(config_path, mode="r", encoding="utf-8") as config_stream:
        parsed = yaml.safe_load(config_stream)
    if not parsed:
        return {}
    return parsed


def resolve_image_backend(config_data: Dict[str, Any], scene_name: str) -> Dict[str, Any]:
    """Resolve the image backend settings for a scene.

    Only the minimum needed by this script is implemented:
    1. ``llm.scenes`` maps the scene name to a backend name;
    2. ``llm.backends`` provides the settings of that backend.

    Args:
        config_data: Parsed configuration mapping.
        scene_name: Key to look up under ``llm.scenes``.

    Returns:
        A dict with ``backend_name``, ``provider``, ``api_base_url``,
        ``api_key``, ``model``, ``endpoint`` and ``timeout_seconds``.

    Raises:
        ValueError: If the scene has no backend name, or the named backend
            has no (non-empty) settings.
    """
    llm_section = config_data.get("llm", {}) or {}
    scene_map = llm_section.get("scenes", {}) or {}
    backend_map = llm_section.get("backends", {}) or {}

    backend_name = str(scene_map.get(scene_name) or "").strip()
    if backend_name == "":
        raise ValueError(f"未在 config.yaml 的 llm.scenes 中找到场景: {scene_name}")

    settings = backend_map.get(backend_name, {}) or {}
    if not settings:
        raise ValueError(f"未在 config.yaml 的 llm.backends 中找到后端: {backend_name}")

    def cleaned(*keys: str, fallback: str = "") -> str:
        # First truthy value among the keys (else the fallback), as a
        # whitespace-stripped string.
        for key in keys:
            candidate = settings.get(key)
            if candidate:
                return str(candidate).strip()
        return str(fallback).strip()

    # When the backend omits timeout_seconds, fall back to the script-level
    # default rather than any other value.
    timeout_value = settings.get("timeout_seconds") or DEFAULT_REQUEST_TIMEOUT_SECONDS

    resolved: Dict[str, Any] = {"backend_name": backend_name}
    resolved["provider"] = cleaned("provider")
    resolved["api_base_url"] = cleaned("api_base_url", "base_url")
    resolved["api_key"] = cleaned("api_key")
    resolved["model"] = cleaned("model", fallback="gpt-image-1")
    resolved["endpoint"] = "images/generations"
    resolved["timeout_seconds"] = int(timeout_value)
    return resolved


def build_request_url(api_base_url: str, endpoint: str) -> str:
    """Join the base URL and endpoint with exactly one slash between them.

    Trailing slashes of *api_base_url* and leading slashes of *endpoint* are
    removed before joining.
    """
    base_part = api_base_url.rstrip("/")
    path_part = endpoint.lstrip("/")
    return "/".join((base_part, path_part))


def build_auth_header(api_key: str) -> str:
    """Return the ``Authorization`` header value for *api_key*.

    The key is stripped of surrounding whitespace. If it already starts with
    ``bearer `` (case-insensitive) it is returned as is; otherwise ``Bearer ``
    is prepended. A falsy key yields ``"Bearer "``.
    """
    token = str(api_key or "").strip()
    if not token.lower().startswith("bearer "):
        token = f"Bearer {token}"
    return token


def sanitize_filename(value: str) -> str:
    """Make *value* usable as a file or directory name.

    After stripping surrounding whitespace, every run of the characters
    backslash, slash, colon, asterisk, question mark, double quote, angle
    brackets and pipe becomes a single underscore, and then every run of
    whitespace becomes a single underscore. All other characters are kept.
    An empty result is replaced by ``"unknown"``.
    """
    trimmed = value.strip()
    without_reserved = re.sub(r"[\\/:*?\"<>|]+", "_", trimmed)
    collapsed = re.sub(r"\s+", "_", without_reserved)
    if collapsed:
        return collapsed
    return "unknown"


def fetch_dota2_heroes() -> List[Dict[str, str]]:
    """Download the hero list from OpenDota and normalise it.

    Each returned dict has three string fields:
    1. ``hero_id``: the ``id`` field of the row, as text;
    2. ``localized_name``: the row's ``localized_name`` field, stripped;
    3. ``english_name``: derived from the row's ``name`` field by removing
       the ``npc_dota_hero_`` prefix, turning underscores into spaces and
       title-casing the result.

    Rows without a ``localized_name`` are dropped. The list is sorted by
    ``english_name`` so repeated runs process heroes in the same order.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    reply = requests.get(OPENDOTA_HERO_STATS_URL, timeout=60)
    reply.raise_for_status()

    collected: List[Dict[str, str]] = []
    for row in reply.json() or []:
        display_name = str(row.get("localized_name") or "").strip()
        if not display_name:
            continue

        raw_name = str(row.get("name") or "").strip()
        readable_name = raw_name.replace("npc_dota_hero_", "").replace("_", " ").title()
        collected.append(
            {
                "hero_id": str(row.get("id") or "").strip(),
                "localized_name": display_name,
                "english_name": readable_name,
            }
        )

    return sorted(collected, key=lambda item: item["english_name"])


def resolve_text_language(image_index: int) -> str:
    """Return the text language code for the 1-based *image_index*.

    The index is mapped onto ``DEFAULT_LANGUAGE_VARIANTS`` and wraps around
    when it exceeds the length of that list, so larger image counts reuse
    the same language distribution from the start.
    """
    cycle_length = len(DEFAULT_LANGUAGE_VARIANTS)
    slot = (image_index - 1) % cycle_length
    return DEFAULT_LANGUAGE_VARIANTS[slot]


def build_consistent_prompt(hero: Dict[str, str], image_index: int) -> str:
    """Build the image prompt for one hero and one image slot.

    The prompt template is the same for every hero; only the hero names, the
    image index and the text-language instructions are substituted, which is
    meant to keep the whole series visually consistent.

    Args:
        hero: Hero record; ``localized_name`` and ``english_name`` are read.
        image_index: 1-based index of the image for this hero; it selects the
            text language via ``resolve_text_language``.

    Returns:
        The prompt text with leading and trailing whitespace removed.
    """
    # Any language code other than "zh" is treated as Japanese.
    uses_chinese = resolve_text_language(image_index) == "zh"
    text_language_label = "中文" if uses_chinese else "日文"
    text_language_desc = (
        "画面中的标题、副标题、能力说明文字统一使用中文排版，字体要有热血漫画海报感，禁止出现日文。"
        if uses_chinese
        else "画面中的标题、副标题、能力说明文字统一使用日文排版，字体要有热血漫画海报感，禁止出现中文。"
    )
    hero_name_cn = hero["localized_name"]
    hero_name_en = hero["english_name"]

    prompt_text = f"""
请为短视频封面创作一张高完成度竖版插画，主体是 Dota2 英雄 {hero_name_cn}（{hero_name_en}）。

核心要求：
1. 角色设定明确为 Dota2 的风格体系下的“至宝级华丽皮肤质感”，但角色身份必须是 {hero_name_cn} 本人，不要画成别的英雄。
2. 画面整体要强烈偏向 JOJO 气质：夸张肌肉与体块、强烈明暗对比、戏剧化姿势、锐利线条、张力十足的漫画分镜感、厚重阴影、速度线、压迫感构图。
3. 需要比普通日漫更偏 JOJO 风，风格统一、成熟、硬朗、华丽，视觉冲击力强。
4. 画面左下角固定放一个“能力雷达图”，用日式游戏 UI 风格表现，半透明发光面板，结构清晰。
5. 画面中加入醒目的文字排版，像热血漫画标题与角色名字幕，排版要高级，不能乱码。
6. 构图固定为 9:16 竖版海报，适合抖音封面，角色居中偏上，保留底部与左下角的信息区。
7. 背景使用史诗感能量、替身感氛围、漫画速度线、粒子、光效，但不要遮挡主体脸和武器。
8. 质感统一为高细节、高完成度、商业海报、收藏级插画。
9. {text_language_desc}

稳定性要求：
1. 全系列都保持相同的版式语言、相同的信息层级、相同的雷达图位置、相同的标题风格。
2. 当前是同一英雄的第 {image_index} 张候选图，本张必须输出{text_language_label}版本；请只在姿势、镜头角度、背景能量流向上做有限变化，不要改变整体系列风格。
3. 不要出现水印、签名、Logo、拼贴、多角色、手部崩坏、脸部畸形、文字糊成乱码。
"""
    return prompt_text.strip()


def _decode_base64_payload(payload: Any) -> bytes:
    """Decode a base64 image payload, accepting an optional data-URI wrapper.

    A payload of the form ``data:<mime>;base64,<data>`` has its header removed
    before decoding. Without this step the header characters would be fed to
    the base64 decoder and corrupt the image bytes.

    Raises:
        ValueError: If a data URI has no comma separator or is not declared
            as base64 encoded.
    """
    if isinstance(payload, str):
        text = payload.strip()
        if text[:5].lower() == "data:":
            header, separator, encoded = text.partition(",")
            if not separator:
                raise ValueError("data URI 中缺少逗号分隔符，无法解析图片内容")
            if "base64" not in header.lower():
                raise ValueError("data URI 不是 base64 编码，无法解析图片内容")
            text = encoded
        return base64.b64decode(text)
    # Non-string payloads (e.g. bytes) are passed to the decoder unchanged.
    return base64.b64decode(payload)


def extract_image_bytes(response_json: Dict[str, Any], timeout_seconds: int) -> bytes:
    """Extract the image bytes from an OpenAI-compatible image response.

    Only the first entry of ``data`` is used. Supported shapes of that entry:
    1. inline base64 under ``b64_json``, ``image_base64`` or ``base64``
       (optionally wrapped as a ``data:...;base64,`` URI): decoded directly;
    2. ``url`` / ``image_url`` holding a ``data:`` URI: decoded directly;
    3. ``url`` / ``image_url`` holding a regular URL: downloaded with one
       extra GET request.

    Args:
        response_json: Parsed JSON body of the generation response.
        timeout_seconds: Timeout applied to the follow-up download, if any.

    Returns:
        The raw image bytes.

    Raises:
        ValueError: If ``data`` is missing or empty, the first entry carries
            no usable image content, or a data URI is malformed.
        requests.HTTPError: If the follow-up download returns an error status.
    """
    data_list = response_json.get("data") or []
    if not data_list:
        raise ValueError(f"接口返回里没有 data 字段: {json.dumps(response_json, ensure_ascii=False)[:500]}")

    first_item = data_list[0] or {}
    b64_content = (
        first_item.get("b64_json")
        or first_item.get("image_base64")
        or first_item.get("base64")
        or ""
    )
    if b64_content:
        return _decode_base64_payload(b64_content)

    image_url = str(first_item.get("url") or first_item.get("image_url") or "").strip()
    if image_url[:5].lower() == "data:":
        # Some gateways inline the image as a data URI in the url field; an
        # HTTP client cannot fetch that scheme, so decode it locally.
        return _decode_base64_payload(image_url)
    if image_url:
        download_response = requests.get(image_url, timeout=timeout_seconds)
        download_response.raise_for_status()
        return download_response.content

    raise ValueError(f"无法从响应中提取图片内容: {json.dumps(first_item, ensure_ascii=False)[:500]}")


def generate_one_image(
    request_url: str,
    api_key: str,
    model: str,
    prompt: str,
    image_size: str,
    image_quality: str,
    timeout_seconds: int,
) -> bytes:
    """Request a single image from an OpenAI-compatible images endpoint.

    A JSON body asking for one image (``n`` = 1) in ``b64_json`` format is
    POSTed to *request_url*; the image bytes are then pulled out of the
    response with ``extract_image_bytes``.

    Args:
        request_url: Full URL of the image generation endpoint.
        api_key: Credential turned into the ``Authorization`` header.
        model: Model name sent in the request body.
        prompt: Prompt text sent in the request body.
        image_size: Value of the ``size`` field.
        image_quality: Value of the ``quality`` field.
        timeout_seconds: Timeout for the POST and for any follow-up download.

    Returns:
        The generated image as raw bytes.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": build_auth_header(api_key),
    }
    request_body = dict(
        model=model,
        prompt=prompt,
        n=1,
        size=image_size,
        quality=image_quality,
        response_format="b64_json",
        user="dota2_douyin_batch_generator",
    )

    reply = requests.post(
        request_url,
        headers=request_headers,
        json=request_body,
        timeout=timeout_seconds,
    )
    reply.raise_for_status()
    return extract_image_bytes(reply.json() or {}, timeout_seconds)


def append_manifest_row(manifest_path: Path, row: Dict[str, Any]) -> None:
    """Append *row* to the manifest as one JSON line (JSONL).

    Appending line by line means that:
    1. records already written survive if the script stops midway;
    2. the file can later be filtered or aggregated per hero.

    Non-ASCII characters are written as is (``ensure_ascii=False``) in UTF-8.
    """
    with manifest_path.open(mode="a", encoding="utf-8") as sink:
        record_line = json.dumps(row, ensure_ascii=False)
        sink.write(f"{record_line}\n")


def ensure_output_dir(output_dir: Path) -> None:
    """Create *output_dir* together with any missing parent directories.

    Nothing happens when the directory already exists.
    """
    os.makedirs(output_dir, exist_ok=True)


def build_generation_tasks(
    heroes: List[Dict[str, str]],
    output_dir: Path,
    count_per_hero: int,
) -> List[Dict[str, Any]]:
    """Expand heroes into a flat list of per-image generation tasks.

    Why the tasks are prepared up front:
    1. a flat "hero x image index" list can be handed straight to a thread
       pool;
    2. output directory, file name and prompt are computed here, so workers
       only have to execute;
    3. the task order is deterministic, which keeps logs easy to follow.

    As a side effect, the per-hero output directory is created for every hero.

    Args:
        heroes: Hero records with ``english_name`` and ``localized_name``.
        output_dir: Root directory under which hero directories are created.
        count_per_hero: Number of images (tasks) per hero.

    Returns:
        One task dict per image, in hero order and then image-index order.
    """
    hero_total = len(heroes)
    planned: List[Dict[str, Any]] = []

    for position, hero in enumerate(heroes, start=1):
        slug = sanitize_filename(hero["english_name"].lower().replace(" ", "_"))
        folder = output_dir / f"{slug}_{sanitize_filename(hero['localized_name'])}"
        ensure_output_dir(folder)

        planned.extend(
            {
                "hero": hero,
                "hero_index": position,
                "total_heroes": hero_total,
                "hero_slug": slug,
                "hero_dir": folder,
                "image_index": number,
                "image_path": folder / f"{slug}_{number:02d}.png",
                "prompt": build_consistent_prompt(hero, number),
            }
            for number in range(1, count_per_hero + 1)
        )

    return planned


def _write_bytes_atomically(target_path: Path, content: bytes) -> None:
    """Write *content* to a sibling ``.part`` file, then rename it into place.

    The final path only ever appears once the data has been fully written, so
    an interrupted run cannot leave a truncated image that a later resume
    would mistake for a finished one.
    """
    temp_path = target_path.with_name(f"{target_path.name}.part")
    try:
        with temp_path.open("wb") as file_obj:
            file_obj.write(content)
        temp_path.replace(target_path)
    except Exception:
        # Best-effort cleanup of the partial file; the original error is what
        # the caller needs to see, so a failed cleanup is deliberately ignored.
        try:
            temp_path.unlink()
        except OSError:
            pass
        raise


def run_single_generation_task(
    task: Dict[str, Any],
    request_url: str,
    api_key: str,
    model: str,
    image_size: str,
    image_quality: str,
    timeout_seconds: int,
    max_retries: int,
    delay_seconds: float,
    force: bool,
    manifest_path: Path,
    manifest_lock: threading.Lock,
    print_lock: threading.Lock,
) -> Tuple[str, Dict[str, Any]]:
    """Run one image generation task inside the thread pool.

    Args:
        task: Task dict; ``hero``, ``hero_index``, ``total_heroes``,
            ``image_index``, ``image_path`` and ``prompt`` are read.
        request_url: Full URL of the image generation endpoint.
        api_key: Credential for the endpoint.
        model: Model name sent with the request.
        image_size: Requested image size.
        image_quality: Requested image quality.
        timeout_seconds: Per-request timeout.
        max_retries: Maximum number of attempts; values below 1 are treated
            as 1 so that at least one request is always made.
        delay_seconds: Pause after a successful generation.
        force: Regenerate even if the target file already exists.
        manifest_path: JSONL manifest that receives success/failure rows.
        manifest_lock: Serialises manifest writes across worker threads.
        print_lock: Serialises console output across worker threads.

    Returns:
        ``(status, payload)`` where status is ``"success"``, ``"skipped"`` or
        ``"failed"`` and payload is the record describing the outcome. Success
        and failure records are also appended to the manifest; skipped tasks
        are not.
    """
    hero = task["hero"]
    image_index = task["image_index"]
    image_path: Path = task["image_path"]
    prompt = task["prompt"]

    # Fields shared by every outcome record, in manifest key order.
    identity_fields = {
        "hero_id": hero["hero_id"],
        "localized_name": hero["localized_name"],
        "english_name": hero["english_name"],
        "image_index": image_index,
        "image_path": str(image_path.as_posix()),
    }
    request_fields = {
        "size": image_size,
        "quality": image_quality,
        "model": model,
        "request_url": request_url,
    }

    with print_lock:
        print(
            f"\n[{task['hero_index']}/{task['total_heroes']}] "
            f"处理英雄: {hero['localized_name']} ({hero['english_name']}) "
            f"- 第 {image_index} 张"
        )

    if image_path.exists() and not force:
        with print_lock:
            print(f"  - 已存在，跳过: {image_path.name}")
        return "skipped", identity_fields

    # A non-positive max_retries must not silently turn into "never try".
    attempt_total = max(1, int(max_retries))

    last_error: Optional[str] = None
    for retry_index in range(1, attempt_total + 1):
        try:
            with print_lock:
                print(f"  - 生成第 {image_index} 张，尝试 {retry_index}/{attempt_total}")

            image_bytes = generate_one_image(
                request_url=request_url,
                api_key=api_key,
                model=model,
                prompt=prompt,
                image_size=image_size,
                image_quality=image_quality,
                timeout_seconds=timeout_seconds,
            )
            if not image_bytes:
                # An empty payload would otherwise be saved as a zero-byte
                # file and reported as a success.
                raise ValueError("接口返回的图片内容为空")

            _write_bytes_atomically(image_path, image_bytes)

            manifest_row = {
                **identity_fields,
                **request_fields,
                "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
                "prompt": prompt,
            }

            # The lock keeps JSONL lines from different threads from
            # interleaving in the manifest file.
            with manifest_lock:
                append_manifest_row(manifest_path, manifest_row)

            with print_lock:
                print(f"  - 生成成功: {image_path.name}")

            time.sleep(delay_seconds)
            return "success", manifest_row
        except Exception as exc:
            # Deliberately broad: any failure of this attempt is logged and
            # retried instead of killing the worker.
            last_error = str(exc)
            with print_lock:
                print(f"  - 生成失败: {last_error}")
            if retry_index < attempt_total:
                # Short back-off to ride out transient network errors or
                # gateway rate limiting.
                time.sleep(min(5, retry_index * 2))

    failed_row = {
        **identity_fields,
        **request_fields,
        "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
        "error": last_error or "未知错误",
    }
    with manifest_lock:
        append_manifest_row(manifest_path, failed_row)
    return "failed", failed_row


def main() -> int:
    """Script entry point.

    Loads the configuration, resolves the image backend, fetches and filters
    the hero list, runs all generation tasks on a thread pool and prints a
    summary.

    Returns:
        ``0`` when no task failed, ``1`` when no hero matched the filters,
        ``2`` when at least one task failed.

    Raises:
        ValueError: If the backend provider is not ``openai_compatible`` or
            the backend lacks a base URL or API key.
    """
    args = parse_args()
    config_data = load_yaml_config(args.config)
    backend = resolve_image_backend(config_data, args.scene)

    if backend["provider"] != "openai_compatible":
        raise ValueError(
            f"场景 {args.scene} 对应的 provider 不是 openai_compatible，而是 {backend['provider']}"
        )
    if not backend["api_base_url"]:
        raise ValueError("图片后端缺少 api_base_url/base_url 配置")
    if not backend["api_key"]:
        raise ValueError("图片后端缺少 api_key 配置")

    output_dir = Path(args.output_dir)
    ensure_output_dir(output_dir)

    manifest_path = output_dir / "generation_manifest.jsonl"
    request_url = build_request_url(backend["api_base_url"], backend["endpoint"])
    # The command-line timeout takes precedence. Because --timeout has its own
    # default, the backend value is only used when --timeout is passed as 0.
    timeout_seconds = int(args.timeout or backend["timeout_seconds"])

    heroes = fetch_dota2_heroes()
    if args.hero_filter:
        keyword = args.hero_filter.lower().strip()
        heroes = [
            hero for hero in heroes
            if keyword in hero["localized_name"].lower() or keyword in hero["english_name"].lower()
        ]
    if args.hero_limit and args.hero_limit > 0:
        heroes = heroes[:args.hero_limit]

    if not heroes:
        print("没有匹配到任何英雄，请检查 --hero-filter 或网络状态。", file=sys.stderr)
        return 1

    print(f"共准备生成 {len(heroes)} 个英雄，每个英雄 {args.count_per_hero} 张。")
    print(f"图片接口: {request_url}")
    print(f"输出目录: {output_dir.resolve()}")
    print(f"并发线程数: {args.max_workers}")

    total_success = 0
    total_skipped = 0
    total_failed = 0
    manifest_lock = threading.Lock()
    print_lock = threading.Lock()
    tasks = build_generation_tasks(
        heroes=heroes,
        output_dir=output_dir,
        count_per_hero=args.count_per_hero,
    )

    # All tasks go to one thread pool so several image requests are in flight
    # at the same time (at least one worker is always used).
    with ThreadPoolExecutor(max_workers=max(1, int(args.max_workers))) as executor:
        future_to_task = {
            executor.submit(
                run_single_generation_task,
                task,
                request_url,
                backend["api_key"],
                backend["model"],
                args.size,
                args.quality,
                timeout_seconds,
                args.max_retries,
                args.delay,
                args.force,
                manifest_path,
                manifest_lock,
                print_lock,
            ): task
            for task in tasks
        }

        for future in as_completed(future_to_task):
            try:
                status, _ = future.result()
            except Exception as exc:
                # An exception that escapes a worker (e.g. the manifest could
                # not be written) must not abort the run and lose the summary;
                # count it as a failure and keep collecting results.
                crashed_task = future_to_task[future]
                with print_lock:
                    print(
                        f"任务异常终止: {crashed_task['hero']['localized_name']} "
                        f"第 {crashed_task['image_index']} 张 - {exc}",
                        file=sys.stderr,
                    )
                status = "failed"

            if status == "success":
                total_success += 1
            elif status == "skipped":
                total_skipped += 1
            else:
                total_failed += 1

    print("\n生成完成。")
    print(f"成功: {total_success}")
    print(f"跳过: {total_skipped}")
    print(f"失败: {total_failed}")
    print(f"清单文件: {manifest_path.resolve()}")
    return 0 if total_failed == 0 else 2


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    raise SystemExit(main())