feat: initial release v0.3.0

2026-03-08 03:15:27 +08:00
commit 881ed44996
1311 changed files with 225407 additions and 0 deletions
--- a/src/components/voice/VoiceDesignDialogBase.tsx
+++ b/src/components/voice/VoiceDesignDialogBase.tsx
@@ -0,0 +1,244 @@
+'use client'
+
+import { useRef, useState } from 'react'
+import { createPortal } from 'react-dom'
+import { useTranslations } from 'next-intl'
+import { resolveTaskPresentationState } from '@/lib/task/presentation'
+import { AppIcon } from '@/components/ui/icons'
+import VoiceDesignGeneratorSection from './VoiceDesignGeneratorSection'
+import {
+  DEFAULT_VOICE_SCHEME_COUNT,
+  generateVoiceDesignOptions,
+  type GeneratedVoice,
+  type VoiceDesignMutationPayload,
+  type VoiceDesignMutationResult,
+} from './voice-design-shared'
+
+export type { VoiceDesignMutationPayload, VoiceDesignMutationResult } from './voice-design-shared'
+
+interface VoiceDesignDialogBaseProps { // Props accepted by VoiceDesignDialogBase.
+  isOpen: boolean // When false the component renders nothing.
+  speaker: string // Interpolated into the dialog title and shown in the replace-confirmation text.
+  hasExistingVoice?: boolean // When true a warning chip is shown and saving first asks for confirmation; defaults to false.
+  onClose: () => void // Invoked at the end of the component's close handler, after local state was reset.
+  onSave: (voiceId: string, audioBase64: string) => void // Receives the id and base64 audio of the selected generated voice.
+  onDesignVoice: (payload: VoiceDesignMutationPayload) => Promise<VoiceDesignMutationResult> // Forwarded to generateVoiceDesignOptions, which calls it once per candidate voice.
+}
+
+/**
+ * Modal dialog that lets the user describe a voice, generate several candidate
+ * voices for `speaker`, audition them and save the selected one.
+ *
+ * All local form state is reset whenever the dialog is closed through its own
+ * close handler. The dialog is rendered through a portal into `document.body`
+ * and renders nothing while `isOpen` is false or when no `document` exists.
+ *
+ * @param isOpen - Whether the dialog is shown.
+ * @param speaker - Speaker name shown in the title and the replace warning.
+ * @param hasExistingVoice - When true, saving asks for confirmation first.
+ * @param onClose - Called after the dialog has reset its state.
+ * @param onSave - Called with the chosen voice id and its base64 audio.
+ * @param onDesignVoice - Mutation that produces a single candidate voice.
+ */
+export default function VoiceDesignDialogBase({
+  isOpen,
+  speaker,
+  hasExistingVoice = false,
+  onClose,
+  onSave,
+  onDesignVoice,
+}: VoiceDesignDialogBaseProps) {
+  const t = useTranslations('common')
+  const tv = useTranslations('voice.voiceDesign')
+
+  const [voicePrompt, setVoicePrompt] = useState('')
+  const [previewText, setPreviewText] = useState(tv('defaultPreviewText'))
+  const [schemeCount, setSchemeCount] = useState(String(DEFAULT_VOICE_SCHEME_COUNT))
+  const [isDesignSubmitting, setIsDesignSubmitting] = useState(false)
+  const [error, setError] = useState<string | null>(null)
+  const [generatedVoices, setGeneratedVoices] = useState<GeneratedVoice[]>([])
+  const [selectedIndex, setSelectedIndex] = useState<number | null>(null)
+  const [showConfirmDialog, setShowConfirmDialog] = useState(false)
+  const [playingIndex, setPlayingIndex] = useState<number | null>(null)
+  const audioRef = useRef<HTMLAudioElement | null>(null)
+  // Bumped whenever a generation starts or the dialog closes, so a request that
+  // is still in flight can detect that its result is no longer wanted.
+  const generationTokenRef = useRef(0)
+  const designSubmittingState = isDesignSubmitting
+    ? resolveTaskPresentationState({
+        phase: 'processing',
+        intent: 'generate',
+        resource: 'audio',
+        hasOutput: false,
+      })
+    : null
+
+  /** Stops the current preview (if any) and clears the playing marker. */
+  const stopPlayback = () => {
+    const current = audioRef.current
+    audioRef.current = null
+    if (current) {
+      // Detach handlers first so the discarded element can never touch state again.
+      current.onended = null
+      current.onerror = null
+      current.pause()
+    }
+    setPlayingIndex(null)
+  }
+
+  /** Generates a fresh set of candidate voices from the current form values. */
+  const handleGenerate = async () => {
+    if (!voicePrompt.trim()) {
+      setError(tv('pleaseSelectStyle'))
+      return
+    }
+
+    const token = generationTokenRef.current + 1
+    generationTokenRef.current = token
+    const isStale = () => generationTokenRef.current !== token
+
+    // The previous candidates are about to be discarded, so their preview must
+    // not keep playing or keep an index of the new list marked as playing.
+    stopPlayback()
+    setIsDesignSubmitting(true)
+    setError(null)
+    setGeneratedVoices([])
+    setSelectedIndex(null)
+
+    try {
+      const voices = await generateVoiceDesignOptions({
+        count: schemeCount,
+        voicePrompt,
+        previewText,
+        defaultPreviewText: tv('defaultPreviewText'),
+        onDesignVoice,
+      })
+      // The dialog was closed while waiting: keep the reset state untouched.
+      if (isStale()) return
+      setGeneratedVoices(voices)
+    } catch (err: unknown) {
+      const status = err instanceof Error ? (err as Error & { status?: number }).status : undefined
+      if (status === 402) {
+        const detail = err instanceof Error ? (err as Error & { detail?: string }).detail : undefined
+        // The balance notice is still shown when the dialog was closed meanwhile.
+        alert(t('insufficientBalance') + '\n\n' + (detail || t('insufficientBalanceDetail')))
+        // Show the translated message instead of a raw internal code.
+        if (!isStale()) setError(t('insufficientBalance'))
+        return
+      }
+
+      if (isStale()) return
+      const message = err instanceof Error ? err.message : tv('generationError')
+      setError(message === 'VOICE_DESIGN_EMPTY_RESULT' ? tv('noVoiceGenerated') : (message || tv('generationError')))
+    } finally {
+      // A stale request must not flip the flag that handleClose already reset.
+      if (!isStale()) setIsDesignSubmitting(false)
+    }
+  }
+
+  /** Toggles the preview of the candidate at `index`; only one preview plays at a time. */
+  const handlePlayVoice = (index: number) => {
+    const isTogglingOff = playingIndex === index && audioRef.current !== null
+    stopPlayback()
+    if (isTogglingOff) return
+
+    const voice = generatedVoices[index]
+    if (!voice) return
+
+    const audio = new Audio(voice.audioUrl)
+    audioRef.current = audio
+    setPlayingIndex(index)
+
+    // Only the element that is still current may clear the playing marker;
+    // late events from a superseded element are ignored.
+    const clearIfCurrent = () => {
+      if (audioRef.current !== audio) return
+      audioRef.current = null
+      setPlayingIndex(null)
+    }
+    audio.onended = clearIfCurrent
+    audio.onerror = clearIfCurrent
+    // play() rejects when playback is blocked or interrupted by pause().
+    void audio.play().catch(clearIfCurrent)
+  }
+
+  /** Resets every piece of local state, stops playback and notifies the parent. */
+  const handleClose = () => {
+    // Invalidate any generation that is still running.
+    generationTokenRef.current += 1
+    setIsDesignSubmitting(false)
+    setVoicePrompt('')
+    setPreviewText(tv('defaultPreviewText'))
+    setSchemeCount(String(DEFAULT_VOICE_SCHEME_COUNT))
+    setError(null)
+    setGeneratedVoices([])
+    setSelectedIndex(null)
+    setShowConfirmDialog(false)
+    stopPlayback()
+    onClose()
+  }
+
+  /** Hands the selected voice to `onSave` and closes the dialog. */
+  const doSave = () => {
+    if (selectedIndex !== null && generatedVoices[selectedIndex]) {
+      const voice = generatedVoices[selectedIndex]
+      onSave(voice.voiceId, voice.audioBase64)
+      handleClose()
+    }
+  }
+
+  /** Saves directly, or asks for confirmation when a voice would be replaced. */
+  const handleConfirmSelection = () => {
+    if (selectedIndex !== null && generatedVoices[selectedIndex]) {
+      if (hasExistingVoice) {
+        setShowConfirmDialog(true)
+      } else {
+        doSave()
+      }
+    }
+  }
+
+  if (!isOpen) return null
+  // No DOM to portal into when there is no document (e.g. during server rendering).
+  if (typeof document === 'undefined') return null
+
+  const dialogContent = (
+    <>
+      <div className="fixed inset-0 z-[9999] glass-overlay" onClick={handleClose} />
+      <div
+        className="fixed z-[10000] left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2 glass-surface-modal w-full max-w-xl overflow-hidden"
+        onClick={(event) => event.stopPropagation()}
+      >
+        <div className="flex items-center justify-between px-5 py-3 border-b border-[var(--glass-stroke-base)] bg-[var(--glass-bg-surface-strong)]">
+          <div className="flex items-center gap-2">
+            <AppIcon name="mic" className="w-5 h-5 text-[var(--glass-tone-info-fg)]" />
+            <h2 className="font-semibold text-[var(--glass-text-primary)]">{tv('designVoiceFor', { speaker })}</h2>
+            {hasExistingVoice && (
+              <span className="glass-chip glass-chip-warning text-xs px-1.5 py-0.5">{tv('hasExistingVoice')}</span>
+            )}
+          </div>
+          <button onClick={handleClose} className="glass-btn-base glass-btn-soft p-1 text-[var(--glass-text-tertiary)]">
+            <AppIcon name="close" className="w-5 h-5" />
+          </button>
+        </div>
+
+        <div className="p-5 space-y-4">
+          <VoiceDesignGeneratorSection
+            voicePrompt={voicePrompt}
+            onVoicePromptChange={setVoicePrompt}
+            previewText={previewText}
+            onPreviewTextChange={setPreviewText}
+            schemeCount={schemeCount}
+            onSchemeCountChange={setSchemeCount}
+            isSubmitting={isDesignSubmitting}
+            submittingState={designSubmittingState}
+            error={error}
+            generatedVoices={generatedVoices}
+            selectedIndex={selectedIndex}
+            onSelectIndex={setSelectedIndex}
+            playingIndex={playingIndex}
+            onPlayVoice={handlePlayVoice}
+            onGenerate={() => {
+              void handleGenerate()
+            }}
+            footer={(
+              <div className="flex gap-2 pt-2">
+                <button
+                  onClick={() => {
+                    void handleGenerate()
+                  }}
+                  disabled={isDesignSubmitting}
+                  className="glass-btn-base glass-btn-secondary flex-1 py-2 rounded-lg text-sm"
+                >
+                  {tv('regenerate')}
+                </button>
+                <button
+                  onClick={handleConfirmSelection}
+                  disabled={selectedIndex === null}
+                  className="glass-btn-base glass-btn-tone-success flex-1 py-2 rounded-lg disabled:opacity-50 disabled:cursor-not-allowed text-sm font-medium"
+                >
+                  {tv('confirmUse')}
+                </button>
+              </div>
+            )}
+          />
+        </div>
+      </div>
+
+      {showConfirmDialog && (
+        <div className="fixed inset-0 z-[10001] flex items-center justify-center p-4 glass-overlay">
+          <div className="glass-surface-modal w-full max-w-sm p-5 text-center">
+            <div className="w-12 h-12 mx-auto glass-chip glass-chip-warning rounded-full flex items-center justify-center mb-3 p-0">
+              <AppIcon name="alert" className="w-6 h-6 text-[var(--glass-tone-warning-fg)]" />
+            </div>
+            <h3 className="font-semibold text-[var(--glass-text-primary)] mb-1">{tv('confirmReplace')}</h3>
+            <p className="text-sm text-[var(--glass-text-secondary)] mb-4">
+              {tv('replaceWarning')}
+              <span className="font-medium text-[var(--glass-text-primary)]">「{speaker}」</span>
+            </p>
+            <div className="flex gap-2">
+              <button
+                onClick={() => setShowConfirmDialog(false)}
+                className="glass-btn-base glass-btn-secondary flex-1 py-2 rounded-lg text-sm"
+              >
+                {t('cancel')}
+              </button>
+              <button
+                onClick={doSave}
+                className="glass-btn-base glass-btn-danger flex-1 py-2 rounded-lg text-sm"
+              >
+                {tv('confirmReplaceBtn')}
+              </button>
+            </div>
+          </div>
+        </div>
+      )}
+    </>
+  )
+
+  return createPortal(dialogContent, document.body)
+}
--- a/src/components/voice/VoiceDesignGeneratorSection.tsx
+++ b/src/components/voice/VoiceDesignGeneratorSection.tsx
@@ -0,0 +1,225 @@
+'use client'
+
+import type { ReactNode } from 'react'
+import { useTranslations } from 'next-intl'
+import TaskStatusInline from '@/components/task/TaskStatusInline'
+import { AppIcon } from '@/components/ui/icons'
+import type { TaskPresentationState } from '@/lib/task/presentation'
+import {
+  MAX_VOICE_SCHEME_COUNT,
+  MIN_VOICE_SCHEME_COUNT,
+  normalizeVoiceSchemeCount,
+  type GeneratedVoice,
+} from './voice-design-shared'
+
+const VOICE_PRESET_KEYS = [ // Preset identifiers; each one is appended to the `presets.` and `presetsPrompts.` translation key prefixes.
+  'maleBroadcaster',
+  'gentleFemale',
+  'matureMale',
+  'livelyFemale',
+  'intellectualFemale',
+  'narrator',
+] as const
+
+type VoicePresetKey = (typeof VOICE_PRESET_KEYS)[number] // Union of the literal preset keys listed in VOICE_PRESET_KEYS.
+
+interface VoiceDesignGeneratorSectionProps { // Props of the controlled VoiceDesignGeneratorSection form.
+  voicePrompt: string // Current voice description; also compared with each preset prompt to highlight the active preset.
+  onVoicePromptChange: (value: string) => void // Called with a preset's prompt text or with the textarea value.
+  previewText: string // Current value of the preview-text input.
+  onPreviewTextChange: (value: string) => void // Called with the preview-text input value on change.
+  schemeCount: string // Requested number of schemes as a string; normalized before it is shown in the select.
+  onSchemeCountChange: (value: string) => void // Called with the value of the chosen select option.
+  isSubmitting: boolean // While true the generate control is hidden and the status line may be shown.
+  submittingState: TaskPresentationState | null // Passed to TaskStatusInline; the status line is shown only when non-null and isSubmitting.
+  error: string | null // Rendered verbatim in the error box when truthy.
+  generatedVoices: GeneratedVoice[] // Candidates rendered as a grid of cards; the grid is hidden when empty.
+  selectedIndex: number | null // Index of the card drawn as selected.
+  onSelectIndex: (index: number) => void // Called with a card's index when that card is clicked.
+  playingIndex: number | null // Index of the card whose play button shows the pause icon.
+  onPlayVoice: (index: number) => void // Called with a card's index when its play button is clicked.
+  onGenerate: () => void // Called when the generate control is activated with a non-blank prompt.
+  footer?: ReactNode // Rendered below the card grid, only while there are generated voices; defaults to null.
+}
+
+/**
+ * Controlled form section for designing a voice: preset style buttons, a free
+ * text description, an optional preview text, the generate control with its
+ * scheme-count select, the in-progress status line, the grid of generated
+ * candidates and the error box.
+ *
+ * The component keeps no state of its own; every value and change handler is
+ * supplied by the parent. Candidate cards can be selected with the mouse or,
+ * when focused, with Enter or Space.
+ */
+export default function VoiceDesignGeneratorSection({
+  voicePrompt,
+  onVoicePromptChange,
+  previewText,
+  onPreviewTextChange,
+  schemeCount,
+  onSchemeCountChange,
+  isSubmitting,
+  submittingState,
+  error,
+  generatedVoices,
+  selectedIndex,
+  onSelectIndex,
+  playingIndex,
+  onPlayVoice,
+  onGenerate,
+  footer = null,
+}: VoiceDesignGeneratorSectionProps) {
+  const tv = useTranslations('voice.voiceDesign')
+  const normalizedSchemeCount = normalizeVoiceSchemeCount(schemeCount)
+  // A blank prompt disables the generate control.
+  const isPromptBlank = !voicePrompt.trim()
+
+  return (
+    <>
+      <div>
+        <div className="text-sm text-[var(--glass-text-secondary)] mb-2">{tv('selectStyle')}</div>
+        <div className="flex flex-wrap gap-1.5">
+          {VOICE_PRESET_KEYS.map((presetKey) => {
+            const prompt = tv(`presetsPrompts.${presetKey}` as `presetsPrompts.${VoicePresetKey}`)
+            return (
+              <button
+                key={presetKey}
+                onClick={() => onVoicePromptChange(prompt)}
+                className={`glass-btn-base px-2.5 py-1 text-xs rounded-md border transition-all ${
+                  voicePrompt === prompt
+                    ? 'glass-btn-tone-info border-[var(--glass-stroke-focus)]'
+                    : 'glass-btn-soft text-[var(--glass-text-secondary)] border-[var(--glass-stroke-base)] hover:border-[var(--glass-stroke-focus)]'
+                }`}
+              >
+                {tv(`presets.${presetKey}` as `presets.${VoicePresetKey}`)}
+              </button>
+            )
+          })}
+        </div>
+      </div>
+
+      <div>
+        <div className="text-sm text-[var(--glass-text-secondary)] mb-1">{tv('orCustomDescription')}</div>
+        <textarea
+          value={voicePrompt}
+          onChange={(event) => onVoicePromptChange(event.target.value)}
+          placeholder={tv('describePlaceholder')}
+          className="glass-textarea-base w-full px-3 py-2 text-sm resize-none"
+          rows={2}
+        />
+      </div>
+
+      <details className="text-sm">
+        <summary className="text-[var(--glass-text-secondary)] cursor-pointer hover:text-[var(--glass-text-primary)]">
+          {tv('editPreviewText')}
+        </summary>
+        <input
+          type="text"
+          value={previewText}
+          onChange={(event) => onPreviewTextChange(event.target.value)}
+          placeholder={tv('defaultPreviewText')}
+          className="glass-input-base w-full mt-2 px-3 py-2 text-sm"
+        />
+      </details>
+
+      {generatedVoices.length === 0 && !isSubmitting && (
+        // A div with role="button" rather than a <button>, because it contains a <select>.
+        <div
+          role="button"
+          tabIndex={isPromptBlank ? -1 : 0}
+          aria-disabled={isPromptBlank}
+          onClick={() => {
+            if (isPromptBlank) return
+            onGenerate()
+          }}
+          onKeyDown={(event) => {
+            if (isPromptBlank) return
+            if (event.key === 'Enter' || event.key === ' ') {
+              event.preventDefault()
+              onGenerate()
+            }
+          }}
+          className={`glass-btn-base glass-btn-primary w-full py-2.5 rounded-lg text-sm font-medium transition-opacity ${
+            isPromptBlank ? 'opacity-50 cursor-not-allowed' : 'cursor-pointer'
+          }`}
+        >
+          <div className="flex items-center justify-center gap-2">
+            <span>{tv('generateSchemesPrefix')}</span>
+            {/* Clicks and key presses on the select must not trigger generation. */}
+            <div
+              className="group relative inline-flex items-center rounded-md px-1.5 py-0.5 transition-colors hover:bg-white/12 focus-within:bg-white/14"
+              onClick={(event) => event.stopPropagation()}
+              onKeyDown={(event) => event.stopPropagation()}
+            >
+              <select
+                value={String(normalizedSchemeCount)}
+                onChange={(event) => onSchemeCountChange(event.target.value)}
+                aria-label={tv('schemeCountAriaLabel')}
+                className="appearance-none bg-transparent border-0 pl-0 pr-3 text-sm font-semibold text-white/96 outline-none cursor-pointer leading-none transition-colors group-hover:text-white focus:text-white"
+              >
+                {Array.from({ length: MAX_VOICE_SCHEME_COUNT - MIN_VOICE_SCHEME_COUNT + 1 }, (_, index) => {
+                  const value = String(index + MIN_VOICE_SCHEME_COUNT)
+                  return (
+                    <option key={value} value={value} className="text-black">
+                      {value}
+                    </option>
+                  )
+                })}
+              </select>
+              <div className="pointer-events-none absolute inset-y-0 right-1 flex items-center text-white/82 transition-colors group-hover:text-white group-focus-within:text-white">
+                <AppIcon name="chevronDown" className="h-3 w-3" />
+              </div>
+            </div>
+            <span>{tv('generateSchemesSuffix')}</span>
+          </div>
+        </div>
+      )}
+
+      {isSubmitting && submittingState && (
+        <div className="py-6">
+          <TaskStatusInline
+            state={submittingState}
+            className="justify-center text-[var(--glass-text-secondary)] [&>span]:text-[var(--glass-text-secondary)]"
+          />
+        </div>
+      )}
+
+      {generatedVoices.length > 0 && (
+        <div className="space-y-3">
+          <div className="text-sm text-[var(--glass-text-secondary)]">{tv('selectScheme')}</div>
+          <div className="grid grid-cols-3 gap-2">
+            {generatedVoices.map((voice, index) => (
+              <div
+                key={voice.voiceId}
+                role="button"
+                tabIndex={0}
+                aria-pressed={selectedIndex === index}
+                onClick={() => onSelectIndex(index)}
+                onKeyDown={(event) => {
+                  // Keys pressed on the nested play button bubble up here; only
+                  // react when the card itself has focus.
+                  if (event.target !== event.currentTarget) return
+                  if (event.key === 'Enter' || event.key === ' ') {
+                    event.preventDefault()
+                    onSelectIndex(index)
+                  }
+                }}
+                className={`relative p-3 rounded-lg border-2 cursor-pointer transition-all text-center ${
+                  selectedIndex === index
+                    ? 'border-[var(--glass-stroke-focus)] bg-[var(--glass-tone-info-bg)]'
+                    : 'border-[var(--glass-stroke-base)] hover:border-[var(--glass-stroke-focus)]'
+                }`}
+              >
+                {selectedIndex === index && (
+                  <div className="absolute -top-1.5 -right-1.5 w-5 h-5 glass-chip glass-chip-info rounded-full flex items-center justify-center p-0">
+                    <AppIcon name="checkSolid" className="w-3 h-3 text-white" />
+                  </div>
+                )}
+                <div className="text-sm font-medium text-[var(--glass-text-primary)] mb-2">{tv('schemeN', { n: index + 1 })}</div>
+                <button
+                  onClick={(event) => {
+                    // Playing a preview must not also select the card.
+                    event.stopPropagation()
+                    onPlayVoice(index)
+                  }}
+                  className={`w-10 h-10 mx-auto rounded-full glass-btn-base flex items-center justify-center transition-all ${
+                    playingIndex === index
+                      ? 'glass-btn-tone-info animate-pulse'
+                      : 'glass-btn-secondary text-[var(--glass-text-secondary)]'
+                  }`}
+                >
+                  {playingIndex === index ? (
+                    <AppIcon name="pause" className="w-4 h-4" />
+                  ) : (
+                    <AppIcon name="play" className="w-5 h-5" />
+                  )}
+                </button>
+              </div>
+            ))}
+          </div>
+          {footer}
+        </div>
+      )}
+
+      {error && (
+        <div className="text-sm text-[var(--glass-tone-danger-fg)] bg-[var(--glass-tone-danger-bg)] px-3 py-2 rounded-lg">
+          {error}
+        </div>
+      )}
+    </>
+  )
+}
--- a/src/components/voice/voice-design-shared.ts
+++ b/src/components/voice/voice-design-shared.ts
@@ -0,0 +1,83 @@
+export const DEFAULT_VOICE_SCHEME_COUNT = 3 // Fallback returned by normalizeVoiceSchemeCount when the input is not a finite number.
+export const MIN_VOICE_SCHEME_COUNT = 1 // Lower bound that normalizeVoiceSchemeCount clamps to.
+export const MAX_VOICE_SCHEME_COUNT = 10 // Upper bound that normalizeVoiceSchemeCount clamps to.
+
+export type VoiceDesignMutationPayload = { // Argument handed to the onDesignVoice callback for one candidate voice.
+  voicePrompt: string // Voice description; generateVoiceDesignOptions sends it trimmed.
+  previewText: string // Preview text; generateVoiceDesignOptions falls back to the default text when it is blank.
+  preferredName: string // Name produced by createPreferredName for this candidate.
+  language: 'zh' // Only 'zh' is permitted by the type.
+}
+
+export type VoiceDesignMutationResult = { // Value resolved by the onDesignVoice callback.
+  voiceId?: string // Must be a non-empty string whenever audioBase64 is present, otherwise generateVoiceDesignOptions throws.
+  audioBase64?: string // When missing or empty the result is skipped by generateVoiceDesignOptions.
+  detail?: string // Not read by generateVoiceDesignOptions; presumably a backend message - confirm with the caller.
+}
+
+export type GeneratedVoice = { // One usable candidate returned by generateVoiceDesignOptions.
+  voiceId: string // Identifier copied from the mutation result.
+  audioBase64: string // Base64 audio copied from the mutation result.
+  audioUrl: string // `data:audio/wav;base64,` URL built from audioBase64.
+}
+
+/**
+ * Coerces a scheme-count input into a whole number between
+ * MIN_VOICE_SCHEME_COUNT and MAX_VOICE_SCHEME_COUNT (inclusive).
+ *
+ * Strings are parsed as base-10 integers and numbers are truncated toward
+ * zero, so both input kinds agree on fractional values (`2.7` and `'2.7'`
+ * both give 2).
+ *
+ * @param input - Raw count, e.g. the value of the scheme-count select.
+ * @returns The clamped integer count, or DEFAULT_VOICE_SCHEME_COUNT when the
+ *   input is undefined, unparsable, NaN or infinite.
+ */
+export function normalizeVoiceSchemeCount(input: string | number | undefined): number {
+  const parsed = typeof input === 'number' ? Math.trunc(input) : Number.parseInt(input ?? '', 10)
+  // Math.trunc keeps NaN and the infinities as they are, so they fall back here as well.
+  if (!Number.isFinite(parsed)) return DEFAULT_VOICE_SCHEME_COUNT
+  return Math.min(MAX_VOICE_SCHEME_COUNT, Math.max(MIN_VOICE_SCHEME_COUNT, parsed))
+}
+
+/**
+ * Builds a preferred voice name of the form `voice_<timestamp>_<n>`, where
+ * `<timestamp>` is `now()` in base 36 and `<n>` is `index + 1`.
+ *
+ * The name never exceeds 16 characters. When it would, the timestamp is
+ * shortened (keeping its fastest-changing trailing digits) rather than the
+ * index suffix, so names created for different indexes at the same instant
+ * stay distinct.
+ *
+ * @param index - Zero-based position of the candidate voice.
+ * @param now - Clock returning milliseconds; defaults to `Date.now`.
+ * @returns The generated name, at most 16 characters long.
+ */
+export function createVoiceDesignPreferredName(index: number, now: () => number = Date.now): string {
+  const maxLength = 16
+  const prefix = 'voice_'
+  const suffix = `_${index + 1}`
+  const timestamp = now().toString(36)
+  const budget = maxLength - prefix.length - suffix.length
+  // slice(-0) would return the whole string, so an exhausted budget is handled explicitly.
+  const stamp = budget > 0 ? timestamp.slice(-budget) : ''
+  // The final slice only matters when the suffix alone already exceeds the budget.
+  return `${prefix}${stamp}${suffix}`.slice(0, maxLength)
+}
+
+interface GenerateVoiceDesignOptionsParams { // Options object accepted by generateVoiceDesignOptions.
+  count: string | number | undefined // Requested number of candidates; passed through normalizeVoiceSchemeCount.
+  voicePrompt: string // Voice description; trimmed, and rejected when blank.
+  previewText: string // Preview text; trimmed before use.
+  defaultPreviewText: string // Used instead of previewText when that is blank after trimming.
+  language?: 'zh' // Forwarded in every payload; defaults to 'zh'.
+  onDesignVoice: (payload: VoiceDesignMutationPayload) => Promise<VoiceDesignMutationResult> // Awaited once per candidate, one call after another.
+  createPreferredName?: (index: number) => string // Produces preferredName for a zero-based index; defaults to createVoiceDesignPreferredName.
+}
+
+/**
+ * Requests candidate voices one after another and returns the usable ones.
+ *
+ * `onDesignVoice` is awaited once per requested candidate with the trimmed
+ * prompt, the effective preview text, a preferred name and the language.
+ * Results without audio are skipped; each remaining result becomes a
+ * GeneratedVoice with a `data:audio/wav;base64,` URL.
+ *
+ * @returns The candidates that came back with audio, in request order.
+ * @throws Error `VOICE_PROMPT_REQUIRED` when the prompt is blank.
+ * @throws Error `VOICE_DESIGN_INVALID_RESPONSE: missing voiceId` when a result
+ *   has audio but no non-empty string id.
+ * @throws Error `VOICE_DESIGN_EMPTY_RESULT` when no result contained audio.
+ */
+export async function generateVoiceDesignOptions({
+  count,
+  voicePrompt,
+  previewText,
+  defaultPreviewText,
+  language = 'zh',
+  onDesignVoice,
+  createPreferredName = (index) => createVoiceDesignPreferredName(index),
+}: GenerateVoiceDesignOptionsParams): Promise<GeneratedVoice[]> {
+  const prompt = voicePrompt.trim()
+  if (prompt.length === 0) throw new Error('VOICE_PROMPT_REQUIRED')
+
+  const trimmedPreview = previewText.trim()
+  const textToSpeak = trimmedPreview ? trimmedPreview : defaultPreviewText
+  const total = normalizeVoiceSchemeCount(count)
+  const collected: GeneratedVoice[] = []
+
+  let slot = 0
+  while (slot < total) {
+    // Requests are issued strictly in sequence; each one waits for the previous.
+    const { voiceId, audioBase64 } = await onDesignVoice({
+      voicePrompt: prompt,
+      previewText: textToSpeak,
+      preferredName: createPreferredName(slot),
+      language,
+    })
+    slot += 1
+
+    // A result without audio is silently dropped, whatever else it carries.
+    if (audioBase64) {
+      if (typeof voiceId !== 'string' || voiceId === '') {
+        throw new Error('VOICE_DESIGN_INVALID_RESPONSE: missing voiceId')
+      }
+      collected.push({
+        voiceId,
+        audioBase64,
+        audioUrl: `data:audio/wav;base64,${audioBase64}`,
+      })
+    }
+  }
+
+  if (collected.length === 0) throw new Error('VOICE_DESIGN_EMPTY_RESULT')
+  return collected
+}