feat: initial release v0.3.0

This commit is contained in:
saturn
2026-03-08 03:15:27 +08:00
commit 881ed44996
1311 changed files with 225407 additions and 0 deletions

View File

@@ -0,0 +1,244 @@
'use client'
import { useRef, useState } from 'react'
import { createPortal } from 'react-dom'
import { useTranslations } from 'next-intl'
import { resolveTaskPresentationState } from '@/lib/task/presentation'
import { AppIcon } from '@/components/ui/icons'
import VoiceDesignGeneratorSection from './VoiceDesignGeneratorSection'
import {
DEFAULT_VOICE_SCHEME_COUNT,
generateVoiceDesignOptions,
type GeneratedVoice,
type VoiceDesignMutationPayload,
type VoiceDesignMutationResult,
} from './voice-design-shared'
export type { VoiceDesignMutationPayload, VoiceDesignMutationResult } from './voice-design-shared'
/** Props accepted by the VoiceDesignDialogBase component. */
interface VoiceDesignDialogBaseProps {
/** When false the component renders nothing. */
isOpen: boolean
/** Speaker name interpolated into the dialog title and the replace warning. */
speaker: string
/** When true a warning chip is shown and saving asks for confirmation first. Treated as false when omitted. */
hasExistingVoice?: boolean
/** Invoked after the dialog has reset its own state (close button, overlay click, or after a save). */
onClose: () => void
/** Receives the `voiceId` and `audioBase64` of the scheme the user confirmed. */
onSave: (voiceId: string, audioBase64: string) => void
/** Performs one voice-design request; handed to `generateVoiceDesignOptions`, which calls it once per requested scheme. */
onDesignVoice: (payload: VoiceDesignMutationPayload) => Promise<VoiceDesignMutationResult>
}
/**
 * Modal dialog that lets the user describe a voice, generate several candidate
 * schemes through `onDesignVoice`, audition them, and save one via `onSave`.
 *
 * The dialog is rendered into `document.body` through a portal and renders
 * nothing while `isOpen` is false or when no `document` is available.
 *
 * @param isOpen - Whether the dialog is shown.
 * @param speaker - Speaker name shown in the title and the replace warning.
 * @param hasExistingVoice - When true, saving first asks for confirmation.
 * @param onClose - Called after the dialog reset its internal state.
 * @param onSave - Called with the confirmed scheme's `voiceId` and `audioBase64`.
 * @param onDesignVoice - Performs a single voice-design request.
 */
export default function VoiceDesignDialogBase({
  isOpen,
  speaker,
  hasExistingVoice = false,
  onClose,
  onSave,
  onDesignVoice,
}: VoiceDesignDialogBaseProps) {
  const t = useTranslations('common')
  const tv = useTranslations('voice.voiceDesign')
  const [voicePrompt, setVoicePrompt] = useState('')
  const [previewText, setPreviewText] = useState(tv('defaultPreviewText'))
  const [schemeCount, setSchemeCount] = useState(String(DEFAULT_VOICE_SCHEME_COUNT))
  const [isDesignSubmitting, setIsDesignSubmitting] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [generatedVoices, setGeneratedVoices] = useState<GeneratedVoice[]>([])
  const [selectedIndex, setSelectedIndex] = useState<number | null>(null)
  const [showConfirmDialog, setShowConfirmDialog] = useState(false)
  const [playingIndex, setPlayingIndex] = useState<number | null>(null)
  const audioRef = useRef<HTMLAudioElement | null>(null)
  // Identifies the most recent generation run. Bumped on every new run and on
  // close so that a run finishing late can tell its result is no longer wanted.
  const generationTokenRef = useRef(0)

  const designSubmittingState = isDesignSubmitting
    ? resolveTaskPresentationState({
        phase: 'processing',
        intent: 'generate',
        resource: 'audio',
        hasOutput: false,
      })
    : null

  /** Stops the current preview (if any) and clears the playing indicator. */
  const stopPlayback = () => {
    const current = audioRef.current
    if (current) {
      // Detach handlers first so the discarded element cannot touch state later.
      current.onended = null
      current.onerror = null
      current.pause()
      audioRef.current = null
    }
    setPlayingIndex(null)
  }

  /** Requests a fresh batch of voice schemes for the current prompt. */
  const handleGenerate = async () => {
    if (!voicePrompt.trim()) {
      setError(tv('pleaseSelectStyle'))
      return
    }
    const token = generationTokenRef.current + 1
    generationTokenRef.current = token
    const isStale = () => generationTokenRef.current !== token

    // The voices about to be discarded must not keep playing or stay marked as playing.
    stopPlayback()
    setIsDesignSubmitting(true)
    setError(null)
    setGeneratedVoices([])
    setSelectedIndex(null)
    try {
      const voices = await generateVoiceDesignOptions({
        count: schemeCount,
        voicePrompt,
        previewText,
        defaultPreviewText: tv('defaultPreviewText'),
        onDesignVoice,
      })
      // The dialog was closed (or another run started) while waiting: drop the result.
      if (isStale()) return
      setGeneratedVoices(voices)
    } catch (err: unknown) {
      if (isStale()) return
      const failure = err instanceof Error ? (err as Error & { status?: number; detail?: string }) : null
      if (failure?.status === 402) {
        alert(t('insufficientBalance') + '\n\n' + (failure.detail || t('insufficientBalanceDetail')))
        // Show the translated message rather than an internal error code,
        // because the error text is rendered verbatim to the user.
        setError(t('insufficientBalance'))
        return
      }
      const message = failure ? failure.message : tv('generationError')
      setError(message === 'VOICE_DESIGN_EMPTY_RESULT' ? tv('noVoiceGenerated') : (message || tv('generationError')))
    } finally {
      // A stale run must not flip the flag of a newer run or of a closed dialog.
      if (!isStale()) setIsDesignSubmitting(false)
    }
  }

  /** Toggles preview playback for the scheme at `index`. */
  const handlePlayVoice = (index: number) => {
    if (playingIndex === index && audioRef.current) {
      stopPlayback()
      return
    }
    stopPlayback()
    const voice = generatedVoices[index]
    if (!voice) return

    const audio = new Audio(voice.audioUrl)
    audioRef.current = audio
    // Only the element that is still current may clear the playing indicator.
    const resetIfCurrent = () => {
      if (audioRef.current !== audio) return
      audioRef.current = null
      setPlayingIndex(null)
    }
    audio.onended = resetIfCurrent
    audio.onerror = resetIfCurrent
    setPlayingIndex(index)
    // play() returns a promise that rejects when playback cannot start;
    // handle it so the UI does not stay in the "playing" state.
    void audio.play().catch(resetIfCurrent)
  }

  /** Resets all dialog state, stops audio, and notifies the parent. */
  const handleClose = () => {
    // Invalidate any generation still in flight so its result is ignored.
    generationTokenRef.current += 1
    stopPlayback()
    setIsDesignSubmitting(false)
    setVoicePrompt('')
    setPreviewText(tv('defaultPreviewText'))
    setSchemeCount(String(DEFAULT_VOICE_SCHEME_COUNT))
    setError(null)
    setGeneratedVoices([])
    setSelectedIndex(null)
    setShowConfirmDialog(false)
    onClose()
  }

  /** Hands the selected scheme to `onSave` and closes the dialog. */
  const doSave = () => {
    if (selectedIndex === null) return
    const voice = generatedVoices[selectedIndex]
    if (!voice) return
    onSave(voice.voiceId, voice.audioBase64)
    handleClose()
  }

  /** Saves directly, or asks for confirmation when a voice already exists. */
  const handleConfirmSelection = () => {
    if (selectedIndex === null || !generatedVoices[selectedIndex]) return
    if (hasExistingVoice) {
      setShowConfirmDialog(true)
    } else {
      doSave()
    }
  }

  if (!isOpen) return null
  if (typeof document === 'undefined') return null

  const dialogContent = (
    <>
      <div className="fixed inset-0 z-[9999] glass-overlay" onClick={handleClose} />
      <div
        className="fixed z-[10000] left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2 glass-surface-modal w-full max-w-xl overflow-hidden"
        onClick={(event) => event.stopPropagation()}
      >
        <div className="flex items-center justify-between px-5 py-3 border-b border-[var(--glass-stroke-base)] bg-[var(--glass-bg-surface-strong)]">
          <div className="flex items-center gap-2">
            <AppIcon name="mic" className="w-5 h-5 text-[var(--glass-tone-info-fg)]" />
            <h2 className="font-semibold text-[var(--glass-text-primary)]">{tv('designVoiceFor', { speaker })}</h2>
            {hasExistingVoice && (
              <span className="glass-chip glass-chip-warning text-xs px-1.5 py-0.5">{tv('hasExistingVoice')}</span>
            )}
          </div>
          <button
            type="button"
            onClick={handleClose}
            className="glass-btn-base glass-btn-soft p-1 text-[var(--glass-text-tertiary)]"
          >
            <AppIcon name="close" className="w-5 h-5" />
          </button>
        </div>
        <div className="p-5 space-y-4">
          <VoiceDesignGeneratorSection
            voicePrompt={voicePrompt}
            onVoicePromptChange={setVoicePrompt}
            previewText={previewText}
            onPreviewTextChange={setPreviewText}
            schemeCount={schemeCount}
            onSchemeCountChange={setSchemeCount}
            isSubmitting={isDesignSubmitting}
            submittingState={designSubmittingState}
            error={error}
            generatedVoices={generatedVoices}
            selectedIndex={selectedIndex}
            onSelectIndex={setSelectedIndex}
            playingIndex={playingIndex}
            onPlayVoice={handlePlayVoice}
            onGenerate={() => {
              void handleGenerate()
            }}
            footer={(
              <div className="flex gap-2 pt-2">
                <button
                  type="button"
                  onClick={() => {
                    void handleGenerate()
                  }}
                  disabled={isDesignSubmitting}
                  className="glass-btn-base glass-btn-secondary flex-1 py-2 rounded-lg text-sm"
                >
                  {tv('regenerate')}
                </button>
                <button
                  type="button"
                  onClick={handleConfirmSelection}
                  disabled={selectedIndex === null}
                  className="glass-btn-base glass-btn-tone-success flex-1 py-2 rounded-lg disabled:opacity-50 disabled:cursor-not-allowed text-sm font-medium"
                >
                  {tv('confirmUse')}
                </button>
              </div>
            )}
          />
        </div>
      </div>
      {showConfirmDialog && (
        <div className="fixed inset-0 z-[10001] flex items-center justify-center p-4 glass-overlay">
          <div className="glass-surface-modal w-full max-w-sm p-5 text-center">
            <div className="w-12 h-12 mx-auto glass-chip glass-chip-warning rounded-full flex items-center justify-center mb-3 p-0">
              <AppIcon name="alert" className="w-6 h-6 text-[var(--glass-tone-warning-fg)]" />
            </div>
            <h3 className="font-semibold text-[var(--glass-text-primary)] mb-1">{tv('confirmReplace')}</h3>
            <p className="text-sm text-[var(--glass-text-secondary)] mb-4">
              {tv('replaceWarning')}
              <span className="font-medium text-[var(--glass-text-primary)]">{speaker}</span>
            </p>
            <div className="flex gap-2">
              <button
                type="button"
                onClick={() => setShowConfirmDialog(false)}
                className="glass-btn-base glass-btn-secondary flex-1 py-2 rounded-lg text-sm"
              >
                {t('cancel')}
              </button>
              <button
                type="button"
                onClick={doSave}
                className="glass-btn-base glass-btn-danger flex-1 py-2 rounded-lg text-sm"
              >
                {tv('confirmReplaceBtn')}
              </button>
            </div>
          </div>
        </div>
      )}
    </>
  )
  return createPortal(dialogContent, document.body)
}

View File

@@ -0,0 +1,225 @@
'use client'
import type { ReactNode } from 'react'
import { useTranslations } from 'next-intl'
import TaskStatusInline from '@/components/task/TaskStatusInline'
import { AppIcon } from '@/components/ui/icons'
import type { TaskPresentationState } from '@/lib/task/presentation'
import {
MAX_VOICE_SCHEME_COUNT,
MIN_VOICE_SCHEME_COUNT,
normalizeVoiceSchemeCount,
type GeneratedVoice,
} from './voice-design-shared'
// Identifiers of the built-in voice style presets. Each key is looked up twice
// in the `voice.voiceDesign` translations: `presets.<key>` for the button label
// and `presetsPrompts.<key>` for the prompt text the preset fills in.
const VOICE_PRESET_KEYS = [
'maleBroadcaster',
'gentleFemale',
'matureMale',
'livelyFemale',
'intellectualFemale',
'narrator',
] as const
// Union of the preset identifiers listed above.
type VoicePresetKey = (typeof VOICE_PRESET_KEYS)[number]
/** Props accepted by the VoiceDesignGeneratorSection component. */
interface VoiceDesignGeneratorSectionProps {
/** Current voice description; also compared against preset prompts to highlight the active preset. */
voicePrompt: string
/** Called with the new description when a preset is clicked or the textarea changes. */
onVoicePromptChange: (value: string) => void
/** Text shown in the preview-text input. */
previewText: string
/** Called with the new preview text when the input changes. */
onPreviewTextChange: (value: string) => void
/** Requested number of schemes as a string; normalized with `normalizeVoiceSchemeCount` before it is shown in the select. */
schemeCount: string
/** Called with the selected option value when the scheme-count select changes. */
onSchemeCountChange: (value: string) => void
/** While true the generate control is hidden. */
isSubmitting: boolean
/** Status rendered through `TaskStatusInline` while `isSubmitting` is true; nothing is shown when null. */
submittingState: TaskPresentationState | null
/** Error text rendered as-is in the danger box; nothing is shown when null or empty. */
error: string | null
/** Generated schemes; when empty (and not submitting) the generate control is shown instead of the grid. */
generatedVoices: GeneratedVoice[]
/** Index of the highlighted scheme, or null when none is selected. */
selectedIndex: number | null
/** Called with the index of the scheme card that was clicked. */
onSelectIndex: (index: number) => void
/** Index of the scheme whose play button shows the pause icon, or null. */
playingIndex: number | null
/** Called with the index of the scheme whose play button was clicked. */
onPlayVoice: (index: number) => void
/** Called when the generate control is activated while the prompt is non-blank. */
onGenerate: () => void
/** Extra content rendered below the scheme grid; only shown when there are generated voices. */
footer?: ReactNode
}
/**
 * Presentational form for voice design: preset chips, a free-text description,
 * an optional preview-text field, the generate control with its scheme-count
 * select, the in-progress status, the grid of generated schemes, and the error
 * box. All state lives in the parent and is passed in through props.
 *
 * Scheme cards can be selected with the keyboard (Enter / Space) as well as by
 * clicking, and every `<button>` is `type="button"` so embedding this section
 * inside a `<form>` never triggers a submit.
 */
export default function VoiceDesignGeneratorSection({
  voicePrompt,
  onVoicePromptChange,
  previewText,
  onPreviewTextChange,
  schemeCount,
  onSchemeCountChange,
  isSubmitting,
  submittingState,
  error,
  generatedVoices,
  selectedIndex,
  onSelectIndex,
  playingIndex,
  onPlayVoice,
  onGenerate,
  footer = null,
}: VoiceDesignGeneratorSectionProps) {
  const tv = useTranslations('voice.voiceDesign')
  const normalizedSchemeCount = normalizeVoiceSchemeCount(schemeCount)
  // The generate control is inert while the description is blank.
  const hasPrompt = voicePrompt.trim().length > 0
  // Option values MIN..MAX for the scheme-count select.
  const schemeCountChoices = Array.from(
    { length: MAX_VOICE_SCHEME_COUNT - MIN_VOICE_SCHEME_COUNT + 1 },
    (_, offset) => String(offset + MIN_VOICE_SCHEME_COUNT),
  )

  return (
    <>
      <div>
        <div className="text-sm text-[var(--glass-text-secondary)] mb-2">{tv('selectStyle')}</div>
        <div className="flex flex-wrap gap-1.5">
          {VOICE_PRESET_KEYS.map((presetKey) => {
            const prompt = tv(`presetsPrompts.${presetKey}` as `presetsPrompts.${VoicePresetKey}`)
            const isActive = voicePrompt === prompt
            return (
              <button
                key={presetKey}
                type="button"
                aria-pressed={isActive}
                onClick={() => onVoicePromptChange(prompt)}
                className={`glass-btn-base px-2.5 py-1 text-xs rounded-md border transition-all ${
                  isActive
                    ? 'glass-btn-tone-info border-[var(--glass-stroke-focus)]'
                    : 'glass-btn-soft text-[var(--glass-text-secondary)] border-[var(--glass-stroke-base)] hover:border-[var(--glass-stroke-focus)]'
                }`}
              >
                {tv(`presets.${presetKey}` as `presets.${VoicePresetKey}`)}
              </button>
            )
          })}
        </div>
      </div>
      <div>
        <div className="text-sm text-[var(--glass-text-secondary)] mb-1">{tv('orCustomDescription')}</div>
        <textarea
          value={voicePrompt}
          onChange={(event) => onVoicePromptChange(event.target.value)}
          placeholder={tv('describePlaceholder')}
          className="glass-textarea-base w-full px-3 py-2 text-sm resize-none"
          rows={2}
        />
      </div>
      <details className="text-sm">
        <summary className="text-[var(--glass-text-secondary)] cursor-pointer hover:text-[var(--glass-text-primary)]">
          {tv('editPreviewText')}
        </summary>
        <input
          type="text"
          value={previewText}
          onChange={(event) => onPreviewTextChange(event.target.value)}
          placeholder={tv('defaultPreviewText')}
          className="glass-input-base w-full mt-2 px-3 py-2 text-sm"
        />
      </details>
      {generatedVoices.length === 0 && !isSubmitting && (
        // A div with role="button" is used instead of <button> because the
        // control contains a <select>, which a real button may not contain.
        <div
          role="button"
          tabIndex={hasPrompt ? 0 : -1}
          aria-disabled={!hasPrompt}
          onClick={() => {
            if (!hasPrompt) return
            onGenerate()
          }}
          onKeyDown={(event) => {
            if (!hasPrompt) return
            if (event.key === 'Enter' || event.key === ' ') {
              event.preventDefault()
              onGenerate()
            }
          }}
          className={`glass-btn-base glass-btn-primary w-full py-2.5 rounded-lg text-sm font-medium transition-opacity ${
            hasPrompt ? 'cursor-pointer' : 'opacity-50 cursor-not-allowed'
          }`}
        >
          <div className="flex items-center justify-center gap-2">
            <span>{tv('generateSchemesPrefix')}</span>
            {/* Interacting with the select must not trigger generation. */}
            <div
              className="group relative inline-flex items-center rounded-md px-1.5 py-0.5 transition-colors hover:bg-white/12 focus-within:bg-white/14"
              onClick={(event) => event.stopPropagation()}
              onKeyDown={(event) => event.stopPropagation()}
            >
              <select
                value={String(normalizedSchemeCount)}
                onChange={(event) => onSchemeCountChange(event.target.value)}
                aria-label={tv('schemeCountAriaLabel')}
                className="appearance-none bg-transparent border-0 pl-0 pr-3 text-sm font-semibold text-white/96 outline-none cursor-pointer leading-none transition-colors group-hover:text-white focus:text-white"
              >
                {schemeCountChoices.map((value) => (
                  <option key={value} value={value} className="text-black">
                    {value}
                  </option>
                ))}
              </select>
              <div className="pointer-events-none absolute inset-y-0 right-1 flex items-center text-white/82 transition-colors group-hover:text-white group-focus-within:text-white">
                <AppIcon name="chevronDown" className="h-3 w-3" />
              </div>
            </div>
            <span>{tv('generateSchemesSuffix')}</span>
          </div>
        </div>
      )}
      {isSubmitting && submittingState && (
        <div className="py-6">
          <TaskStatusInline
            state={submittingState}
            className="justify-center text-[var(--glass-text-secondary)] [&>span]:text-[var(--glass-text-secondary)]"
          />
        </div>
      )}
      {generatedVoices.length > 0 && (
        <div className="space-y-3">
          <div className="text-sm text-[var(--glass-text-secondary)]">{tv('selectScheme')}</div>
          <div className="grid grid-cols-3 gap-2">
            {generatedVoices.map((voice, index) => {
              const isSelected = selectedIndex === index
              const isPlaying = playingIndex === index
              return (
                <div
                  key={voice.voiceId}
                  role="button"
                  tabIndex={0}
                  aria-pressed={isSelected}
                  onClick={() => onSelectIndex(index)}
                  onKeyDown={(event) => {
                    // Ignore keys coming from the nested play button; it handles its own activation.
                    if (event.target !== event.currentTarget) return
                    if (event.key === 'Enter' || event.key === ' ') {
                      event.preventDefault()
                      onSelectIndex(index)
                    }
                  }}
                  className={`relative p-3 rounded-lg border-2 cursor-pointer transition-all text-center ${
                    isSelected
                      ? 'border-[var(--glass-stroke-focus)] bg-[var(--glass-tone-info-bg)]'
                      : 'border-[var(--glass-stroke-base)] hover:border-[var(--glass-stroke-focus)]'
                  }`}
                >
                  {isSelected && (
                    <div className="absolute -top-1.5 -right-1.5 w-5 h-5 glass-chip glass-chip-info rounded-full flex items-center justify-center p-0">
                      <AppIcon name="checkSolid" className="w-3 h-3 text-white" />
                    </div>
                  )}
                  <div className="text-sm font-medium text-[var(--glass-text-primary)] mb-2">{tv('schemeN', { n: index + 1 })}</div>
                  <button
                    type="button"
                    onClick={(event) => {
                      // Playing a preview must not also select the card.
                      event.stopPropagation()
                      onPlayVoice(index)
                    }}
                    className={`w-10 h-10 mx-auto rounded-full glass-btn-base flex items-center justify-center transition-all ${
                      isPlaying
                        ? 'glass-btn-tone-info animate-pulse'
                        : 'glass-btn-secondary text-[var(--glass-text-secondary)]'
                    }`}
                  >
                    {isPlaying ? (
                      <AppIcon name="pause" className="w-4 h-4" />
                    ) : (
                      <AppIcon name="play" className="w-5 h-5" />
                    )}
                  </button>
                </div>
              )
            })}
          </div>
          {footer}
        </div>
      )}
      {error && (
        <div className="text-sm text-[var(--glass-tone-danger-fg)] bg-[var(--glass-tone-danger-bg)] px-3 py-2 rounded-lg">
          {error}
        </div>
      )}
    </>
  )
}

View File

@@ -0,0 +1,83 @@
/** Scheme count used when the requested count cannot be parsed. */
export const DEFAULT_VOICE_SCHEME_COUNT = 3
/** Lower bound a requested scheme count is clamped to. */
export const MIN_VOICE_SCHEME_COUNT = 1
/** Upper bound a requested scheme count is clamped to. */
export const MAX_VOICE_SCHEME_COUNT = 10

/** Request sent to the voice-design callback for a single scheme. */
export type VoiceDesignMutationPayload = {
  voicePrompt: string
  previewText: string
  preferredName: string
  language: 'zh'
}

/** Response of the voice-design callback; every field may be absent. */
export type VoiceDesignMutationResult = {
  voiceId?: string
  audioBase64?: string
  detail?: string
}

/** A usable generated scheme; `audioUrl` is the data URL `generateVoiceDesignOptions` builds from `audioBase64`. */
export type GeneratedVoice = {
  voiceId: string
  audioBase64: string
  audioUrl: string
}

/**
 * Converts a requested scheme count into an integer within
 * [`MIN_VOICE_SCHEME_COUNT`, `MAX_VOICE_SCHEME_COUNT`].
 *
 * Strings are parsed as base-10 integers. Numbers are truncated toward zero so
 * that `2.7` and `'2.7'` both yield 2; without truncation a fractional number
 * would pass through and drive one loop iteration more than its integer part.
 *
 * @param input - Requested count as a number, a numeric string, or undefined.
 * @returns The clamped integer count, or `DEFAULT_VOICE_SCHEME_COUNT` when the
 *   input is undefined, unparsable, NaN, or infinite.
 */
export function normalizeVoiceSchemeCount(input: string | number | undefined): number {
  const rawValue = typeof input === 'number' ? Math.trunc(input) : Number.parseInt(input ?? '', 10)
  if (!Number.isFinite(rawValue)) return DEFAULT_VOICE_SCHEME_COUNT
  return Math.min(MAX_VOICE_SCHEME_COUNT, Math.max(MIN_VOICE_SCHEME_COUNT, rawValue))
}
/**
 * Builds a name of the form `voice_<base36 timestamp>_<index + 1>` that is at
 * most 16 characters long.
 *
 * When the full name would exceed 16 characters, leading characters of the
 * timestamp are dropped so the `_<n>` suffix survives. Cutting the end of the
 * string instead turns `..._10` into `..._1`, which is indistinguishable from
 * the name for index 0. Names that already fit are returned unchanged.
 *
 * @param index - Zero-based position of the scheme; the name uses `index + 1`.
 * @param now - Clock returning milliseconds; defaults to `Date.now`.
 * @returns The generated name, never longer than 16 characters.
 */
export function createVoiceDesignPreferredName(index: number, now: () => number = Date.now): string {
  // NOTE(review): 16 is presumably the backend's name-length limit — confirm.
  const maxLength = 16
  const prefix = 'voice_'
  const suffix = `_${index + 1}`
  const stamp = now().toString(36)
  const stampBudget = Math.max(0, maxLength - prefix.length - suffix.length)
  // Keep the trailing (fastest-changing) timestamp characters when trimming.
  const keptStamp = stamp.slice(Math.max(0, stamp.length - stampBudget))
  // Final slice only matters for suffixes so long that no timestamp room is left.
  return `${prefix}${keptStamp}${suffix}`.slice(0, maxLength)
}
/** Arguments accepted by `generateVoiceDesignOptions`. */
interface GenerateVoiceDesignOptionsParams {
  /** Requested number of schemes; normalized with `normalizeVoiceSchemeCount`. */
  count: string | number | undefined
  /** Voice description; must contain non-whitespace characters. */
  voicePrompt: string
  /** Preview text; a blank value falls back to `defaultPreviewText`. */
  previewText: string
  /** Preview text used when `previewText` is blank. */
  defaultPreviewText: string
  /** Language forwarded to the callback; `'zh'` when omitted. */
  language?: 'zh'
  /** Performs one voice-design request. */
  onDesignVoice: (payload: VoiceDesignMutationPayload) => Promise<VoiceDesignMutationResult>
  /** Produces the preferred name for a zero-based index; defaults to `createVoiceDesignPreferredName`. */
  createPreferredName?: (index: number) => string
}

/**
 * Calls `onDesignVoice` once per requested scheme, one request after another,
 * and collects every response that carries audio.
 *
 * Responses without `audioBase64` are skipped. A response that has audio but
 * no non-empty string `voiceId` aborts the whole run.
 *
 * @returns The collected voices, each with a `data:audio/wav;base64,` URL.
 * @throws Error `VOICE_PROMPT_REQUIRED` when the prompt is blank.
 * @throws Error `VOICE_DESIGN_INVALID_RESPONSE: missing voiceId` on a response with audio but no id.
 * @throws Error `VOICE_DESIGN_EMPTY_RESULT` when no response carried audio.
 */
export async function generateVoiceDesignOptions({
  count,
  voicePrompt,
  previewText,
  defaultPreviewText,
  language = 'zh',
  onDesignVoice,
  createPreferredName = (index) => createVoiceDesignPreferredName(index),
}: GenerateVoiceDesignOptionsParams): Promise<GeneratedVoice[]> {
  const prompt = voicePrompt.trim()
  if (prompt.length === 0) {
    throw new Error('VOICE_PROMPT_REQUIRED')
  }

  const sampleText = previewText.trim() || defaultPreviewText
  const total = normalizeVoiceSchemeCount(count)
  const collected: GeneratedVoice[] = []

  let position = 0
  while (position < total) {
    const outcome = await onDesignVoice({
      voicePrompt: prompt,
      previewText: sampleText,
      preferredName: createPreferredName(position),
      language,
    })
    position += 1

    const audio = outcome.audioBase64
    if (audio) {
      const id = outcome.voiceId
      if (typeof id !== 'string' || id.length === 0) {
        throw new Error('VOICE_DESIGN_INVALID_RESPONSE: missing voiceId')
      }
      collected.push({
        voiceId: id,
        audioBase64: audio,
        audioUrl: `data:audio/wav;base64,${audio}`,
      })
    }
  }

  if (collected.length === 0) {
    throw new Error('VOICE_DESIGN_EMPTY_RESULT')
  }
  return collected
}