Files
waooplus/tests/unit/lipsync-preprocess.test.ts
2026-03-08 17:10:06 +08:00

200 lines
7.2 KiB
TypeScript

import { beforeEach, describe, expect, it, vi } from 'vitest'
// Mock handles are created through vi.hoisted so that the vi.mock factories
// further down can reference them.

// Stand-in for normalizeToOriginalMediaUrl: resolves to its input unchanged.
const normalizeToOriginalMediaUrlMock = vi.hoisted(() => {
  return vi.fn(async (input: string) => input)
})

// Stand-in for uploadObject: always resolves to the same storage key.
const uploadObjectMock = vi.hoisted(() => {
  return vi.fn(async () => 'voice/temp/lip-sync-preprocessed/test.wav')
})

// Stand-in for getSignedUrl: always returns the same relative signing URL.
const getSignedUrlMock = vi.hoisted(() => {
  return vi.fn(() => '/api/storage/sign?key=voice%2Ftemp%2Flip-sync-preprocessed%2Ftest.wav')
})

// Stand-in for toFetchableUrl: http(s) and data URLs pass through untouched,
// root-relative paths are prefixed with a fake public origin, and anything
// else is returned as-is.
const toFetchableUrlMock = vi.hoisted(() => {
  const passthroughPrefixes = ['http://', 'https://', 'data:']
  return vi.fn((input: string) => {
    const alreadyFetchable = passthroughPrefixes.some((prefix) => input.startsWith(prefix))
    if (!alreadyFetchable && input.startsWith('/')) {
      return `https://public.example.com${input}`
    }
    return input
  })
})
// Media URL normalisation is replaced by the pass-through mock.
vi.mock('@/lib/media/outbound-image', () => {
  return { normalizeToOriginalMediaUrl: normalizeToOriginalMediaUrlMock }
})

// Storage upload and URL signing are replaced by the fixed-value mocks.
vi.mock('@/lib/storage', () => {
  return {
    uploadObject: uploadObjectMock,
    getSignedUrl: getSignedUrlMock,
  }
})

// Relative-to-absolute URL resolution is replaced by the prefixing mock.
vi.mock('@/lib/storage/utils', () => {
  return { toFetchableUrl: toFetchableUrlMock }
})

// Logging is replaced by a bare mock function.
vi.mock('@/lib/logging/core', () => {
  const logInfo = vi.fn()
  return { logInfo }
})
import {
LIPSYNC_PREPROCESS_AUDIO_MIN_MS,
preprocessLipSyncParams,
} from '@/lib/lipsync/preprocess'
/**
 * Builds a silent mono 16-bit PCM WAV file with a canonical 44-byte header.
 *
 * The payload is sized in whole sample frames so the `data` chunk length is
 * always a multiple of the block alignment, even for fractional durations or
 * unusual sample rates. At least one frame is always emitted.
 *
 * @param durationMs Requested audio length in milliseconds.
 * @param sampleRate Samples per second written to the header (default 16000).
 * @returns A buffer holding the header followed by zero-filled sample data.
 */
function buildWav(durationMs: number, sampleRate = 16000): Buffer {
  const numChannels = 1
  const bitsPerSample = 16
  const headerSize = 44
  const blockAlign = (numChannels * bitsPerSample) / 8
  const byteRate = sampleRate * blockAlign
  // Round in frames, not bytes: rounding the byte count directly can leave a
  // trailing half-sample (odd byte count) in the data chunk.
  const frameCount = Math.max(1, Math.round((durationMs / 1000) * sampleRate))
  const dataSize = frameCount * blockAlign
  const buffer = Buffer.alloc(headerSize + dataSize)
  // RIFF container header; the chunk size excludes the 8-byte "RIFF"+size prefix.
  buffer.write('RIFF', 0, 'ascii')
  buffer.writeUInt32LE(36 + dataSize, 4)
  buffer.write('WAVE', 8, 'ascii')
  // "fmt " sub-chunk: 16-byte body, audio format 1 (PCM).
  buffer.write('fmt ', 12, 'ascii')
  buffer.writeUInt32LE(16, 16)
  buffer.writeUInt16LE(1, 20)
  buffer.writeUInt16LE(numChannels, 22)
  buffer.writeUInt32LE(sampleRate, 24)
  buffer.writeUInt32LE(byteRate, 28)
  buffer.writeUInt16LE(blockAlign, 32)
  buffer.writeUInt16LE(bitsPerSample, 34)
  // "data" sub-chunk: the samples stay zero-filled from Buffer.alloc (silence).
  buffer.write('data', 36, 'ascii')
  buffer.writeUInt32LE(dataSize, 40)
  return buffer
}
/**
 * Builds a tiny MP4-shaped buffer: an `ftyp` box followed by a `moov` box
 * that contains only a truncated version-0 `mvhd` box carrying the timescale
 * and duration.
 *
 * @param durationMs Duration to encode; rounded to an integer and clamped to at least 1.
 * @returns The concatenated `ftyp` + `moov` bytes.
 */
function buildMp4WithDuration(durationMs: number): Buffer {
  // Prefixes a payload with the 8-byte box header (big-endian size + 4-char type).
  const wrapBox = (type: string, payload: Buffer): Buffer => {
    const header = Buffer.alloc(8)
    header.writeUInt32BE(8 + payload.length, 0)
    header.write(type, 4, 'ascii')
    return Buffer.concat([header, payload])
  }

  const movieTimescale = 1000
  const movieDuration = Math.max(1, Math.round(durationMs))

  // mvhd body: version/flags, creation time and modification time stay zero
  // (Buffer.alloc zero-fills), followed by timescale and duration.
  const mvhdFields = Buffer.alloc(20)
  mvhdFields.writeUInt32BE(movieTimescale, 12)
  mvhdFields.writeUInt32BE(movieDuration, 16)
  const moovBox = wrapBox('moov', wrapBox('mvhd', mvhdFields))

  // ftyp body: major brand, minor version, then two compatible brands.
  const brandFields = Buffer.alloc(16)
  brandFields.write('isom', 0, 'ascii')
  brandFields.writeUInt32BE(0x200, 4)
  brandFields.write('isom', 8, 'ascii')
  brandFields.write('mp41', 12, 'ascii')
  const ftypBox = wrapBox('ftyp', brandFields)

  return Buffer.concat([ftypBox, moovBox])
}
/**
 * Derives a WAV file's duration from its header fields.
 *
 * Reads the byte rate at offset 28 and the data size at offset 40, i.e. it
 * assumes the canonical 44-byte header layout with no extra chunks.
 *
 * @param buffer WAV file bytes.
 * @returns Duration in milliseconds, rounded to the nearest integer.
 */
function readWavDurationMs(buffer: Buffer): number {
  const BYTE_RATE_OFFSET = 28
  const DATA_SIZE_OFFSET = 40
  const bytesPerSecond = buffer.readUInt32LE(BYTE_RATE_OFFSET)
  const payloadBytes = buffer.readUInt32LE(DATA_SIZE_OFFSET)
  const seconds = payloadBytes / bytesPerSecond
  return Math.round(seconds * 1000)
}
/**
 * Creates a minimal successful Response-like stub around a binary payload.
 *
 * Only `ok`, `status`, `headers`, `arrayBuffer()` and `text()` are provided;
 * the object is cast to `Response` for use as a fetch mock return value.
 *
 * @param buffer Bytes that `arrayBuffer()` resolves to.
 * @param contentType Value exposed through the `content-type` header.
 * @returns The stub typed as a `Response`.
 */
function buildBinaryResponse(buffer: Buffer, contentType: string): Response {
  const headers = new Headers({ 'content-type': contentType })

  // Copy just this Buffer's window out of the backing ArrayBuffer, which may
  // be larger than the Buffer itself.
  const readBody = async () => {
    const start = buffer.byteOffset
    const end = start + buffer.byteLength
    return buffer.buffer.slice(start, end)
  }
  const readText = async () => ''

  const stub = {
    ok: true,
    status: 200,
    headers,
    arrayBuffer: readBody,
    text: readText,
  }
  return stub as unknown as Response
}
// Exercises preprocessLipSyncParams against synthetic WAV/MP4 fixtures served
// through a stubbed global fetch, for three provider keys: fal, vidu, bailian.
describe('lipsync preprocess', () => {
  const VIDEO_URL = 'https://assets.example.com/video.mp4'
  const AUDIO_URL = 'https://assets.example.com/audio.wav'

  // Installs a global fetch stub that serves the given fixtures by URL and
  // rejects any other request, then returns the mock for call assertions.
  function stubMediaFetch(video: Buffer, audio: Buffer) {
    const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
      const url = String(input)
      if (url.includes('video.mp4')) return buildBinaryResponse(video, 'video/mp4')
      if (url.includes('audio.wav')) return buildBinaryResponse(audio, 'audio/wav')
      throw new Error(`unexpected fetch: ${url}`)
    })
    vi.stubGlobal('fetch', fetchMock as unknown as typeof fetch)
    return fetchMock
  }

  beforeEach(() => {
    // Drop any fetch stub left by the previous test so each test must install
    // its own, instead of silently inheriting stale fixtures.
    vi.unstubAllGlobals()
    vi.clearAllMocks()
  })

  it('pads short audio to minimum duration for fal', async () => {
    stubMediaFetch(buildMp4WithDuration(5000), buildWav(1000))

    const result = await preprocessLipSyncParams(
      {
        videoUrl: VIDEO_URL,
        audioUrl: AUDIO_URL,
        audioDurationMs: 1000,
      },
      { providerKey: 'fal' },
    )

    expect(result.paddedAudio).toBe(true)
    expect(result.trimmedAudio).toBe(false)
    // The padded audio is expected inline as a data URL rather than uploaded.
    expect(result.params.audioUrl.startsWith('data:audio/wav;base64,')).toBe(true)
    const base64 = result.params.audioUrl.slice('data:audio/wav;base64,'.length)
    const paddedBuffer = Buffer.from(base64, 'base64')
    expect(readWavDurationMs(paddedBuffer)).toBeGreaterThanOrEqual(LIPSYNC_PREPROCESS_AUDIO_MIN_MS)
    expect(uploadObjectMock).not.toHaveBeenCalled()
  })

  it('trims audio to video duration for vidu and uploads processed audio', async () => {
    stubMediaFetch(buildMp4WithDuration(5000), buildWav(7000))

    const result = await preprocessLipSyncParams(
      {
        videoUrl: VIDEO_URL,
        audioUrl: AUDIO_URL,
        audioDurationMs: 7000,
      },
      { providerKey: 'vidu' },
    )

    expect(result.paddedAudio).toBe(false)
    expect(result.trimmedAudio).toBe(true)
    expect(uploadObjectMock).toHaveBeenCalledTimes(1)
    // The first upload argument is the processed WAV; it must fit the 5 s video.
    const uploadCall = uploadObjectMock.mock.calls[0] as unknown as [Buffer] | undefined
    expect(uploadCall).toBeTruthy()
    if (!uploadCall) throw new Error('expected uploadObject call')
    const uploadedBuffer = uploadCall[0]
    expect(readWavDurationMs(uploadedBuffer)).toBeLessThanOrEqual(5000)
    // The mocked signed URL is relative; the mocked toFetchableUrl makes it absolute.
    expect(result.params.audioUrl).toBe('https://public.example.com/api/storage/sign?key=voice%2Ftemp%2Flip-sync-preprocessed%2Ftest.wav')
  })

  it('probes durations and keeps audio unchanged when no adjustment is needed', async () => {
    // No audioDurationMs is supplied here, so the test also asserts that fetch was used.
    const fetchMock = stubMediaFetch(buildMp4WithDuration(5000), buildWav(3000))

    const result = await preprocessLipSyncParams(
      {
        videoUrl: VIDEO_URL,
        audioUrl: AUDIO_URL,
      },
      { providerKey: 'bailian' },
    )

    expect(result.paddedAudio).toBe(false)
    expect(result.trimmedAudio).toBe(false)
    expect(result.params.audioUrl).toBe('https://assets.example.com/audio.wav')
    expect(fetchMock).toHaveBeenCalled()
    expect(uploadObjectMock).not.toHaveBeenCalled()
  })
})