mirror of
https://github.com/simstudioai/sim.git
synced 2026-02-12 07:24:55 -05:00
327 lines
8.6 KiB
TypeScript
327 lines
8.6 KiB
TypeScript
import { execSync } from 'node:child_process'
|
|
import fsSync from 'node:fs'
|
|
import fs from 'node:fs/promises'
|
|
import os from 'node:os'
|
|
import path from 'node:path'
|
|
import ffmpegStatic from 'ffmpeg-static'
|
|
import ffmpeg from 'fluent-ffmpeg'
|
|
import type {
|
|
AudioExtractionOptions,
|
|
AudioExtractionResult,
|
|
AudioMetadata,
|
|
} from '@/lib/audio/types'
|
|
|
|
let ffmpegInitialized = false
|
|
let ffmpegPath: string | null = null
|
|
|
|
/**
|
|
* Lazy initialization of FFmpeg - only runs when needed, not at module load
|
|
*/
|
|
function ensureFfmpeg(): void {
|
|
if (ffmpegInitialized) {
|
|
if (!ffmpegPath) {
|
|
throw new Error(
|
|
'FFmpeg not found. Install: brew install ffmpeg (macOS) / apk add ffmpeg (Alpine) / apt-get install ffmpeg (Ubuntu)'
|
|
)
|
|
}
|
|
return
|
|
}
|
|
|
|
ffmpegInitialized = true
|
|
|
|
// Try ffmpeg-static binary
|
|
if (ffmpegStatic && typeof ffmpegStatic === 'string') {
|
|
try {
|
|
fsSync.accessSync(ffmpegStatic, fsSync.constants.X_OK)
|
|
ffmpegPath = ffmpegStatic
|
|
ffmpeg.setFfmpegPath(ffmpegPath)
|
|
console.log('[FFmpeg] Using ffmpeg-static:', ffmpegPath)
|
|
return
|
|
} catch {
|
|
// Binary doesn't exist or not executable
|
|
}
|
|
}
|
|
|
|
// Try system ffmpeg (cross-platform)
|
|
try {
|
|
const cmd = process.platform === 'win32' ? 'where ffmpeg' : 'which ffmpeg'
|
|
const result = execSync(cmd, { encoding: 'utf-8' }).trim()
|
|
// On Windows, 'where' returns multiple paths - take first
|
|
ffmpegPath = result.split('\n')[0]
|
|
ffmpeg.setFfmpegPath(ffmpegPath)
|
|
console.log('[FFmpeg] Using system ffmpeg:', ffmpegPath)
|
|
return
|
|
} catch {
|
|
// System ffmpeg not found
|
|
}
|
|
|
|
// No FFmpeg found - set flag but don't throw yet
|
|
// Error will be thrown when user tries to use video extraction
|
|
console.warn('[FFmpeg] No FFmpeg binary found at module load time')
|
|
}
|
|
|
|
/**
|
|
* Extract audio from video or convert audio format using FFmpeg
|
|
*/
|
|
export async function extractAudioFromVideo(
|
|
inputBuffer: Buffer,
|
|
mimeType: string,
|
|
options: AudioExtractionOptions = {}
|
|
): Promise<AudioExtractionResult> {
|
|
// Initialize FFmpeg on first use
|
|
ensureFfmpeg()
|
|
const isVideo = mimeType.startsWith('video/')
|
|
const isAudio = mimeType.startsWith('audio/')
|
|
|
|
// If it's already audio and no conversion needed, get metadata and return
|
|
if (isAudio && !options.outputFormat) {
|
|
try {
|
|
const metadata = await getAudioMetadata(inputBuffer, mimeType)
|
|
return {
|
|
buffer: inputBuffer,
|
|
format: mimeType.split('/')[1] || 'unknown',
|
|
duration: metadata.duration || 0,
|
|
size: inputBuffer.length,
|
|
}
|
|
} catch (error) {
|
|
// If metadata extraction fails, still return the buffer
|
|
return {
|
|
buffer: inputBuffer,
|
|
format: mimeType.split('/')[1] || 'unknown',
|
|
duration: 0,
|
|
size: inputBuffer.length,
|
|
}
|
|
}
|
|
}
|
|
|
|
// For video or audio conversion, use ffmpeg
|
|
if (isVideo || options.outputFormat) {
|
|
return await convertAudioWithFFmpeg(inputBuffer, mimeType, options)
|
|
}
|
|
|
|
// Fallback
|
|
return {
|
|
buffer: inputBuffer,
|
|
format: options.outputFormat || mimeType.split('/')[1] || 'unknown',
|
|
duration: 0,
|
|
size: inputBuffer.length,
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convert audio/video using FFmpeg
|
|
*/
|
|
async function convertAudioWithFFmpeg(
|
|
inputBuffer: Buffer,
|
|
mimeType: string,
|
|
options: AudioExtractionOptions
|
|
): Promise<AudioExtractionResult> {
|
|
// Create temporary files
|
|
const tempDir = os.tmpdir()
|
|
const inputExt = getExtensionFromMimeType(mimeType)
|
|
const outputFormat = options.outputFormat || 'mp3'
|
|
const inputFile = path.join(tempDir, `ffmpeg-input-${Date.now()}.${inputExt}`)
|
|
const outputFile = path.join(tempDir, `ffmpeg-output-${Date.now()}.${outputFormat}`)
|
|
|
|
try {
|
|
// Write input buffer to temporary file
|
|
await fs.writeFile(inputFile, inputBuffer)
|
|
|
|
// Get metadata for duration
|
|
let duration = 0
|
|
try {
|
|
const metadata = await getAudioMetadataFromFile(inputFile)
|
|
duration = metadata.duration || 0
|
|
} catch (error) {
|
|
// Metadata extraction failed, continue without duration
|
|
console.warn('Failed to extract metadata:', error)
|
|
}
|
|
|
|
// Convert using FFmpeg
|
|
await new Promise<void>((resolve, reject) => {
|
|
let command = ffmpeg(inputFile).toFormat(outputFormat).audioCodec(getAudioCodec(outputFormat))
|
|
|
|
// Apply audio options
|
|
if (options.channels) {
|
|
command = command.audioChannels(options.channels)
|
|
}
|
|
if (options.sampleRate) {
|
|
command = command.audioFrequency(options.sampleRate)
|
|
}
|
|
if (options.bitrate) {
|
|
command = command.audioBitrate(options.bitrate)
|
|
}
|
|
|
|
command
|
|
.on('end', () => resolve())
|
|
.on('error', (err) => reject(new Error(`FFmpeg error: ${err.message}`)))
|
|
.save(outputFile)
|
|
})
|
|
|
|
// Read output file
|
|
const outputBuffer = await fs.readFile(outputFile)
|
|
|
|
return {
|
|
buffer: outputBuffer,
|
|
format: outputFormat,
|
|
duration,
|
|
size: outputBuffer.length,
|
|
}
|
|
} finally {
|
|
// Clean up temporary files
|
|
try {
|
|
await fs.unlink(inputFile).catch(() => {})
|
|
await fs.unlink(outputFile).catch(() => {})
|
|
} catch (error) {
|
|
// Ignore cleanup errors
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get audio metadata using ffprobe
|
|
*/
|
|
export async function getAudioMetadata(buffer: Buffer, mimeType: string): Promise<AudioMetadata> {
|
|
ensureFfmpeg() // Initialize FFmpeg/ffprobe
|
|
const tempDir = os.tmpdir()
|
|
const inputExt = getExtensionFromMimeType(mimeType)
|
|
const inputFile = path.join(tempDir, `ffprobe-input-${Date.now()}.${inputExt}`)
|
|
|
|
try {
|
|
// Write buffer to temporary file
|
|
await fs.writeFile(inputFile, buffer)
|
|
|
|
// Get metadata using ffprobe
|
|
return await getAudioMetadataFromFile(inputFile)
|
|
} finally {
|
|
// Clean up temporary file
|
|
try {
|
|
await fs.unlink(inputFile).catch(() => {})
|
|
} catch (error) {
|
|
// Ignore cleanup errors
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get audio metadata from a file path using ffprobe
|
|
*/
|
|
async function getAudioMetadataFromFile(filePath: string): Promise<AudioMetadata> {
|
|
ensureFfmpeg() // Initialize FFmpeg/ffprobe
|
|
return new Promise((resolve, reject) => {
|
|
ffmpeg.ffprobe(filePath, (err, metadata) => {
|
|
if (err) {
|
|
reject(new Error(`FFprobe error: ${err.message}`))
|
|
return
|
|
}
|
|
|
|
const audioStream = metadata.streams.find((s) => s.codec_type === 'audio')
|
|
const format = metadata.format
|
|
|
|
resolve({
|
|
duration: format.duration || 0,
|
|
format: format.format_name || 'unknown',
|
|
codec: audioStream?.codec_name,
|
|
sampleRate: audioStream?.sample_rate,
|
|
channels: audioStream?.channels,
|
|
bitrate: format.bit_rate ? Number(format.bit_rate) : undefined,
|
|
})
|
|
})
|
|
})
|
|
}
|
|
|
|
/**
|
|
* Get file extension from MIME type
|
|
*/
|
|
function getExtensionFromMimeType(mimeType: string): string {
|
|
const mimeToExt: Record<string, string> = {
|
|
// Video
|
|
'video/mp4': 'mp4',
|
|
'video/quicktime': 'mov',
|
|
'video/x-msvideo': 'avi',
|
|
'video/x-matroska': 'mkv',
|
|
'video/webm': 'webm',
|
|
// Audio
|
|
'audio/mpeg': 'mp3',
|
|
'audio/mp4': 'm4a',
|
|
'audio/wav': 'wav',
|
|
'audio/webm': 'webm',
|
|
'audio/ogg': 'ogg',
|
|
'audio/flac': 'flac',
|
|
'audio/aac': 'aac',
|
|
'audio/opus': 'opus',
|
|
}
|
|
|
|
return mimeToExt[mimeType] || mimeType.split('/')[1] || 'dat'
|
|
}
|
|
|
|
/**
|
|
* Get appropriate audio codec for output format
|
|
*/
|
|
function getAudioCodec(format: string): string {
|
|
const codecMap: Record<string, string> = {
|
|
mp3: 'libmp3lame',
|
|
wav: 'pcm_s16le',
|
|
flac: 'flac',
|
|
m4a: 'aac',
|
|
aac: 'aac',
|
|
ogg: 'libvorbis',
|
|
opus: 'libopus',
|
|
}
|
|
|
|
return codecMap[format] || 'libmp3lame'
|
|
}
|
|
|
|
/**
|
|
* Check if a file is a video file
|
|
*/
|
|
export function isVideoFile(mimeType: string): boolean {
|
|
return mimeType.startsWith('video/')
|
|
}
|
|
|
|
/**
|
|
* Check if a file is an audio file
|
|
*/
|
|
export function isAudioFile(mimeType: string): boolean {
|
|
return mimeType.startsWith('audio/')
|
|
}
|
|
|
|
/**
|
|
* Get optimal audio format for STT provider
|
|
*/
|
|
export function getOptimalFormat(provider: 'whisper' | 'deepgram' | 'elevenlabs'): {
|
|
format: 'mp3' | 'wav' | 'flac'
|
|
sampleRate: number
|
|
channels: 1 | 2
|
|
} {
|
|
switch (provider) {
|
|
case 'whisper':
|
|
// Whisper prefers 16kHz mono
|
|
return {
|
|
format: 'mp3',
|
|
sampleRate: 16000,
|
|
channels: 1,
|
|
}
|
|
case 'deepgram':
|
|
// Deepgram works well with various formats
|
|
return {
|
|
format: 'mp3',
|
|
sampleRate: 16000,
|
|
channels: 1,
|
|
}
|
|
case 'elevenlabs':
|
|
// ElevenLabs format preferences
|
|
return {
|
|
format: 'mp3',
|
|
sampleRate: 16000,
|
|
channels: 1,
|
|
}
|
|
default:
|
|
return {
|
|
format: 'mp3',
|
|
sampleRate: 16000,
|
|
channels: 1,
|
|
}
|
|
}
|
|
}
|