From 8353f3e3d175cb59b94327e5d5fa369290ce3c3f Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Sat, 19 Apr 2025 15:42:46 -0700 Subject: [PATCH] feat(tools): added elevenlabs tools/block, added audio-player console component, modified proxy routes (#282) --- .../api/{proxy-image => proxy/image}/route.ts | 7 +- sim/app/api/proxy/tts/route.ts | 63 +++++++++ .../components/audio-player/audio-player.tsx | 121 ++++++++++++++++++ .../components/json-view/json-view.tsx | 19 ++- .../components/tool-input/tool-input.tsx | 42 ++++-- sim/blocks/blocks/elevenlabs.ts | 97 ++++++++++++++ sim/blocks/index.ts | 3 + sim/components/icons.tsx | 9 ++ sim/tools/elevenlabs/index.ts | 3 + sim/tools/elevenlabs/tts.ts | 75 +++++++++++ sim/tools/elevenlabs/types.ts | 15 +++ sim/tools/openai/dalle.ts | 45 +++---- sim/tools/registry.ts | 2 + 13 files changed, 464 insertions(+), 37 deletions(-) rename sim/app/api/{proxy-image => proxy/image}/route.ts (89%) create mode 100644 sim/app/api/proxy/tts/route.ts create mode 100644 sim/app/w/[id]/components/panel/components/console/components/audio-player/audio-player.tsx create mode 100644 sim/blocks/blocks/elevenlabs.ts create mode 100644 sim/tools/elevenlabs/index.ts create mode 100644 sim/tools/elevenlabs/tts.ts create mode 100644 sim/tools/elevenlabs/types.ts diff --git a/sim/app/api/proxy-image/route.ts b/sim/app/api/proxy/image/route.ts similarity index 89% rename from sim/app/api/proxy-image/route.ts rename to sim/app/api/proxy/image/route.ts index 4db842a740..a582dbaf2b 100644 --- a/sim/app/api/proxy-image/route.ts +++ b/sim/app/api/proxy/image/route.ts @@ -1,4 +1,7 @@ import { NextResponse } from 'next/server' +import { createLogger } from '@/lib/logs/console-logger' + +const logger = createLogger('ProxyImage') export async function GET(request: Request) { try { @@ -6,11 +9,11 @@ export async function GET(request: Request) { const imageUrl = searchParams.get('url') if (!imageUrl) { - console.error('Missing URL parameter in proxy-image request') + logger.error('Missing URL parameter in proxy image request') return new NextResponse('Missing URL parameter', { status: 400 }) } - console.log('Proxying image from:', imageUrl) + logger.info('Proxying image from:', imageUrl) // Add appropriate headers for fetching images const response = await fetch(imageUrl, { diff --git a/sim/app/api/proxy/tts/route.ts b/sim/app/api/proxy/tts/route.ts new file mode 100644 index 0000000000..f4d49ee03f --- /dev/null +++ b/sim/app/api/proxy/tts/route.ts @@ -0,0 +1,63 @@ +import { NextResponse } from 'next/server' +import { createLogger } from '@/lib/logs/console-logger' + +const logger = createLogger('ProxyTTS') + +export async function POST(request: Request) { + try { + const body = await request.json() + const { text, voiceId, apiKey, modelId = 'eleven_monolingual_v1' } = body + + if (!text || !voiceId || !apiKey) { + return new NextResponse('Missing required parameters', { status: 400 }) + } + + logger.info('Proxying TTS request for voice:', voiceId) + + const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}` + + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Accept': 'audio/mpeg', + 'Content-Type': 'application/json', + 'xi-api-key': apiKey, + }, + body: JSON.stringify({ + text, + model_id: modelId, + }), + // Set a reasonable timeout + signal: AbortSignal.timeout(20000), + }) + + if (!response.ok) { + logger.error(`Failed to generate TTS: ${response.status} ${response.statusText}`) + return new NextResponse(`Failed to generate TTS: ${response.status} ${response.statusText}`, { + status: response.status + }) + } + + const audioBlob = await response.blob() + + if (audioBlob.size === 0) { + logger.error('Empty audio received from ElevenLabs') + return new NextResponse('Empty audio received', { status: 422 }) + } + + return new NextResponse(audioBlob, { + headers: { + 'Content-Type': 'audio/mpeg', + 'Cache-Control': 'public, max-age=86400', // Cache for a day + 'Access-Control-Allow-Origin': '*', // CORS support + }, + }) + } catch (error) { + logger.error('Error proxying TTS:', error) + + return new NextResponse( + `Internal Server Error: ${error instanceof Error ? error.message : 'Unknown error'}`, + { status: 500 } + ) + } +} diff --git a/sim/app/w/[id]/components/panel/components/console/components/audio-player/audio-player.tsx b/sim/app/w/[id]/components/panel/components/console/components/audio-player/audio-player.tsx new file mode 100644 index 0000000000..6796497fcb --- /dev/null +++ b/sim/app/w/[id]/components/panel/components/console/components/audio-player/audio-player.tsx @@ -0,0 +1,121 @@ +'use client' + +import { useEffect, useRef, useState } from 'react' +import { Pause, Play, Download } from 'lucide-react' +import { createLogger } from '@/lib/logs/console-logger' + +const logger = createLogger('AudioPlayer') + +interface AudioPlayerProps { + audioUrl: string +} + +export function AudioPlayer({ audioUrl }: AudioPlayerProps) { + const [isPlaying, setIsPlaying] = useState(false) + const [progress, setProgress] = useState(0) + const audioRef = useRef(null) + + useEffect(() => { + if (!audioRef.current) { + audioRef.current = new Audio(audioUrl) + + audioRef.current.addEventListener('ended', () => setIsPlaying(false)) + audioRef.current.addEventListener('pause', () => setIsPlaying(false)) + audioRef.current.addEventListener('play', () => setIsPlaying(true)) + audioRef.current.addEventListener('timeupdate', updateProgress) + } else { + audioRef.current.src = audioUrl + setProgress(0) + } + + return () => { + if (audioRef.current) { + audioRef.current.pause() + audioRef.current.removeEventListener('ended', () => setIsPlaying(false)) + audioRef.current.removeEventListener('pause', () => setIsPlaying(false)) + audioRef.current.removeEventListener('play', () => setIsPlaying(true)) + audioRef.current.removeEventListener('timeupdate', updateProgress) + } + } + }, [audioUrl]) + + const updateProgress = () => { + if (audioRef.current) { + const value = (audioRef.current.currentTime / audioRef.current.duration) * 100 + setProgress(isNaN(value) ? 0 : value) + } + } + + const togglePlay = () => { + if (!audioRef.current) return + + if (isPlaying) { + audioRef.current.pause() + } else { + audioRef.current.play() + } + } + + const downloadAudio = async () => { + try { + const response = await fetch(audioUrl) + const blob = await response.blob() + + const url = URL.createObjectURL(blob) + const link = document.createElement('a') + link.href = url + link.download = `tts-audio-${Date.now()}.mp3` + document.body.appendChild(link) + link.click() + document.body.removeChild(link) + + URL.revokeObjectURL(url) + } catch (error) { + logger.error('Error downloading audio:', error) + } + } + + const seekAudio = (e: React.MouseEvent) => { + if (!audioRef.current) return + + const container = e.currentTarget + const rect = container.getBoundingClientRect() + const x = e.clientX - rect.left + const percent = x / rect.width + + audioRef.current.currentTime = percent * audioRef.current.duration + } + + return ( +
+ + +
+
+
+ + +
+ ) +} diff --git a/sim/app/w/[id]/components/panel/components/console/components/json-view/json-view.tsx b/sim/app/w/[id]/components/panel/components/console/components/json-view/json-view.tsx index 188706a19a..ec4e51a8c8 100644 --- a/sim/app/w/[id]/components/panel/components/console/components/json-view/json-view.tsx +++ b/sim/app/w/[id]/components/panel/components/console/components/json-view/json-view.tsx @@ -1,6 +1,7 @@ import { useEffect, useState } from 'react' import { Download } from 'lucide-react' import { Button } from '@/components/ui/button' +import { AudioPlayer } from '../audio-player/audio-player' interface JSONViewProps { data: any @@ -45,6 +46,16 @@ const isImageData = (obj: any): boolean => { return obj && typeof obj === 'object' && 'url' in obj && typeof obj.url === 'string' } +// Helper function to check if an object contains an audio URL +const isAudioData = (obj: any): boolean => { + return ( + obj && + typeof obj === 'object' && + 'audioUrl' in obj && + typeof obj.audioUrl === 'string' + ) +} + // Helper function to check if a string is likely a base64 image const isBase64Image = (str: string): boolean => { if (typeof str !== 'string') return false @@ -127,7 +138,7 @@ const ImagePreview = ({ blob = new Blob([arrayBuffer], { type: 'image/png' }) } else if (imageUrl && imageUrl.length > 0) { // Use proxy endpoint to fetch image - const proxyUrl = `/api/proxy-image?url=${encodeURIComponent(imageUrl)}` + const proxyUrl = `/api/proxy/image?url=${encodeURIComponent(imageUrl)}` const response = await fetch(proxyUrl) if (!response.ok) { throw new Error(`Failed to download image: ${response.statusText}`) @@ -239,6 +250,9 @@ export const JSONView = ({ data, level = 0, initiallyExpanded = false }: JSONVie // Check if current object contains image URL const hasImageUrl = isImageData(data) + + // Check if current object contains audio URL + const hasAudioUrl = isAudioData(data) // Check if this is a response object with the new image format const isResponseWithImage = hasImageContent(data) @@ -556,6 +570,9 @@ export const JSONView = ({ data, level = 0, initiallyExpanded = false }: JSONVie {/* Direct image render for objects with image URLs */} {!isCollapsed && hasImageUrl && } + + {/* Direct audio render for objects with audio URLs */} + {!isCollapsed && hasAudioUrl && } {contextMenuPosition && (
{ + // Special case for common parameter names + if (paramId === 'apiKey') return 'API Key' + if (paramId === 'apiVersion') return 'API Version' + + // Handle underscore and hyphen separated words + if (paramId.includes('_') || paramId.includes('-')) { + return paramId + .split(/[-_]/) + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' ') + } + + // Handle single character parameters + if (paramId.length === 1) return paramId.toUpperCase() + + // Handle camelCase + if (/[A-Z]/.test(paramId)) { + const result = paramId.replace(/([A-Z])/g, ' $1') + return result.charAt(0).toUpperCase() + result.slice(1) + .replace(/ Api/g, ' API') + .replace(/ Id/g, ' ID') + .replace(/ Url/g, ' URL') + .replace(/ Uri/g, ' URI') + .replace(/ Ui/g, ' UI') + } + + // Simple case - just capitalize first letter + return paramId.charAt(0).toUpperCase() + paramId.slice(1) +} + export function ToolInput({ blockId, subBlockId }: ToolInputProps) { const [value, setValue] = useSubBlockValue(blockId, subBlockId) const [open, setOpen] = useState(false) @@ -801,15 +833,7 @@ export function ToolInput({ blockId, subBlockId }: ToolInputProps) { {requiredParams.map((param) => (
- {param.id === 'apiKey' - ? 'API Key' - : param.id.length === 1 || - param.id.includes('_') || - param.id.includes('-') - ? param.id.toUpperCase() - : param.id.match(/^[a-z]+$/) - ? param.id.charAt(0).toUpperCase() + param.id.slice(1) - : param.id} + {formatParamId(param.id)}
= { + type: 'elevenlabs', + name: 'ElevenLabs', + description: 'Convert TTS using ElevenLabs', + longDescription: 'Generate realistic speech from text using ElevenLabs voices.', + category: 'tools', + bgColor: '#181C1E', + icon: ElevenLabsIcon, + + tools: { + access: ['elevenlabs_tts'], + config: { + tool: () => 'elevenlabs_tts', + params: (params) => ({ + apiKey: params.apiKey, + text: params.text, + voiceId: params.voiceId, + modelId: params.modelId, + }), + }, + }, + + inputs: { + text: { + type: 'string', + required: true, + }, + voiceId: { + type: 'string', + required: true, + }, + modelId: { + type: 'string', + required: false, + }, + apiKey: { + type: 'string', + required: true, + }, + }, + + outputs: { + response: { + type: { + audioUrl: 'string', + }, + }, + }, + + subBlocks: [ + { + id: 'text', + title: 'Text', + type: 'long-input', + layout: 'full', + placeholder: 'Enter the text to convert to speech', + }, + { + id: 'voiceId', + title: 'Voice ID', + type: 'short-input', + layout: 'full', + placeholder: 'Enter the voice ID', + }, + { + id: 'apiKey', + title: 'API Key', + type: 'short-input', + layout: 'full', + placeholder: 'Enter your ElevenLabs API key', + password: true, + }, + { + id: 'modelId', + title: 'Model ID (Optional)', + type: 'dropdown', + layout: 'half', + options: [ + 'eleven_monolingual_v1', + 'eleven_multilingual_v2', + 'eleven_turbo_v2', + 'eleven_turbo_v2_5', + 'eleven_flash_v2_5' + ], + }, + ], +} diff --git a/sim/blocks/index.ts b/sim/blocks/index.ts index a0d48a5488..7b974319ac 100644 --- a/sim/blocks/index.ts +++ b/sim/blocks/index.ts @@ -8,6 +8,7 @@ import { ConditionBlock } from './blocks/condition' import { ConfluenceBlock } from './blocks/confluence' import { GoogleDocsBlock } from './blocks/docs' import { GoogleDriveBlock } from './blocks/drive' +import { ElevenLabsBlock } from './blocks/elevenlabs' import { EvaluatorBlock } from './blocks/evaluator' import { ExaBlock } from './blocks/exa' import { FileBlock } from './blocks/file' @@ -52,6 +53,7 @@ export { ApiBlock, BrowserUseBlock, // AutoblocksBlock, + ElevenLabsBlock, Mem0Block, MistralParseBlock, FunctionBlock, @@ -101,6 +103,7 @@ const blocks: Record = { // autoblocks: AutoblocksBlock, condition: ConditionBlock, confluence: ConfluenceBlock, + elevenlabs_tts: ElevenLabsBlock, evaluator: EvaluatorBlock, exa: ExaBlock, firecrawl: FirecrawlBlock, diff --git a/sim/components/icons.tsx b/sim/components/icons.tsx index dd4918bba5..2e70b99ac7 100644 --- a/sim/components/icons.tsx +++ b/sim/components/icons.tsx @@ -2154,4 +2154,13 @@ export function Mem0Icon(props: SVGProps) { ) +} + +export function ElevenLabsIcon(props: SVGProps) { + return ( + + + + + ) } \ No newline at end of file diff --git a/sim/tools/elevenlabs/index.ts b/sim/tools/elevenlabs/index.ts new file mode 100644 index 0000000000..1433038291 --- /dev/null +++ b/sim/tools/elevenlabs/index.ts @@ -0,0 +1,3 @@ +import { elevenLabsTtsTool } from './tts' + +export { elevenLabsTtsTool } \ No newline at end of file diff --git a/sim/tools/elevenlabs/tts.ts b/sim/tools/elevenlabs/tts.ts new file mode 100644 index 0000000000..461a121111 --- /dev/null +++ b/sim/tools/elevenlabs/tts.ts @@ -0,0 +1,75 @@ +import { ToolConfig } from '../types' +import { createLogger } from '@/lib/logs/console-logger' +import { ElevenLabsTtsParams } from './types' +import { ElevenLabsTtsResponse } from './types' + +const logger = createLogger('ElevenLabsTool') + +export const elevenLabsTtsTool: ToolConfig = { + id: 'elevenlabs_tts', + name: 'ElevenLabs TTS', + description: 'Convert TTS using ElevenLabs voices', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + description: 'Your ElevenLabs API key', + requiredForToolCall: true, + }, + text: { + type: 'string', + required: true, + description: 'The text to convert to speech', + }, + voiceId: { + type: 'string', + required: true, + description: 'The ID of the voice to use', + requiredForToolCall: true, + }, + modelId: { + type: 'string', + required: false, + description: 'The ID of the model to use (defaults to eleven_monolingual_v1)', + }, + }, + + request: { + url: '/api/proxy/tts', + method: 'POST', + headers: (params) => ({ + 'Content-Type': 'application/json', + }), + body: (params) => ({ + apiKey: params.apiKey, + text: params.text, + voiceId: params.voiceId, + modelId: params.modelId || 'eleven_monolingual_v1', + }), + isInternalRoute: true, + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText}`) + } + + // Create a blob URL that can be used in an audio player + const audioBlob = await response.blob() + const audioUrl = URL.createObjectURL(audioBlob) + + return { + success: true, + output: { + audioUrl, + }, + } + }, + + transformError: (error) => { + logger.error('ElevenLabs TTS error:', error) + return `Error generating speech: ${error instanceof Error ? error.message : String(error)}` + }, +} diff --git a/sim/tools/elevenlabs/types.ts b/sim/tools/elevenlabs/types.ts new file mode 100644 index 0000000000..80cc3380f3 --- /dev/null +++ b/sim/tools/elevenlabs/types.ts @@ -0,0 +1,15 @@ +import { ToolResponse } from "../types" + +export interface ElevenLabsTtsParams { + apiKey: string + text: string + voiceId: string + modelId?: string + } + + export interface ElevenLabsTtsResponse extends ToolResponse { + output: { + audioUrl: string + } + } + \ No newline at end of file diff --git a/sim/tools/openai/dalle.ts b/sim/tools/openai/dalle.ts index 7e81c4b58c..90d7438fda 100644 --- a/sim/tools/openai/dalle.ts +++ b/sim/tools/openai/dalle.ts @@ -1,4 +1,7 @@ import { ToolConfig, ToolResponse } from '../types' +import { createLogger } from '@/lib/logs/console-logger' + +const logger = createLogger('DalleTool') export interface DalleResponse extends ToolResponse { output: { @@ -72,23 +75,23 @@ export const dalleTool: ToolConfig = { try { const data = await response.json() - console.log('DALL-E API response:', JSON.stringify(data, null, 2)) + logger.info('DALL-E API response:', JSON.stringify(data, null, 2)) if (!data.data?.[0]?.url) { - console.error('No image URL in DALL-E response:', data) + logger.error('No image URL in DALL-E response:', data) throw new Error('No image URL in response') } const imageUrl = data.data[0].url const modelName = data.model || params?.model || 'dall-e' - console.log('Got image URL:', imageUrl) - console.log('Using model:', modelName) + logger.info('Got image URL:', imageUrl) + logger.info('Using model:', modelName) try { - // Fetch the image using the proxy-image endpoint instead of direct fetch - console.log('Fetching image from URL via proxy...') - const proxyUrl = `/api/proxy-image?url=${encodeURIComponent(imageUrl)}` + // Fetch the image using the proxy/image endpoint instead of direct fetch + logger.info('Fetching image from URL via proxy...') + const proxyUrl = `/api/proxy/image?url=${encodeURIComponent(imageUrl)}` const imageResponse = await fetch(proxyUrl, { headers: { @@ -98,20 +101,14 @@ export const dalleTool: ToolConfig = { }) if (!imageResponse.ok) { - console.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText) + logger.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText) throw new Error(`Failed to fetch image: ${imageResponse.statusText}`) } - console.log( - 'Image fetch successful, content-type:', - imageResponse.headers.get('content-type') - ) - const imageBlob = await imageResponse.blob() - console.log('Image blob size:', imageBlob.size) if (imageBlob.size === 0) { - console.error('Empty image blob received') + logger.error('Empty image blob received') throw new Error('Empty image received') } @@ -126,15 +123,14 @@ export const dalleTool: ToolConfig = { } const base64Content = base64data.split(',')[1] // Remove the data URL prefix - console.log('Successfully converted image to base64, length:', base64Content.length) resolve(base64Content) } catch (err) { - console.error('Error in FileReader onloadend:', err) + logger.error('Error in FileReader onloadend:', err) reject(err) } } reader.onerror = (err) => { - console.error('FileReader error:', err) + logger.error('FileReader error:', err) reject(new Error('Failed to read image data')) } reader.readAsDataURL(imageBlob) @@ -142,7 +138,6 @@ export const dalleTool: ToolConfig = { const base64Image = await base64Promise - console.log('Returning success response with image data') return { success: true, output: { @@ -155,11 +150,11 @@ export const dalleTool: ToolConfig = { } } catch (error) { // Log the error but continue with returning the URL - console.error('Error fetching or processing image:', error) + logger.error('Error fetching or processing image:', error) // Try again with a direct browser fetch as fallback try { - console.log('Attempting fallback with direct browser fetch...') + logger.info('Attempting fallback with direct browser fetch...') const directImageResponse = await fetch(imageUrl, { cache: 'no-store', headers: { @@ -188,7 +183,7 @@ export const dalleTool: ToolConfig = { } const base64Content = base64data.split(',')[1] - console.log( + logger.info( 'Successfully converted image to base64 via direct fetch, length:', base64Content.length ) @@ -214,7 +209,7 @@ export const dalleTool: ToolConfig = { }, } } catch (fallbackError) { - console.error('Fallback fetch also failed:', fallbackError) + logger.error('Fallback fetch also failed:', fallbackError) // Even if both attempts fail, still return the URL and metadata return { @@ -230,12 +225,12 @@ export const dalleTool: ToolConfig = { } } } catch (error) { - console.error('Error in DALL-E response handling:', error) + logger.error('Error in DALL-E response handling:', error) throw error } }, transformError: (error) => { - console.error('DALL-E error:', error) + logger.error('DALL-E error:', error) if (error.response?.data?.error?.message) { return error.response.data.error.message } diff --git a/sim/tools/registry.ts b/sim/tools/registry.ts index 1102d93958..6ae8722484 100644 --- a/sim/tools/registry.ts +++ b/sim/tools/registry.ts @@ -37,6 +37,7 @@ import { whatsappSendMessageTool } from './whatsapp' import { xReadTool, xSearchTool, xUserTool, xWriteTool } from './x' import { youtubeSearchTool } from './youtube/search' import { ToolConfig } from './types' +import { elevenLabsTtsTool } from './elevenlabs' // Registry of all available tools export const tools: Record = { @@ -113,4 +114,5 @@ export const tools: Record = { mem0_add_memories: mem0AddMemoriesTool, mem0_search_memories: mem0SearchMemoriesTool, mem0_get_memories: mem0GetMemoriesTool, + elevenlabs_tts: elevenLabsTtsTool, } \ No newline at end of file