Compare commits

...

2 Commits

Author SHA1 Message Date
Swifty
4e2bcebbc6 Merge branch 'dev' into swiftyos/automat-19-elevenlabs 2025-07-14 12:18:45 +02:00
SwiftyOS
0d1aafbf10 elevenlabs integration 2025-07-11 11:21:29 +02:00
8 changed files with 1082 additions and 0 deletions

View File

@@ -0,0 +1,48 @@
"""
ElevenLabs integration blocks for AutoGPT Platform.
"""
# Speech generation blocks
from .speech import (
ElevenLabsGenerateSpeechBlock,
ElevenLabsGenerateSpeechWithTimestampsBlock,
)
# Speech-to-text blocks
from .transcription import (
ElevenLabsTranscribeAudioAsyncBlock,
ElevenLabsTranscribeAudioSyncBlock,
)
# Webhook trigger blocks
from .triggers import ElevenLabsWebhookTriggerBlock
# Utility blocks
from .utility import ElevenLabsGetUsageStatsBlock, ElevenLabsListModelsBlock
# Voice management blocks
from .voices import (
ElevenLabsCreateVoiceCloneBlock,
ElevenLabsDeleteVoiceBlock,
ElevenLabsGetVoiceDetailsBlock,
ElevenLabsListVoicesBlock,
)
__all__ = [
# Voice management
"ElevenLabsListVoicesBlock",
"ElevenLabsGetVoiceDetailsBlock",
"ElevenLabsCreateVoiceCloneBlock",
"ElevenLabsDeleteVoiceBlock",
# Speech generation
"ElevenLabsGenerateSpeechBlock",
"ElevenLabsGenerateSpeechWithTimestampsBlock",
# Speech-to-text
"ElevenLabsTranscribeAudioSyncBlock",
"ElevenLabsTranscribeAudioAsyncBlock",
# Utility
"ElevenLabsListModelsBlock",
"ElevenLabsGetUsageStatsBlock",
# Webhook triggers
"ElevenLabsWebhookTriggerBlock",
]

View File

@@ -0,0 +1,16 @@
"""
Shared configuration for all ElevenLabs blocks using the SDK pattern.
"""
from backend.sdk import BlockCostType, ProviderBuilder
from ._webhook import ElevenLabsWebhookManager
# Configure the ElevenLabs provider with API key authentication
elevenlabs = (
ProviderBuilder("elevenlabs")
.with_api_key("ELEVENLABS_API_KEY", "ElevenLabs API Key")
.with_webhook_manager(ElevenLabsWebhookManager)
.with_base_cost(2, BlockCostType.RUN) # Base cost for API calls
.build()
)

View File

@@ -0,0 +1,82 @@
"""
ElevenLabs webhook manager for handling webhook events.
"""
import hashlib
import hmac
from typing import Tuple
from backend.data.model import Credentials
from backend.sdk import BaseWebhooksManager, ProviderName, Webhook
class ElevenLabsWebhookManager(BaseWebhooksManager):
    """Manages ElevenLabs webhook events.

    ElevenLabs webhooks are configured manually through their dashboard, so
    the register/deregister hooks below are placeholders; the main job of
    this class is validating incoming payloads, including HMAC signature
    verification.
    """

    PROVIDER_NAME = ProviderName("elevenlabs")

    @classmethod
    async def validate_payload(cls, webhook: Webhook, request) -> Tuple[dict, str]:
        """
        Validate incoming webhook payload and signature.

        ElevenLabs supports HMAC (SHA-256 hex digest) authentication for
        webhooks.

        Returns:
            Tuple of (parsed JSON payload, event type string).

        Raises:
            ValueError: If a signature secret is configured but the request
                carries a missing or invalid signature.
        """
        # Verify webhook signature if a secret is configured.
        if webhook.secret:
            webhook_secret = webhook.config.get("webhook_secret")
            if webhook_secret:
                # Verify over the raw body bytes, *before* JSON parsing, so
                # the digest is computed on exactly what the sender signed.
                body = await request.body()
                expected_signature = hmac.new(
                    webhook_secret.encode(), body, hashlib.sha256
                ).hexdigest()
                signature = request.headers.get("x-elevenlabs-signature")
                # Bug fix: a missing signature header must also be rejected.
                # Previously `if signature and not compare_digest(...)` let
                # any request WITHOUT the header bypass verification.
                if not signature or not hmac.compare_digest(
                    signature, expected_signature
                ):
                    raise ValueError("Invalid webhook signature")

        payload = await request.json()

        # Extract event type from payload; "unknown" if the field is absent.
        event_type = payload.get("type", "unknown")
        return payload, event_type

    async def _register_webhook(
        self,
        credentials: Credentials,
        webhook_type: str,
        resource: str,
        events: list[str],
        ingress_url: str,
        secret: str,
    ) -> tuple[str, dict]:
        """
        Register a webhook with ElevenLabs.

        Note: ElevenLabs webhook registration is done through their dashboard,
        not via API. This is a placeholder implementation.

        Returns:
            Tuple of (webhook ID — empty, since none is issued — and a config
            dict carrying the secret plus manual-setup instructions).
        """
        # ElevenLabs requires manual webhook setup through the dashboard;
        # return an empty webhook ID and a config with instructions.
        config = {
            "manual_setup_required": True,
            "webhook_secret": secret,
            "instructions": "Please configure webhook URL in ElevenLabs dashboard",
        }
        return "", config

    async def _deregister_webhook(
        self, webhook: Webhook, credentials: Credentials
    ) -> None:
        """
        Deregister a webhook with ElevenLabs.

        Note: ElevenLabs webhook removal is done through their dashboard,
        so there is nothing to do here.
        """
        # ElevenLabs requires manual webhook removal through the dashboard.
        pass

View File

@@ -0,0 +1,179 @@
"""
ElevenLabs speech generation (text-to-speech) blocks.
"""
from typing import Optional
from backend.sdk import (
APIKeyCredentials,
Block,
BlockCategory,
BlockOutput,
BlockSchema,
CredentialsMetaInput,
Requests,
SchemaField,
)
from ._config import elevenlabs
class ElevenLabsGenerateSpeechBlock(Block):
    """Convert text into spoken audio and return it base64-encoded."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="ID of the voice to use")
        text: str = SchemaField(description="Text to convert to speech")
        model_id: str = SchemaField(
            description="Model ID to use for generation",
            default="eleven_multilingual_v2",
        )
        output_format: str = SchemaField(
            description="Audio format (e.g., mp3_44100_128)",
            default="mp3_44100_128",
        )
        voice_settings: Optional[dict] = SchemaField(
            description="Override voice settings (stability, similarity_boost, etc.)",
            default=None,
        )
        language_code: Optional[str] = SchemaField(
            description="Language code to enforce output language", default=None
        )
        seed: Optional[int] = SchemaField(
            description="Seed for reproducible output", default=None
        )

    class Output(BlockSchema):
        audio: str = SchemaField(description="Base64-encoded audio data")

    def __init__(self):
        super().__init__(
            id="c5d6e7f8-a9b0-c1d2-e3f4-a5b6c7d8e9f0",
            description="Generate speech audio from text using a specified voice",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """Call the ElevenLabs text-to-speech endpoint and yield base64 audio."""
        import base64

        # Required fields first; optional fields are added only when set so
        # the API applies its own defaults otherwise.
        payload: dict[str, str | int | dict] = {
            "text": input_data.text,
            "model_id": input_data.model_id,
        }
        if input_data.voice_settings:
            payload["voice_settings"] = input_data.voice_settings
        if input_data.language_code:
            payload["language_code"] = input_data.language_code
        if input_data.seed is not None:
            payload["seed"] = input_data.seed

        response = await Requests().post(
            f"https://api.elevenlabs.io/v1/text-to-speech/{input_data.voice_id}",
            headers={
                "xi-api-key": credentials.api_key.get_secret_value(),
                "Content-Type": "application/json",
            },
            json=payload,
            params={"output_format": input_data.output_format},
        )

        # The endpoint responds with raw audio bytes; encode for transport.
        yield "audio", base64.b64encode(response.content).decode("utf-8")
class ElevenLabsGenerateSpeechWithTimestampsBlock(Block):
    """Convert text to audio plus per-character timing (alignment) data."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="ID of the voice to use")
        text: str = SchemaField(description="Text to convert to speech")
        model_id: str = SchemaField(
            description="Model ID to use for generation",
            default="eleven_multilingual_v2",
        )
        output_format: str = SchemaField(
            description="Audio format (e.g., mp3_44100_128)",
            default="mp3_44100_128",
        )
        voice_settings: Optional[dict] = SchemaField(
            description="Override voice settings (stability, similarity_boost, etc.)",
            default=None,
        )
        language_code: Optional[str] = SchemaField(
            description="Language code to enforce output language", default=None
        )

    class Output(BlockSchema):
        audio_base64: str = SchemaField(description="Base64-encoded audio data")
        alignment: dict = SchemaField(
            description="Character-level timing alignment data"
        )
        normalized_alignment: dict = SchemaField(
            description="Normalized text alignment data"
        )

    def __init__(self):
        super().__init__(
            id="d6e7f8a9-b0c1-d2e3-f4a5-b6c7d8e9f0a1",
            description="Generate speech with character-level timestamp information",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """Call the with-timestamps TTS endpoint and yield audio + alignment."""
        # Required fields first; optional fields only when explicitly set.
        payload: dict[str, str | dict] = {
            "text": input_data.text,
            "model_id": input_data.model_id,
        }
        if input_data.voice_settings:
            payload["voice_settings"] = input_data.voice_settings
        if input_data.language_code:
            payload["language_code"] = input_data.language_code

        endpoint = (
            "https://api.elevenlabs.io/v1/text-to-speech/"
            f"{input_data.voice_id}/with-timestamps"
        )
        response = await Requests().post(
            endpoint,
            headers={
                "xi-api-key": credentials.api_key.get_secret_value(),
                "Content-Type": "application/json",
            },
            json=payload,
            params={"output_format": input_data.output_format},
        )

        # Unlike the plain TTS endpoint, this one returns JSON with the
        # audio already base64-encoded alongside the alignment data.
        result = response.json()
        yield "audio_base64", result.get("audio_base64", "")
        yield "alignment", result.get("alignment", {})
        yield "normalized_alignment", result.get("normalized_alignment", {})

View File

@@ -0,0 +1,232 @@
"""
ElevenLabs speech-to-text (transcription) blocks.
"""
from typing import Optional
from backend.sdk import (
APIKeyCredentials,
Block,
BlockCategory,
BlockOutput,
BlockSchema,
CredentialsMetaInput,
Requests,
SchemaField,
)
from ._config import elevenlabs
class ElevenLabsTranscribeAudioSyncBlock(Block):
    """Synchronously transcribe audio to text (word timestamps, diarization)."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        model_id: str = SchemaField(
            description="Model ID for transcription", default="scribe_v1"
        )
        file: Optional[str] = SchemaField(
            description="Base64-encoded audio file", default=None
        )
        cloud_storage_url: Optional[str] = SchemaField(
            description="URL to audio file in cloud storage", default=None
        )
        language_code: Optional[str] = SchemaField(
            description="Language code (ISO 639-1 or -3) to improve accuracy",
            default=None,
        )
        diarize: bool = SchemaField(
            description="Enable speaker diarization", default=False
        )
        num_speakers: Optional[int] = SchemaField(
            description="Expected number of speakers (max 32)", default=None
        )
        timestamps_granularity: str = SchemaField(
            description="Timestamp detail level: word, character, or none",
            default="word",
        )
        tag_audio_events: bool = SchemaField(
            description="Tag non-speech sounds (laughter, noise)", default=True
        )

    class Output(BlockSchema):
        text: str = SchemaField(description="Full transcribed text")
        words: list[dict] = SchemaField(
            description="Array with word timing and speaker info"
        )
        language_code: str = SchemaField(description="Detected language code")
        language_probability: float = SchemaField(
            description="Confidence in language detection"
        )

    def __init__(self):
        super().__init__(
            id="e7f8a9b0-c1d2-e3f4-a5b6-c7d8e9f0a1b2",
            description="Transcribe audio to text with timing and speaker information",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """POST the audio (inline or by URL) to the speech-to-text endpoint."""
        import base64
        from io import BytesIO

        # Exactly one audio source must be supplied.
        has_file = bool(input_data.file)
        has_url = bool(input_data.cloud_storage_url)
        if not (has_file or has_url):
            raise ValueError("Either 'file' or 'cloud_storage_url' must be provided")
        if has_file and has_url:
            raise ValueError(
                "Only one of 'file' or 'cloud_storage_url' should be provided"
            )

        # Multipart form fields — booleans serialized as "true"/"false".
        form_data = {
            "model_id": input_data.model_id,
            "diarize": str(input_data.diarize).lower(),
            "timestamps_granularity": input_data.timestamps_granularity,
            "tag_audio_events": str(input_data.tag_audio_events).lower(),
        }
        if input_data.language_code:
            form_data["language_code"] = input_data.language_code
        if input_data.num_speakers is not None:
            form_data["num_speakers"] = str(input_data.num_speakers)

        # Attach either the decoded file bytes or the cloud-storage URL.
        files = None
        if has_file:
            raw_audio = base64.b64decode(input_data.file)
            files = [("file", ("audio.wav", BytesIO(raw_audio), "audio/wav"))]
        else:
            form_data["cloud_storage_url"] = input_data.cloud_storage_url

        response = await Requests().post(
            "https://api.elevenlabs.io/v1/speech-to-text",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            data=form_data,
            files=files,
        )

        result = response.json()
        yield "text", result.get("text", "")
        yield "words", result.get("words", [])
        yield "language_code", result.get("language_code", "")
        yield "language_probability", result.get("language_probability", 0.0)
class ElevenLabsTranscribeAudioAsyncBlock(Block):
    """Start a transcription job that returns quickly; result arrives via webhook."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        model_id: str = SchemaField(
            description="Model ID for transcription", default="scribe_v1"
        )
        file: Optional[str] = SchemaField(
            description="Base64-encoded audio file", default=None
        )
        cloud_storage_url: Optional[str] = SchemaField(
            description="URL to audio file in cloud storage", default=None
        )
        language_code: Optional[str] = SchemaField(
            description="Language code (ISO 639-1 or -3) to improve accuracy",
            default=None,
        )
        diarize: bool = SchemaField(
            description="Enable speaker diarization", default=False
        )
        num_speakers: Optional[int] = SchemaField(
            description="Expected number of speakers (max 32)", default=None
        )
        timestamps_granularity: str = SchemaField(
            description="Timestamp detail level: word, character, or none",
            default="word",
        )
        webhook_url: str = SchemaField(
            description="URL to receive transcription result",
            default="",
        )

    class Output(BlockSchema):
        tracking_id: str = SchemaField(description="ID to track the transcription job")

    def __init__(self):
        super().__init__(
            id="f8a9b0c1-d2e3-f4a5-b6c7-d8e9f0a1b2c3",
            description="Start async transcription with webhook callback",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """Submit the audio for async transcription and yield a tracking ID."""
        import base64
        import uuid
        from io import BytesIO

        # Exactly one audio source must be supplied.
        has_file = bool(input_data.file)
        has_url = bool(input_data.cloud_storage_url)
        if not (has_file or has_url):
            raise ValueError("Either 'file' or 'cloud_storage_url' must be provided")
        if has_file and has_url:
            raise ValueError(
                "Only one of 'file' or 'cloud_storage_url' should be provided"
            )

        # Multipart form fields; "webhook": "true" switches the endpoint
        # into async mode so it returns immediately.
        form_data = {
            "model_id": input_data.model_id,
            "diarize": str(input_data.diarize).lower(),
            "timestamps_granularity": input_data.timestamps_granularity,
            "webhook": "true",  # Enable async mode
        }
        if input_data.language_code:
            form_data["language_code"] = input_data.language_code
        if input_data.num_speakers is not None:
            form_data["num_speakers"] = str(input_data.num_speakers)
        if input_data.webhook_url:
            form_data["webhook_url"] = input_data.webhook_url

        # Attach either the decoded file bytes or the cloud-storage URL.
        files = None
        if has_file:
            raw_audio = base64.b64decode(input_data.file)
            files = [("file", ("audio.wav", BytesIO(raw_audio), "audio/wav"))]
        else:
            form_data["cloud_storage_url"] = input_data.cloud_storage_url

        response = await Requests().post(
            "https://api.elevenlabs.io/v1/speech-to-text",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            data=form_data,
            files=files,
        )

        # Fall back to a locally generated UUID if the API does not return
        # a tracking ID of its own.
        result = response.json()
        yield "tracking_id", result.get("tracking_id", str(uuid.uuid4()))

View File

@@ -0,0 +1,160 @@
"""
ElevenLabs webhook trigger blocks.
"""
from pydantic import BaseModel
from backend.sdk import (
Block,
BlockCategory,
BlockOutput,
BlockSchema,
BlockType,
BlockWebhookConfig,
CredentialsMetaInput,
ProviderName,
SchemaField,
)
from ._config import elevenlabs
class ElevenLabsWebhookTriggerBlock(Block):
    """Start a flow when ElevenLabs POSTs an event (STT finished, voice removal, etc.)."""

    # Per-event (field, default) pairs used to project the raw payload's
    # "data" object into a stable, documented shape.
    _EVENT_FIELDS = {
        "speech_to_text_completed": (
            ("transcription_id", None),
            ("text", None),
            ("words", []),
            ("language_code", None),
            ("language_probability", None),
        ),
        "post_call_transcription": (
            ("agent_id", None),
            ("conversation_id", None),
            ("transcript", None),
            ("metadata", {}),
        ),
        "voice_removal_notice": (
            ("voice_id", None),
            ("voice_name", None),
            ("removal_date", None),
            ("reason", None),
        ),
        "voice_removal_notice_withdrawn": (
            ("voice_id", None),
            ("voice_name", None),
        ),
        "voice_removed": (
            ("voice_id", None),
            ("voice_name", None),
            ("removed_at", None),
        ),
    }

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        webhook_url: str = SchemaField(
            description="URL to receive webhooks (auto-generated)",
            default="",
            hidden=True,
        )

        class EventsFilter(BaseModel):
            """ElevenLabs event types to subscribe to"""

            speech_to_text_completed: bool = SchemaField(
                description="Speech-to-text transcription completed", default=True
            )
            post_call_transcription: bool = SchemaField(
                description="Conversational AI call transcription completed",
                default=True,
            )
            voice_removal_notice: bool = SchemaField(
                description="Voice scheduled for removal", default=True
            )
            voice_removed: bool = SchemaField(
                description="Voice has been removed", default=True
            )
            voice_removal_notice_withdrawn: bool = SchemaField(
                description="Voice removal cancelled", default=True
            )

        events: EventsFilter = SchemaField(
            title="Events", description="The events to subscribe to"
        )
        # Webhook payload - populated by the system
        payload: dict = SchemaField(
            description="Webhook payload data",
            default={},
            hidden=True,
        )

    class Output(BlockSchema):
        type: str = SchemaField(description="Event type")
        event_timestamp: int = SchemaField(description="Unix timestamp of the event")
        data: dict = SchemaField(description="Event-specific data payload")

    def __init__(self):
        super().__init__(
            id="c1d2e3f4-a5b6-c7d8-e9f0-a1b2c3d4e5f6",
            description="Receive webhook events from ElevenLabs",
            categories={BlockCategory.DEVELOPER_TOOLS},
            input_schema=self.Input,
            output_schema=self.Output,
            block_type=BlockType.WEBHOOK,
            webhook_config=BlockWebhookConfig(
                provider=ProviderName("elevenlabs"),
                webhook_type="notification",
                event_filter_input="events",
                resource_format="",
            ),
        )

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        """Filter the incoming event and emit a normalized payload."""
        payload = input_data.payload
        event_type = payload.get("type", "unknown")

        # The EventsFilter field names match the event type strings exactly,
        # so the enabled/disabled flag can be looked up by attribute name.
        # Unknown event types default to disabled.
        if not getattr(input_data.events, event_type, False):
            return  # Event type not subscribed — skip silently.

        yield "type", event_type
        yield "event_timestamp", payload.get("event_timestamp", 0)

        data = payload.get("data", {})
        spec = self._EVENT_FIELDS.get(event_type)
        if spec is None:
            # Unknown-but-enabled event type: pass the raw data through.
            processed_data = data
        else:
            processed_data = {key: data.get(key, default) for key, default in spec}
        yield "data", processed_data

View File

@@ -0,0 +1,116 @@
"""
ElevenLabs utility blocks for models and usage stats.
"""
from backend.sdk import (
APIKeyCredentials,
Block,
BlockCategory,
BlockOutput,
BlockSchema,
CredentialsMetaInput,
Requests,
SchemaField,
)
from ._config import elevenlabs
class ElevenLabsListModelsBlock(Block):
    """Fetch every available model ID together with its capabilities."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )

    class Output(BlockSchema):
        models: list[dict] = SchemaField(
            description="Array of model objects with capabilities"
        )

    def __init__(self):
        super().__init__(
            id="a9b0c1d2-e3f4-a5b6-c7d8-e9f0a1b2c3d4",
            description="List all available voice models and their capabilities",
            categories={BlockCategory.DEVELOPER_TOOLS},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """GET /v1/models and yield the JSON list as-is."""
        response = await Requests().get(
            "https://api.elevenlabs.io/v1/models",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
        )
        yield "models", response.json()
class ElevenLabsGetUsageStatsBlock(Block):
    """Report character / credit usage, e.g. for billing dashboards."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        start_unix: int = SchemaField(
            description="Start timestamp in Unix epoch seconds"
        )
        end_unix: int = SchemaField(description="End timestamp in Unix epoch seconds")
        aggregation_interval: str = SchemaField(
            description="Aggregation interval: daily or monthly",
            default="daily",
        )

    class Output(BlockSchema):
        usage: list[dict] = SchemaField(description="Array of usage data per interval")
        total_character_count: int = SchemaField(
            description="Total characters used in period"
        )
        total_requests: int = SchemaField(description="Total API requests in period")

    def __init__(self):
        super().__init__(
            id="b0c1d2e3-f4a5-b6c7-d8e9-f0a1b2c3d4e5",
            description="Get character and credit usage statistics",
            categories={BlockCategory.DEVELOPER_TOOLS},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """GET /v1/usage/character-stats for the requested window."""
        response = await Requests().get(
            "https://api.elevenlabs.io/v1/usage/character-stats",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            params={
                "start_unix": input_data.start_unix,
                "end_unix": input_data.end_unix,
                "aggregation_interval": input_data.aggregation_interval,
            },
        )

        stats = response.json()
        yield "usage", stats.get("usage", [])
        yield "total_character_count", stats.get("total_character_count", 0)
        yield "total_requests", stats.get("total_requests", 0)

View File

@@ -0,0 +1,249 @@
"""
ElevenLabs voice management blocks.
"""
from typing import Optional
from backend.sdk import (
APIKeyCredentials,
Block,
BlockCategory,
BlockOutput,
BlockSchema,
CredentialsMetaInput,
Requests,
SchemaField,
)
from ._config import elevenlabs
class ElevenLabsListVoicesBlock(Block):
    """Fetch all voices the account can use (for pick-lists, UI menus, etc.)."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        search: str = SchemaField(
            description="Search term to filter voices", default=""
        )
        voice_type: Optional[str] = SchemaField(
            description="Filter by voice type: premade, cloned, or professional",
            default=None,
        )
        page_size: int = SchemaField(
            description="Number of voices per page (max 100)", default=10
        )
        next_page_token: str = SchemaField(
            description="Token for fetching next page", default=""
        )

    class Output(BlockSchema):
        voices: list[dict] = SchemaField(
            description="Array of voice objects with id, name, category, etc."
        )
        next_page_token: Optional[str] = SchemaField(
            description="Token for fetching next page, null if no more pages"
        )

    def __init__(self):
        super().__init__(
            id="e1a2b3c4-d5e6-f7a8-b9c0-d1e2f3a4b5c6",
            description="List all available voices with filtering and pagination",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """GET /v2/voices with the configured filters and pagination token."""
        # page_size is always sent; empty/None filters are omitted so the API
        # applies its own defaults.
        query: dict[str, str | int] = {"page_size": input_data.page_size}
        if input_data.search:
            query["search"] = input_data.search
        if input_data.voice_type:
            query["voice_type"] = input_data.voice_type
        if input_data.next_page_token:
            query["next_page_token"] = input_data.next_page_token

        response = await Requests().get(
            "https://api.elevenlabs.io/v2/voices",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            params=query,
        )

        page = response.json()
        yield "voices", page.get("voices", [])
        # None (rather than "") signals there are no further pages.
        yield "next_page_token", page.get("next_page_token")
class ElevenLabsGetVoiceDetailsBlock(Block):
    """Retrieve metadata and settings for a single voice."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="The ID of the voice to retrieve")

    class Output(BlockSchema):
        voice: dict = SchemaField(
            description="Voice object with name, labels, settings, etc."
        )

    def __init__(self):
        super().__init__(
            id="f2a3b4c5-d6e7-f8a9-b0c1-d2e3f4a5b6c7",
            description="Get detailed information about a specific voice",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """GET /v1/voices/{voice_id} and yield the JSON object unchanged."""
        response = await Requests().get(
            f"https://api.elevenlabs.io/v1/voices/{input_data.voice_id}",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
        )
        yield "voice", response.json()
class ElevenLabsCreateVoiceCloneBlock(Block):
    """Upload sample clips to create a custom (IVC) voice."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        name: str = SchemaField(description="Name for the new voice")
        files: list[str] = SchemaField(
            description="Base64-encoded audio files (1-10 files, max 25MB each)"
        )
        description: str = SchemaField(
            description="Description of the voice", default=""
        )
        labels: dict = SchemaField(
            description="Metadata labels (e.g., accent, age)", default={}
        )
        remove_background_noise: bool = SchemaField(
            description="Whether to remove background noise from samples", default=False
        )

    class Output(BlockSchema):
        voice_id: str = SchemaField(description="ID of the newly created voice")
        requires_verification: bool = SchemaField(
            description="Whether the voice requires verification"
        )

    def __init__(self):
        super().__init__(
            id="a3b4c5d6-e7f8-a9b0-c1d2-e3f4a5b6c7d8",
            description="Create a new voice clone from audio samples",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """POST /v1/voices/add with multipart form data and sample files."""
        import base64
        import json
        from io import BytesIO

        # Optional form fields are included only when they carry a value;
        # labels are JSON-encoded as the API expects a string field.
        form_data = {"name": input_data.name}
        if input_data.description:
            form_data["description"] = input_data.description
        if input_data.labels:
            form_data["labels"] = json.dumps(input_data.labels)
        if input_data.remove_background_noise:
            form_data["remove_background_noise"] = "true"

        # Decode each base64 sample into an in-memory file part.
        sample_parts = [
            ("files", (f"sample_{idx}.mp3", BytesIO(base64.b64decode(blob)), "audio/mpeg"))
            for idx, blob in enumerate(input_data.files)
        ]

        response = await Requests().post(
            "https://api.elevenlabs.io/v1/voices/add",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            data=form_data,
            files=sample_parts,
        )

        created = response.json()
        yield "voice_id", created.get("voice_id", "")
        yield "requires_verification", created.get("requires_verification", False)
class ElevenLabsDeleteVoiceBlock(Block):
    """Permanently remove a custom voice."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="The ID of the voice to delete")

    class Output(BlockSchema):
        status: str = SchemaField(description="Deletion status (ok or error)")

    def __init__(self):
        super().__init__(
            id="b4c5d6e7-f8a9-b0c1-d2e3-f4a5b6c7d8e9",
            description="Delete a custom voice from your account",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """DELETE /v1/voices/{voice_id}; report "ok" on 200/204, else "error"."""
        response = await Requests().delete(
            f"https://api.elevenlabs.io/v1/voices/{input_data.voice_id}",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
        )
        # 200 and 204 both indicate successful deletion.
        succeeded = response.status in [200, 204]
        yield "status", "ok" if succeeded else "error"