mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-20 20:48:11 -05:00
Compare commits
2 Commits
testing-cl
...
swiftyos/a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e2bcebbc6 | ||
|
|
0d1aafbf10 |
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
ElevenLabs integration blocks for AutoGPT Platform.
|
||||
"""
|
||||
|
||||
# Speech generation blocks
|
||||
from .speech import (
|
||||
ElevenLabsGenerateSpeechBlock,
|
||||
ElevenLabsGenerateSpeechWithTimestampsBlock,
|
||||
)
|
||||
|
||||
# Speech-to-text blocks
|
||||
from .transcription import (
|
||||
ElevenLabsTranscribeAudioAsyncBlock,
|
||||
ElevenLabsTranscribeAudioSyncBlock,
|
||||
)
|
||||
|
||||
# Webhook trigger blocks
|
||||
from .triggers import ElevenLabsWebhookTriggerBlock
|
||||
|
||||
# Utility blocks
|
||||
from .utility import ElevenLabsGetUsageStatsBlock, ElevenLabsListModelsBlock
|
||||
|
||||
# Voice management blocks
|
||||
from .voices import (
|
||||
ElevenLabsCreateVoiceCloneBlock,
|
||||
ElevenLabsDeleteVoiceBlock,
|
||||
ElevenLabsGetVoiceDetailsBlock,
|
||||
ElevenLabsListVoicesBlock,
|
||||
)
|
||||
|
||||
# Public API of the ElevenLabs block package, grouped by feature area.
# Order matters only for readability; names must match the classes
# imported above.
__all__ = [
    # Voice management (list / inspect / clone / delete voices)
    "ElevenLabsListVoicesBlock",
    "ElevenLabsGetVoiceDetailsBlock",
    "ElevenLabsCreateVoiceCloneBlock",
    "ElevenLabsDeleteVoiceBlock",
    # Speech generation (text-to-speech)
    "ElevenLabsGenerateSpeechBlock",
    "ElevenLabsGenerateSpeechWithTimestampsBlock",
    # Speech-to-text (transcription)
    "ElevenLabsTranscribeAudioSyncBlock",
    "ElevenLabsTranscribeAudioAsyncBlock",
    # Utility (models catalogue, usage/billing stats)
    "ElevenLabsListModelsBlock",
    "ElevenLabsGetUsageStatsBlock",
    # Webhook triggers (events pushed by ElevenLabs)
    "ElevenLabsWebhookTriggerBlock",
]
|
||||
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
Shared configuration for all ElevenLabs blocks using the SDK pattern.
|
||||
"""
|
||||
|
||||
from backend.sdk import BlockCostType, ProviderBuilder
|
||||
|
||||
from ._webhook import ElevenLabsWebhookManager
|
||||
|
||||
# Configure the ElevenLabs provider with API key authentication
|
||||
# Shared ElevenLabs provider: API-key auth, dashboard-managed webhooks, and a
# flat per-run base cost applied to every block that uses these credentials.
_builder = ProviderBuilder("elevenlabs")
_builder = _builder.with_api_key("ELEVENLABS_API_KEY", "ElevenLabs API Key")
_builder = _builder.with_webhook_manager(ElevenLabsWebhookManager)
# Base cost of 2 charged once per block run.
_builder = _builder.with_base_cost(2, BlockCostType.RUN)
elevenlabs = _builder.build()
|
||||
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
ElevenLabs webhook manager for handling webhook events.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
from typing import Tuple
|
||||
|
||||
from backend.data.model import Credentials
|
||||
from backend.sdk import BaseWebhooksManager, ProviderName, Webhook
|
||||
|
||||
|
||||
class ElevenLabsWebhookManager(BaseWebhooksManager):
    """Webhook manager for ElevenLabs events.

    ElevenLabs does not expose a webhook-management API: endpoints are
    configured manually in the ElevenLabs dashboard. The register/deregister
    hooks below are therefore placeholders that only carry setup instructions;
    payload/signature validation is the real work done here.
    """

    PROVIDER_NAME = ProviderName("elevenlabs")

    @classmethod
    async def validate_payload(cls, webhook: Webhook, request) -> tuple[dict, str]:
        """Validate an incoming webhook request and extract its event type.

        Returns:
            (payload, event_type): the parsed JSON body and the value of its
            "type" field ("unknown" when absent).

        Raises:
            ValueError: when a signing secret is configured and the request's
                signature header is missing or does not match the HMAC of the
                raw body.
        """
        payload = await request.json()

        # Verify the HMAC signature when a secret is configured for this hook.
        if webhook.secret:
            webhook_secret = webhook.config.get("webhook_secret")
            if webhook_secret:
                # The signature is computed over the raw request bytes, not
                # over re-serialized JSON.
                body = await request.body()
                expected_signature = hmac.new(
                    webhook_secret.encode(), body, hashlib.sha256
                ).hexdigest()

                # NOTE(review): header name and plain hex-digest format are as
                # this code originally assumed — confirm against ElevenLabs'
                # current webhook signing scheme.
                signature = request.headers.get("x-elevenlabs-signature")

                # Fix: previously a missing signature header silently skipped
                # verification, allowing unsigned requests through even though
                # a secret was configured. Require a valid signature.
                if not signature:
                    raise ValueError("Missing webhook signature")
                if not hmac.compare_digest(signature, expected_signature):
                    raise ValueError("Invalid webhook signature")

        # The event type rides in the payload's "type" field.
        event_type = payload.get("type", "unknown")
        return payload, event_type

    async def _register_webhook(
        self,
        credentials: Credentials,
        webhook_type: str,
        resource: str,
        events: list[str],
        ingress_url: str,
        secret: str,
    ) -> tuple[str, dict]:
        """Placeholder registration.

        ElevenLabs webhooks are created manually in the dashboard, so no API
        call is made. Returns an empty webhook ID plus a config dict carrying
        the signing secret and user-facing setup instructions.
        """
        config = {
            "manual_setup_required": True,
            "webhook_secret": secret,
            "instructions": "Please configure webhook URL in ElevenLabs dashboard",
        }
        return "", config

    async def _deregister_webhook(
        self, webhook: Webhook, credentials: Credentials
    ) -> None:
        """Placeholder deregistration.

        Webhook removal also happens in the ElevenLabs dashboard; nothing to
        do via API.
        """
        pass
|
||||
179
autogpt_platform/backend/backend/blocks/elevenlabs/speech.py
Normal file
179
autogpt_platform/backend/backend/blocks/elevenlabs/speech.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
ElevenLabs speech generation (text-to-speech) blocks.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from backend.sdk import (
|
||||
APIKeyCredentials,
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchema,
|
||||
CredentialsMetaInput,
|
||||
Requests,
|
||||
SchemaField,
|
||||
)
|
||||
|
||||
from ._config import elevenlabs
|
||||
|
||||
|
||||
class ElevenLabsGenerateSpeechBlock(Block):
    """Convert text into spoken audio, returned base64-encoded."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="ID of the voice to use")
        text: str = SchemaField(description="Text to convert to speech")
        model_id: str = SchemaField(
            description="Model ID to use for generation",
            default="eleven_multilingual_v2",
        )
        output_format: str = SchemaField(
            description="Audio format (e.g., mp3_44100_128)",
            default="mp3_44100_128",
        )
        voice_settings: Optional[dict] = SchemaField(
            description="Override voice settings (stability, similarity_boost, etc.)",
            default=None,
        )
        language_code: Optional[str] = SchemaField(
            description="Language code to enforce output language", default=None
        )
        seed: Optional[int] = SchemaField(
            description="Seed for reproducible output", default=None
        )

    class Output(BlockSchema):
        audio: str = SchemaField(description="Base64-encoded audio data")

    def __init__(self):
        super().__init__(
            id="c5d6e7f8-a9b0-c1d2-e3f4-a5b6c7d8e9f0",
            description="Generate speech audio from text using a specified voice",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        import base64

        # Required fields first; optional ones are attached only when supplied.
        request_body: dict[str, str | int | dict] = {
            "text": input_data.text,
            "model_id": input_data.model_id,
        }
        if input_data.voice_settings:
            request_body["voice_settings"] = input_data.voice_settings
        if input_data.language_code:
            request_body["language_code"] = input_data.language_code
        if input_data.seed is not None:
            request_body["seed"] = input_data.seed

        # The voice is addressed in the URL path; the container/bitrate is
        # selected via the output_format query parameter.
        api_response = await Requests().post(
            f"https://api.elevenlabs.io/v1/text-to-speech/{input_data.voice_id}",
            headers={
                "xi-api-key": credentials.api_key.get_secret_value(),
                "Content-Type": "application/json",
            },
            json=request_body,
            params={"output_format": input_data.output_format},
        )

        # The endpoint returns raw audio bytes; expose them as base64 text.
        yield "audio", base64.b64encode(api_response.content).decode("utf-8")
|
||||
|
||||
|
||||
class ElevenLabsGenerateSpeechWithTimestampsBlock(Block):
    """Convert text into audio plus per-character timing alignment data."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="ID of the voice to use")
        text: str = SchemaField(description="Text to convert to speech")
        model_id: str = SchemaField(
            description="Model ID to use for generation",
            default="eleven_multilingual_v2",
        )
        output_format: str = SchemaField(
            description="Audio format (e.g., mp3_44100_128)",
            default="mp3_44100_128",
        )
        voice_settings: Optional[dict] = SchemaField(
            description="Override voice settings (stability, similarity_boost, etc.)",
            default=None,
        )
        language_code: Optional[str] = SchemaField(
            description="Language code to enforce output language", default=None
        )

    class Output(BlockSchema):
        audio_base64: str = SchemaField(description="Base64-encoded audio data")
        alignment: dict = SchemaField(
            description="Character-level timing alignment data"
        )
        normalized_alignment: dict = SchemaField(
            description="Normalized text alignment data"
        )

    def __init__(self):
        super().__init__(
            id="d6e7f8a9-b0c1-d2e3-f4a5-b6c7d8e9f0a1",
            description="Generate speech with character-level timestamp information",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        # Required fields first; optional ones only when supplied.
        payload: dict[str, str | dict] = {
            "text": input_data.text,
            "model_id": input_data.model_id,
        }
        if input_data.voice_settings:
            payload["voice_settings"] = input_data.voice_settings
        if input_data.language_code:
            payload["language_code"] = input_data.language_code

        # The /with-timestamps variant returns JSON (audio as base64 plus
        # alignment structures) instead of raw audio bytes.
        api_response = await Requests().post(
            f"https://api.elevenlabs.io/v1/text-to-speech/{input_data.voice_id}/with-timestamps",
            headers={
                "xi-api-key": credentials.api_key.get_secret_value(),
                "Content-Type": "application/json",
            },
            json=payload,
            params={"output_format": input_data.output_format},
        )

        result = api_response.json()
        yield "audio_base64", result.get("audio_base64", "")
        yield "alignment", result.get("alignment", {})
        yield "normalized_alignment", result.get("normalized_alignment", {})
|
||||
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
ElevenLabs speech-to-text (transcription) blocks.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from backend.sdk import (
|
||||
APIKeyCredentials,
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchema,
|
||||
CredentialsMetaInput,
|
||||
Requests,
|
||||
SchemaField,
|
||||
)
|
||||
|
||||
from ._config import elevenlabs
|
||||
|
||||
|
||||
class ElevenLabsTranscribeAudioSyncBlock(Block):
    """Synchronously transcribe audio to text with word timings and optional diarization."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        model_id: str = SchemaField(
            description="Model ID for transcription", default="scribe_v1"
        )
        file: Optional[str] = SchemaField(
            description="Base64-encoded audio file", default=None
        )
        cloud_storage_url: Optional[str] = SchemaField(
            description="URL to audio file in cloud storage", default=None
        )
        language_code: Optional[str] = SchemaField(
            description="Language code (ISO 639-1 or -3) to improve accuracy",
            default=None,
        )
        diarize: bool = SchemaField(
            description="Enable speaker diarization", default=False
        )
        num_speakers: Optional[int] = SchemaField(
            description="Expected number of speakers (max 32)", default=None
        )
        timestamps_granularity: str = SchemaField(
            description="Timestamp detail level: word, character, or none",
            default="word",
        )
        tag_audio_events: bool = SchemaField(
            description="Tag non-speech sounds (laughter, noise)", default=True
        )

    class Output(BlockSchema):
        text: str = SchemaField(description="Full transcribed text")
        words: list[dict] = SchemaField(
            description="Array with word timing and speaker info"
        )
        language_code: str = SchemaField(description="Detected language code")
        language_probability: float = SchemaField(
            description="Confidence in language detection"
        )

    def __init__(self):
        super().__init__(
            id="e7f8a9b0-c1d2-e3f4-a5b6-c7d8e9f0a1b2",
            description="Transcribe audio to text with timing and speaker information",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        import base64
        from io import BytesIO

        # Exactly one audio source (inline file XOR cloud URL) must be given.
        if not input_data.file and not input_data.cloud_storage_url:
            raise ValueError("Either 'file' or 'cloud_storage_url' must be provided")
        if input_data.file and input_data.cloud_storage_url:
            raise ValueError(
                "Only one of 'file' or 'cloud_storage_url' should be provided"
            )

        # Multipart form fields; booleans are serialized as "true"/"false".
        form = {
            "model_id": input_data.model_id,
            "diarize": str(input_data.diarize).lower(),
            "timestamps_granularity": input_data.timestamps_granularity,
            "tag_audio_events": str(input_data.tag_audio_events).lower(),
        }
        if input_data.language_code:
            form["language_code"] = input_data.language_code
        if input_data.num_speakers is not None:
            form["num_speakers"] = str(input_data.num_speakers)

        attachments = None
        if input_data.file:
            # NOTE(review): filename/MIME are hard-coded as WAV even though the
            # input may be another format — confirm the API sniffs content.
            decoded_audio = base64.b64decode(input_data.file)
            attachments = [("file", ("audio.wav", BytesIO(decoded_audio), "audio/wav"))]
        else:
            form["cloud_storage_url"] = input_data.cloud_storage_url

        stt_response = await Requests().post(
            "https://api.elevenlabs.io/v1/speech-to-text",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            data=form,
            files=attachments,
        )

        result = stt_response.json()
        yield "text", result.get("text", "")
        yield "words", result.get("words", [])
        yield "language_code", result.get("language_code", "")
        yield "language_probability", result.get("language_probability", 0.0)
|
||||
|
||||
|
||||
class ElevenLabsTranscribeAudioAsyncBlock(Block):
    """Start an asynchronous transcription whose result is delivered via webhook."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        model_id: str = SchemaField(
            description="Model ID for transcription", default="scribe_v1"
        )
        file: Optional[str] = SchemaField(
            description="Base64-encoded audio file", default=None
        )
        cloud_storage_url: Optional[str] = SchemaField(
            description="URL to audio file in cloud storage", default=None
        )
        language_code: Optional[str] = SchemaField(
            description="Language code (ISO 639-1 or -3) to improve accuracy",
            default=None,
        )
        diarize: bool = SchemaField(
            description="Enable speaker diarization", default=False
        )
        num_speakers: Optional[int] = SchemaField(
            description="Expected number of speakers (max 32)", default=None
        )
        timestamps_granularity: str = SchemaField(
            description="Timestamp detail level: word, character, or none",
            default="word",
        )
        webhook_url: str = SchemaField(
            description="URL to receive transcription result",
            default="",
        )

    class Output(BlockSchema):
        tracking_id: str = SchemaField(description="ID to track the transcription job")

    def __init__(self):
        super().__init__(
            id="f8a9b0c1-d2e3-f4a5-b6c7-d8e9f0a1b2c3",
            description="Start async transcription with webhook callback",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        import base64
        import uuid
        from io import BytesIO

        # Exactly one audio source (inline file XOR cloud URL) must be given.
        if not input_data.file and not input_data.cloud_storage_url:
            raise ValueError("Either 'file' or 'cloud_storage_url' must be provided")
        if input_data.file and input_data.cloud_storage_url:
            raise ValueError(
                "Only one of 'file' or 'cloud_storage_url' should be provided"
            )

        # "webhook": "true" switches the endpoint into async (callback) mode.
        form = {
            "model_id": input_data.model_id,
            "diarize": str(input_data.diarize).lower(),
            "timestamps_granularity": input_data.timestamps_granularity,
            "webhook": "true",
        }
        if input_data.language_code:
            form["language_code"] = input_data.language_code
        if input_data.num_speakers is not None:
            form["num_speakers"] = str(input_data.num_speakers)
        if input_data.webhook_url:
            form["webhook_url"] = input_data.webhook_url

        attachments = None
        if input_data.file:
            # NOTE(review): filename/MIME are hard-coded as WAV regardless of
            # the actual audio format — confirm the API tolerates this.
            decoded_audio = base64.b64decode(input_data.file)
            attachments = [("file", ("audio.wav", BytesIO(decoded_audio), "audio/wav"))]
        else:
            form["cloud_storage_url"] = input_data.cloud_storage_url

        stt_response = await Requests().post(
            "https://api.elevenlabs.io/v1/speech-to-text",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            data=form,
            files=attachments,
        )

        # Prefer a server-issued tracking ID; fall back to a locally generated
        # UUID so downstream nodes always get a non-empty identifier.
        result = stt_response.json()
        yield "tracking_id", result.get("tracking_id", str(uuid.uuid4()))
|
||||
160
autogpt_platform/backend/backend/blocks/elevenlabs/triggers.py
Normal file
160
autogpt_platform/backend/backend/blocks/elevenlabs/triggers.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
ElevenLabs webhook trigger blocks.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from backend.sdk import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchema,
|
||||
BlockType,
|
||||
BlockWebhookConfig,
|
||||
CredentialsMetaInput,
|
||||
ProviderName,
|
||||
SchemaField,
|
||||
)
|
||||
|
||||
from ._config import elevenlabs
|
||||
|
||||
|
||||
class ElevenLabsWebhookTriggerBlock(Block):
    """Start a flow when ElevenLabs POSTs an event (STT finished, voice removal, etc.)."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        webhook_url: str = SchemaField(
            description="URL to receive webhooks (auto-generated)",
            default="",
            hidden=True,
        )

        class EventsFilter(BaseModel):
            """ElevenLabs event types to subscribe to"""

            speech_to_text_completed: bool = SchemaField(
                description="Speech-to-text transcription completed", default=True
            )
            post_call_transcription: bool = SchemaField(
                description="Conversational AI call transcription completed",
                default=True,
            )
            voice_removal_notice: bool = SchemaField(
                description="Voice scheduled for removal", default=True
            )
            voice_removed: bool = SchemaField(
                description="Voice has been removed", default=True
            )
            voice_removal_notice_withdrawn: bool = SchemaField(
                description="Voice removal cancelled", default=True
            )

        events: EventsFilter = SchemaField(
            title="Events", description="The events to subscribe to"
        )

        # Webhook payload - populated by the system
        payload: dict = SchemaField(
            description="Webhook payload data",
            default={},
            hidden=True,
        )

    class Output(BlockSchema):
        type: str = SchemaField(description="Event type")
        event_timestamp: int = SchemaField(description="Unix timestamp of the event")
        data: dict = SchemaField(description="Event-specific data payload")

    def __init__(self):
        super().__init__(
            id="c1d2e3f4-a5b6-c7d8-e9f0-a1b2c3d4e5f6",
            description="Receive webhook events from ElevenLabs",
            categories={BlockCategory.DEVELOPER_TOOLS},
            input_schema=self.Input,
            output_schema=self.Output,
            block_type=BlockType.WEBHOOK,
            webhook_config=BlockWebhookConfig(
                provider=ProviderName("elevenlabs"),
                webhook_type="notification",
                event_filter_input="events",
                resource_format="",
            ),
        )

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        """Filter the incoming event and emit its normalized fields."""
        event_payload = input_data.payload
        event_type = event_payload.get("type", "unknown")

        # Only forward event types the user enabled; unknown types (absent
        # from this map) are dropped as well.
        enabled_events = {
            "speech_to_text_completed": input_data.events.speech_to_text_completed,
            "post_call_transcription": input_data.events.post_call_transcription,
            "voice_removal_notice": input_data.events.voice_removal_notice,
            "voice_removed": input_data.events.voice_removed,
            "voice_removal_notice_withdrawn": input_data.events.voice_removal_notice_withdrawn,
        }
        if not enabled_events.get(event_type, False):
            return

        yield "type", event_type
        yield "event_timestamp", event_payload.get("event_timestamp", 0)

        data = event_payload.get("data", {})

        # Normalize the event-specific section of the payload per event type.
        if event_type == "speech_to_text_completed":
            details = {
                "transcription_id": data.get("transcription_id"),
                "text": data.get("text"),
                "words": data.get("words", []),
                "language_code": data.get("language_code"),
                "language_probability": data.get("language_probability"),
            }
        elif event_type == "post_call_transcription":
            details = {
                "agent_id": data.get("agent_id"),
                "conversation_id": data.get("conversation_id"),
                "transcript": data.get("transcript"),
                "metadata": data.get("metadata", {}),
            }
        elif event_type == "voice_removal_notice":
            details = {
                "voice_id": data.get("voice_id"),
                "voice_name": data.get("voice_name"),
                "removal_date": data.get("removal_date"),
                "reason": data.get("reason"),
            }
        elif event_type == "voice_removal_notice_withdrawn":
            details = {
                "voice_id": data.get("voice_id"),
                "voice_name": data.get("voice_name"),
            }
        elif event_type == "voice_removed":
            details = {
                "voice_id": data.get("voice_id"),
                "voice_name": data.get("voice_name"),
                "removed_at": data.get("removed_at"),
            }
        else:
            # Defensive fallback; unreachable today because unknown event
            # types are filtered out above.
            details = data

        yield "data", details
|
||||
116
autogpt_platform/backend/backend/blocks/elevenlabs/utility.py
Normal file
116
autogpt_platform/backend/backend/blocks/elevenlabs/utility.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
ElevenLabs utility blocks for models and usage stats.
|
||||
"""
|
||||
|
||||
from backend.sdk import (
|
||||
APIKeyCredentials,
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchema,
|
||||
CredentialsMetaInput,
|
||||
Requests,
|
||||
SchemaField,
|
||||
)
|
||||
|
||||
from ._config import elevenlabs
|
||||
|
||||
|
||||
class ElevenLabsListModelsBlock(Block):
    """Fetch the catalogue of available model IDs and their capabilities."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )

    class Output(BlockSchema):
        models: list[dict] = SchemaField(
            description="Array of model objects with capabilities"
        )

    def __init__(self):
        super().__init__(
            id="a9b0c1d2-e3f4-a5b6-c7d8-e9f0a1b2c3d4",
            description="List all available voice models and their capabilities",
            categories={BlockCategory.DEVELOPER_TOOLS},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        # The models endpoint returns a JSON array of model objects directly.
        models_response = await Requests().get(
            "https://api.elevenlabs.io/v1/models",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
        )
        yield "models", models_response.json()
|
||||
|
||||
|
||||
class ElevenLabsGetUsageStatsBlock(Block):
    """Fetch character/credit usage statistics for billing dashboards."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        start_unix: int = SchemaField(
            description="Start timestamp in Unix epoch seconds"
        )
        end_unix: int = SchemaField(description="End timestamp in Unix epoch seconds")
        aggregation_interval: str = SchemaField(
            description="Aggregation interval: daily or monthly",
            default="daily",
        )

    class Output(BlockSchema):
        usage: list[dict] = SchemaField(description="Array of usage data per interval")
        total_character_count: int = SchemaField(
            description="Total characters used in period"
        )
        total_requests: int = SchemaField(description="Total API requests in period")

    def __init__(self):
        super().__init__(
            id="b0c1d2e3-f4a5-b6c7-d8e9-f0a1b2c3d4e5",
            description="Get character and credit usage statistics",
            categories={BlockCategory.DEVELOPER_TOOLS},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        # Query the character-stats endpoint for the requested time window.
        stats_response = await Requests().get(
            "https://api.elevenlabs.io/v1/usage/character-stats",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            params={
                "start_unix": input_data.start_unix,
                "end_unix": input_data.end_unix,
                "aggregation_interval": input_data.aggregation_interval,
            },
        )

        stats = stats_response.json()
        yield "usage", stats.get("usage", [])
        yield "total_character_count", stats.get("total_character_count", 0)
        yield "total_requests", stats.get("total_requests", 0)
|
||||
249
autogpt_platform/backend/backend/blocks/elevenlabs/voices.py
Normal file
249
autogpt_platform/backend/backend/blocks/elevenlabs/voices.py
Normal file
@@ -0,0 +1,249 @@
|
||||
"""
|
||||
ElevenLabs voice management blocks.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from backend.sdk import (
|
||||
APIKeyCredentials,
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchema,
|
||||
CredentialsMetaInput,
|
||||
Requests,
|
||||
SchemaField,
|
||||
)
|
||||
|
||||
from ._config import elevenlabs
|
||||
|
||||
|
||||
class ElevenLabsListVoicesBlock(Block):
    """List every voice the account can use (for pick-lists, UI menus, etc.)."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        search: str = SchemaField(
            description="Search term to filter voices", default=""
        )
        voice_type: Optional[str] = SchemaField(
            description="Filter by voice type: premade, cloned, or professional",
            default=None,
        )
        page_size: int = SchemaField(
            description="Number of voices per page (max 100)", default=10
        )
        next_page_token: str = SchemaField(
            description="Token for fetching next page", default=""
        )

    class Output(BlockSchema):
        voices: list[dict] = SchemaField(
            description="Array of voice objects with id, name, category, etc."
        )
        next_page_token: Optional[str] = SchemaField(
            description="Token for fetching next page, null if no more pages"
        )

    def __init__(self):
        super().__init__(
            id="e1a2b3c4-d5e6-f7a8-b9c0-d1e2f3a4b5c6",
            description="List all available voices with filtering and pagination",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        # page_size is always sent; the other filters only when non-empty.
        query: dict[str, str | int] = {"page_size": input_data.page_size}
        for field_name in ("search", "voice_type", "next_page_token"):
            value = getattr(input_data, field_name)
            if value:
                query[field_name] = value

        voices_response = await Requests().get(
            "https://api.elevenlabs.io/v2/voices",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
            params=query,
        )

        page = voices_response.json()
        yield "voices", page.get("voices", [])
        # None signals there are no further pages.
        yield "next_page_token", page.get("next_page_token")
|
||||
|
||||
|
||||
class ElevenLabsGetVoiceDetailsBlock(Block):
    """Retrieve the metadata and settings of a single voice."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="The ID of the voice to retrieve")

    class Output(BlockSchema):
        voice: dict = SchemaField(
            description="Voice object with name, labels, settings, etc."
        )

    def __init__(self):
        super().__init__(
            id="f2a3b4c5-d6e7-f8a9-b0c1-d2e3f4a5b6c7",
            description="Get detailed information about a specific voice",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        # Single-voice lookup; the voice is addressed in the URL path.
        voice_response = await Requests().get(
            f"https://api.elevenlabs.io/v1/voices/{input_data.voice_id}",
            headers={"xi-api-key": credentials.api_key.get_secret_value()},
        )
        yield "voice", voice_response.json()
|
||||
|
||||
|
||||
class ElevenLabsCreateVoiceCloneBlock(Block):
    """Upload sample clips to create a custom (IVC) voice."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        name: str = SchemaField(description="Name for the new voice")
        files: list[str] = SchemaField(
            description="Base64-encoded audio files (1-10 files, max 25MB each)"
        )
        description: str = SchemaField(
            description="Description of the voice", default=""
        )
        labels: dict = SchemaField(
            description="Metadata labels (e.g., accent, age)", default={}
        )
        remove_background_noise: bool = SchemaField(
            description="Whether to remove background noise from samples", default=False
        )

    class Output(BlockSchema):
        voice_id: str = SchemaField(description="ID of the newly created voice")
        requires_verification: bool = SchemaField(
            description="Whether the voice requires verification"
        )

    def __init__(self):
        # The UUID is the block's stable platform identifier — never change it.
        super().__init__(
            id="a3b4c5d6-e7f8-a9b0-c1d2-e3f4a5b6c7d8",
            description="Create a new voice clone from audio samples",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """POST the decoded samples plus optional metadata to the add-voice API."""
        import base64
        import json
        from io import BytesIO

        secret = credentials.api_key.get_secret_value()

        # Multipart form fields: name is mandatory; the rest are sent only
        # when the caller supplied a value.
        payload = {"name": input_data.name}
        if input_data.description:
            payload["description"] = input_data.description
        if input_data.labels:
            payload["labels"] = json.dumps(input_data.labels)
        if input_data.remove_background_noise:
            payload["remove_background_noise"] = "true"

        # Decode each base64 clip into an in-memory MP3 upload part.
        uploads = [
            (
                "files",
                (f"sample_{idx}.mp3", BytesIO(base64.b64decode(clip)), "audio/mpeg"),
            )
            for idx, clip in enumerate(input_data.files)
        ]

        response = await Requests().post(
            "https://api.elevenlabs.io/v1/voices/add",
            headers={"xi-api-key": secret},
            data=payload,
            files=uploads,
        )

        result = response.json()

        yield "voice_id", result.get("voice_id", "")
        yield "requires_verification", result.get("requires_verification", False)
|
||||
|
||||
|
||||
class ElevenLabsDeleteVoiceBlock(Block):
    """Permanently remove a custom voice."""

    class Input(BlockSchema):
        credentials: CredentialsMetaInput = elevenlabs.credentials_field(
            description="ElevenLabs API credentials"
        )
        voice_id: str = SchemaField(description="The ID of the voice to delete")

    class Output(BlockSchema):
        status: str = SchemaField(description="Deletion status (ok or error)")

    def __init__(self):
        # The UUID is the block's stable platform identifier — never change it.
        super().__init__(
            id="b4c5d6e7-f8a9-b0c1-d2e3-f4a5b6c7d8e9",
            description="Delete a custom voice from your account",
            categories={BlockCategory.AI},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        """Issue the DELETE call and report whether the API accepted it."""
        secret = credentials.api_key.get_secret_value()

        response = await Requests().delete(
            f"https://api.elevenlabs.io/v1/voices/{input_data.voice_id}",
            headers={"xi-api-key": secret},
        )

        # The API answers 200 or 204 on success; anything else maps to "error".
        deleted = response.status in (200, 204)
        yield "status", "ok" if deleted else "error"
|
||||
Reference in New Issue
Block a user