diff --git a/.github/workflows/platform-frontend-ci.yml b/.github/workflows/platform-frontend-ci.yml
index 499bb03170..14676a6a1f 100644
--- a/.github/workflows/platform-frontend-ci.yml
+++ b/.github/workflows/platform-frontend-ci.yml
@@ -27,11 +27,20 @@ jobs:
runs-on: ubuntu-latest
outputs:
cache-key: ${{ steps.cache-key.outputs.key }}
+ components-changed: ${{ steps.filter.outputs.components }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
+ - name: Check for component changes
+ uses: dorny/paths-filter@v3
+ id: filter
+ with:
+ filters: |
+ components:
+ - 'autogpt_platform/frontend/src/components/**'
+
- name: Set up Node.js
uses: actions/setup-node@v4
with:
@@ -90,8 +99,11 @@ jobs:
chromatic:
runs-on: ubuntu-latest
needs: setup
- # Only run on dev branch pushes or PRs targeting dev
- if: github.ref == 'refs/heads/dev' || github.base_ref == 'dev'
+ # Disabled: to re-enable, remove 'false &&' from the condition below
+ if: >-
+ false
+ && (github.ref == 'refs/heads/dev' || github.base_ref == 'dev')
+ && needs.setup.outputs.components-changed == 'true'
steps:
- name: Checkout repository
diff --git a/autogpt_platform/backend/.env.default b/autogpt_platform/backend/.env.default
index b393f13017..fa52ba812a 100644
--- a/autogpt_platform/backend/.env.default
+++ b/autogpt_platform/backend/.env.default
@@ -152,6 +152,7 @@ REPLICATE_API_KEY=
REVID_API_KEY=
SCREENSHOTONE_API_KEY=
UNREAL_SPEECH_API_KEY=
+ELEVENLABS_API_KEY=
# Data & Search Services
E2B_API_KEY=
diff --git a/autogpt_platform/backend/.gitignore b/autogpt_platform/backend/.gitignore
index 9224c07d9e..6e688311a6 100644
--- a/autogpt_platform/backend/.gitignore
+++ b/autogpt_platform/backend/.gitignore
@@ -19,3 +19,6 @@ load-tests/*.json
load-tests/*.log
load-tests/node_modules/*
migrations/*/rollback*.sql
+
+# Workspace files
+workspaces/
diff --git a/autogpt_platform/backend/Dockerfile b/autogpt_platform/backend/Dockerfile
index 103226d079..9bd455e490 100644
--- a/autogpt_platform/backend/Dockerfile
+++ b/autogpt_platform/backend/Dockerfile
@@ -62,10 +62,12 @@ ENV POETRY_HOME=/opt/poetry \
DEBIAN_FRONTEND=noninteractive
ENV PATH=/opt/poetry/bin:$PATH
-# Install Python without upgrading system-managed packages
+# Install Python, FFmpeg, and ImageMagick (required for video processing blocks)
RUN apt-get update && apt-get install -y \
python3.13 \
python3-pip \
+ ffmpeg \
+ imagemagick \
&& rm -rf /var/lib/apt/lists/*
# Copy only necessary files from builder
diff --git a/autogpt_platform/backend/backend/api/features/chat/config.py b/autogpt_platform/backend/backend/api/features/chat/config.py
index 2e8dbf5413..0b37e42df8 100644
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -11,7 +11,7 @@ class ChatConfig(BaseSettings):
# OpenAI API Configuration
model: str = Field(
- default="anthropic/claude-opus-4.5", description="Default model to use"
+ default="anthropic/claude-opus-4.6", description="Default model to use"
)
title_model: str = Field(
default="openai/gpt-4o-mini",
diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py
index 218575085b..06da6bdf2b 100644
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -33,7 +33,7 @@ from backend.data.understanding import (
get_business_understanding,
)
from backend.util.exceptions import NotFoundError
-from backend.util.settings import Settings
+from backend.util.settings import AppEnvironment, Settings
from . import db as chat_db
from . import stream_registry
@@ -222,8 +222,18 @@ async def _get_system_prompt_template(context: str) -> str:
try:
# cache_ttl_seconds=0 disables SDK caching to always get the latest prompt
# Use asyncio.to_thread to avoid blocking the event loop
+ # In non-production environments, fetch the latest prompt version
+ # instead of the production-labeled version for easier testing
+ label = (
+ None
+ if settings.config.app_env == AppEnvironment.PRODUCTION
+ else "latest"
+ )
prompt = await asyncio.to_thread(
- langfuse.get_prompt, config.langfuse_prompt_name, cache_ttl_seconds=0
+ langfuse.get_prompt,
+ config.langfuse_prompt_name,
+ label=label,
+ cache_ttl_seconds=0,
)
return prompt.compile(users_information=context)
except Exception as e:
@@ -618,6 +628,9 @@ async def stream_chat_completion(
total_tokens=chunk.totalTokens,
)
)
+ elif isinstance(chunk, StreamHeartbeat):
+ # Pass through heartbeat to keep SSE connection alive
+ yield chunk
else:
logger.error(f"Unknown chunk type: {type(chunk)}", exc_info=True)
diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
index b88b9b2924..f83ca30b5c 100644
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
@@ -7,15 +7,7 @@ from typing import Any, NotRequired, TypedDict
from backend.api.features.library import db as library_db
from backend.api.features.store import db as store_db
-from backend.data.graph import (
- Graph,
- Link,
- Node,
- create_graph,
- get_graph,
- get_graph_all_versions,
- get_store_listed_graphs,
-)
+from backend.data.graph import Graph, Link, Node, get_graph, get_store_listed_graphs
from backend.util.exceptions import DatabaseError, NotFoundError
from .service import (
@@ -28,8 +20,6 @@ from .service import (
logger = logging.getLogger(__name__)
-AGENT_EXECUTOR_BLOCK_ID = "e189baac-8c20-45a1-94a7-55177ea42565"
-
class ExecutionSummary(TypedDict):
"""Summary of a single execution for quality assessment."""
@@ -669,45 +659,6 @@ def json_to_graph(agent_json: dict[str, Any]) -> Graph:
)
-def _reassign_node_ids(graph: Graph) -> None:
- """Reassign all node and link IDs to new UUIDs.
-
- This is needed when creating a new version to avoid unique constraint violations.
- """
- id_map = {node.id: str(uuid.uuid4()) for node in graph.nodes}
-
- for node in graph.nodes:
- node.id = id_map[node.id]
-
- for link in graph.links:
- link.id = str(uuid.uuid4())
- if link.source_id in id_map:
- link.source_id = id_map[link.source_id]
- if link.sink_id in id_map:
- link.sink_id = id_map[link.sink_id]
-
-
-def _populate_agent_executor_user_ids(agent_json: dict[str, Any], user_id: str) -> None:
- """Populate user_id in AgentExecutorBlock nodes.
-
- The external agent generator creates AgentExecutorBlock nodes with empty user_id.
- This function fills in the actual user_id so sub-agents run with correct permissions.
-
- Args:
- agent_json: Agent JSON dict (modified in place)
- user_id: User ID to set
- """
- for node in agent_json.get("nodes", []):
- if node.get("block_id") == AGENT_EXECUTOR_BLOCK_ID:
- input_default = node.get("input_default") or {}
- if not input_default.get("user_id"):
- input_default["user_id"] = user_id
- node["input_default"] = input_default
- logger.debug(
- f"Set user_id for AgentExecutorBlock node {node.get('id')}"
- )
-
-
async def save_agent_to_library(
agent_json: dict[str, Any], user_id: str, is_update: bool = False
) -> tuple[Graph, Any]:
@@ -721,35 +672,10 @@ async def save_agent_to_library(
Returns:
Tuple of (created Graph, LibraryAgent)
"""
- # Populate user_id in AgentExecutorBlock nodes before conversion
- _populate_agent_executor_user_ids(agent_json, user_id)
-
graph = json_to_graph(agent_json)
-
if is_update:
- if graph.id:
- existing_versions = await get_graph_all_versions(graph.id, user_id)
- if existing_versions:
- latest_version = max(v.version for v in existing_versions)
- graph.version = latest_version + 1
- _reassign_node_ids(graph)
- logger.info(f"Updating agent {graph.id} to version {graph.version}")
- else:
- graph.id = str(uuid.uuid4())
- graph.version = 1
- _reassign_node_ids(graph)
- logger.info(f"Creating new agent with ID {graph.id}")
-
- created_graph = await create_graph(graph, user_id)
-
- library_agents = await library_db.create_library_agent(
- graph=created_graph,
- user_id=user_id,
- sensitive_action_safe_mode=True,
- create_library_agents_for_sub_graphs=False,
- )
-
- return created_graph, library_agents[0]
+ return await library_db.update_graph_in_library(graph, user_id)
+ return await library_db.create_graph_in_library(graph, user_id)
def graph_to_json(graph: Graph) -> dict[str, Any]:
diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py
index 62d59c470e..61cdba1ef9 100644
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py
@@ -206,9 +206,9 @@ async def search_agents(
]
)
no_results_msg = (
- f"No agents found matching '{query}'. Try different keywords or browse the marketplace."
+ f"No agents found matching '{query}'. Let the user know they can try different keywords or browse the marketplace. Also let them know you can create a custom agent for them based on their needs."
if source == "marketplace"
- else f"No agents matching '{query}' found in your library."
+ else f"No agents matching '{query}' found in your library. Let the user know you can create a custom agent for them based on their needs."
)
return NoResultsResponse(
message=no_results_msg, session_id=session_id, suggestions=suggestions
@@ -224,10 +224,10 @@ async def search_agents(
message = (
"Now you have found some options for the user to choose from. "
"You can add a link to a recommended agent at: /marketplace/agent/agent_id "
- "Please ask the user if they would like to use any of these agents."
+ "Please ask the user if they would like to use any of these agents. Let the user know we can create a custom agent for them based on their needs."
if source == "marketplace"
else "Found agents in the user's library. You can provide a link to view an agent at: "
- "/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute."
+ "/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute. Let the user know we can create a custom agent for them based on their needs."
)
return AgentsFoundResponse(
diff --git a/autogpt_platform/backend/backend/api/features/library/db.py b/autogpt_platform/backend/backend/api/features/library/db.py
index 394f959953..6bebfb573c 100644
--- a/autogpt_platform/backend/backend/api/features/library/db.py
+++ b/autogpt_platform/backend/backend/api/features/library/db.py
@@ -19,7 +19,10 @@ from backend.data.graph import GraphSettings
from backend.data.includes import AGENT_PRESET_INCLUDE, library_agent_include
from backend.data.model import CredentialsMetaInput
from backend.integrations.creds_manager import IntegrationCredentialsManager
-from backend.integrations.webhooks.graph_lifecycle_hooks import on_graph_activate
+from backend.integrations.webhooks.graph_lifecycle_hooks import (
+ on_graph_activate,
+ on_graph_deactivate,
+)
from backend.util.clients import get_scheduler_client
from backend.util.exceptions import DatabaseError, InvalidInputError, NotFoundError
from backend.util.json import SafeJson
@@ -537,6 +540,92 @@ async def update_agent_version_in_library(
return library_model.LibraryAgent.from_db(lib)
+async def create_graph_in_library(
+ graph: graph_db.Graph,
+ user_id: str,
+) -> tuple[graph_db.GraphModel, library_model.LibraryAgent]:
+ """Create a new graph and add it to the user's library."""
+ graph.version = 1
+ graph_model = graph_db.make_graph_model(graph, user_id)
+ graph_model.reassign_ids(user_id=user_id, reassign_graph_id=True)
+
+ created_graph = await graph_db.create_graph(graph_model, user_id)
+
+ library_agents = await create_library_agent(
+ graph=created_graph,
+ user_id=user_id,
+ sensitive_action_safe_mode=True,
+ create_library_agents_for_sub_graphs=False,
+ )
+
+ if created_graph.is_active:
+ created_graph = await on_graph_activate(created_graph, user_id=user_id)
+
+ return created_graph, library_agents[0]
+
+
+async def update_graph_in_library(
+ graph: graph_db.Graph,
+ user_id: str,
+) -> tuple[graph_db.GraphModel, library_model.LibraryAgent]:
+ """Create a new version of an existing graph and update the library entry."""
+ existing_versions = await graph_db.get_graph_all_versions(graph.id, user_id)
+ current_active_version = (
+ next((v for v in existing_versions if v.is_active), None)
+ if existing_versions
+ else None
+ )
+ graph.version = (
+ max(v.version for v in existing_versions) + 1 if existing_versions else 1
+ )
+
+ graph_model = graph_db.make_graph_model(graph, user_id)
+ graph_model.reassign_ids(user_id=user_id, reassign_graph_id=False)
+
+ created_graph = await graph_db.create_graph(graph_model, user_id)
+
+ library_agent = await get_library_agent_by_graph_id(user_id, created_graph.id)
+ if not library_agent:
+ raise NotFoundError(f"Library agent not found for graph {created_graph.id}")
+
+ library_agent = await update_library_agent_version_and_settings(
+ user_id, created_graph
+ )
+
+ if created_graph.is_active:
+ created_graph = await on_graph_activate(created_graph, user_id=user_id)
+ await graph_db.set_graph_active_version(
+ graph_id=created_graph.id,
+ version=created_graph.version,
+ user_id=user_id,
+ )
+ if current_active_version:
+ await on_graph_deactivate(current_active_version, user_id=user_id)
+
+ return created_graph, library_agent
+
+
+async def update_library_agent_version_and_settings(
+ user_id: str, agent_graph: graph_db.GraphModel
+) -> library_model.LibraryAgent:
+ """Update library agent to point to new graph version and sync settings."""
+ library = await update_agent_version_in_library(
+ user_id, agent_graph.id, agent_graph.version
+ )
+ updated_settings = GraphSettings.from_graph(
+ graph=agent_graph,
+ hitl_safe_mode=library.settings.human_in_the_loop_safe_mode,
+ sensitive_action_safe_mode=library.settings.sensitive_action_safe_mode,
+ )
+ if updated_settings != library.settings:
+ library = await update_library_agent(
+ library_agent_id=library.id,
+ user_id=user_id,
+ settings=updated_settings,
+ )
+ return library
+
+
async def update_library_agent(
library_agent_id: str,
user_id: str,
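A note on the numbering rule in update_graph_in_library above: the new version is always one past the highest existing version (gaps included), and the new version is activated before the previously active one is deactivated, so there is no window with zero active versions. A tiny sketch of the version rule (helper name hypothetical):

def next_graph_version(existing_versions: list[int]) -> int:
    # mirrors: max(v.version for v in existing_versions) + 1 if existing_versions else 1
    return max(existing_versions) + 1 if existing_versions else 1

assert next_graph_version([]) == 1        # first version of a new graph
assert next_graph_version([1, 2, 3]) == 4
assert next_graph_version([1, 5]) == 6    # gaps don't matter, only the max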
diff --git a/autogpt_platform/backend/backend/api/features/v1.py b/autogpt_platform/backend/backend/api/features/v1.py
index 09d3759a65..a8610702cc 100644
--- a/autogpt_platform/backend/backend/api/features/v1.py
+++ b/autogpt_platform/backend/backend/api/features/v1.py
@@ -101,7 +101,6 @@ from backend.util.timezone_utils import (
from backend.util.virus_scanner import scan_content_safe
from .library import db as library_db
-from .library import model as library_model
from .store.model import StoreAgentDetails
@@ -823,18 +822,16 @@ async def update_graph(
graph: graph_db.Graph,
user_id: Annotated[str, Security(get_user_id)],
) -> graph_db.GraphModel:
- # Sanity check
if graph.id and graph.id != graph_id:
raise HTTPException(400, detail="Graph ID does not match ID in URI")
- # Determine new version
existing_versions = await graph_db.get_graph_all_versions(graph_id, user_id=user_id)
if not existing_versions:
raise HTTPException(404, detail=f"Graph #{graph_id} not found")
- latest_version_number = max(g.version for g in existing_versions)
- graph.version = latest_version_number + 1
+ graph.version = max(g.version for g in existing_versions) + 1
current_active_version = next((v for v in existing_versions if v.is_active), None)
+
graph = graph_db.make_graph_model(graph, user_id)
graph.reassign_ids(user_id=user_id, reassign_graph_id=False)
graph.validate_graph(for_run=False)
@@ -842,27 +839,23 @@ async def update_graph(
new_graph_version = await graph_db.create_graph(graph, user_id=user_id)
if new_graph_version.is_active:
- # Keep the library agent up to date with the new active version
- await _update_library_agent_version_and_settings(user_id, new_graph_version)
-
- # Handle activation of the new graph first to ensure continuity
+ await library_db.update_library_agent_version_and_settings(
+ user_id, new_graph_version
+ )
new_graph_version = await on_graph_activate(new_graph_version, user_id=user_id)
- # Ensure new version is the only active version
await graph_db.set_graph_active_version(
graph_id=graph_id, version=new_graph_version.version, user_id=user_id
)
if current_active_version:
- # Handle deactivation of the previously active version
await on_graph_deactivate(current_active_version, user_id=user_id)
- # Fetch new graph version *with sub-graphs* (needed for credentials input schema)
new_graph_version_with_subgraphs = await graph_db.get_graph(
graph_id,
new_graph_version.version,
user_id=user_id,
include_subgraphs=True,
)
- assert new_graph_version_with_subgraphs # make type checker happy
+ assert new_graph_version_with_subgraphs
return new_graph_version_with_subgraphs
@@ -900,33 +893,15 @@ async def set_graph_active_version(
)
# Keep the library agent up to date with the new active version
- await _update_library_agent_version_and_settings(user_id, new_active_graph)
+ await library_db.update_library_agent_version_and_settings(
+ user_id, new_active_graph
+ )
if current_active_graph and current_active_graph.version != new_active_version:
# Handle deactivation of the previously active version
await on_graph_deactivate(current_active_graph, user_id=user_id)
-async def _update_library_agent_version_and_settings(
- user_id: str, agent_graph: graph_db.GraphModel
-) -> library_model.LibraryAgent:
- library = await library_db.update_agent_version_in_library(
- user_id, agent_graph.id, agent_graph.version
- )
- updated_settings = GraphSettings.from_graph(
- graph=agent_graph,
- hitl_safe_mode=library.settings.human_in_the_loop_safe_mode,
- sensitive_action_safe_mode=library.settings.sensitive_action_safe_mode,
- )
- if updated_settings != library.settings:
- library = await library_db.update_library_agent(
- library_agent_id=library.id,
- user_id=user_id,
- settings=updated_settings,
- )
- return library
-
-
@v1_router.patch(
path="/graphs/{graph_id}/settings",
summary="Update graph settings",
diff --git a/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
new file mode 100644
index 0000000000..b823627b43
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
@@ -0,0 +1,28 @@
+"""ElevenLabs integration blocks - test credentials and shared utilities."""
+
+from typing import Literal
+
+from pydantic import SecretStr
+
+from backend.data.model import APIKeyCredentials, CredentialsMetaInput
+from backend.integrations.providers import ProviderName
+
+TEST_CREDENTIALS = APIKeyCredentials(
+ id="01234567-89ab-cdef-0123-456789abcdef",
+ provider="elevenlabs",
+ api_key=SecretStr("mock-elevenlabs-api-key"),
+ title="Mock ElevenLabs API key",
+ expires_at=None,
+)
+
+TEST_CREDENTIALS_INPUT = {
+ "provider": TEST_CREDENTIALS.provider,
+ "id": TEST_CREDENTIALS.id,
+ "type": TEST_CREDENTIALS.type,
+ "title": TEST_CREDENTIALS.title,
+}
+
+ElevenLabsCredentials = APIKeyCredentials
+ElevenLabsCredentialsInput = CredentialsMetaInput[
+ Literal[ProviderName.ELEVENLABS], Literal["api_key"]
+]
diff --git a/autogpt_platform/backend/backend/blocks/encoder_block.py b/autogpt_platform/backend/backend/blocks/encoder_block.py
new file mode 100644
index 0000000000..b60a4ae828
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/encoder_block.py
@@ -0,0 +1,77 @@
+"""Text encoding block for converting special characters to escape sequences."""
+
+import codecs
+
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+
+
+class TextEncoderBlock(Block):
+ """
+ Encodes a string by converting special characters into escape sequences.
+
+ This block is the inverse of TextDecoderBlock. It takes text containing
+ special characters (like newlines, tabs, etc.) and converts them into
+ their escape sequence representations (e.g., newline becomes \\n).
+ """
+
+ class Input(BlockSchemaInput):
+ """Input schema for TextEncoderBlock."""
+
+ text: str = SchemaField(
+ description="A string containing special characters to be encoded",
+ placeholder="Your text with newlines and quotes to encode",
+ )
+
+ class Output(BlockSchemaOutput):
+ """Output schema for TextEncoderBlock."""
+
+ encoded_text: str = SchemaField(
+ description="The encoded text with special characters converted to escape sequences"
+ )
+ error: str = SchemaField(description="Error message if encoding fails")
+
+ def __init__(self):
+ super().__init__(
+ id="5185f32e-4b65-4ecf-8fbb-873f003f09d6",
+ description="Encodes a string by converting special characters into escape sequences",
+ categories={BlockCategory.TEXT},
+ input_schema=TextEncoderBlock.Input,
+ output_schema=TextEncoderBlock.Output,
+ test_input={
+ "text": """Hello
+World!
+This is a "quoted" string."""
+ },
+ test_output=[
+ (
+ "encoded_text",
+ """Hello\\nWorld!\\nThis is a "quoted" string.""",
+ )
+ ],
+ )
+
+ async def run(self, input_data: Input, **kwargs) -> BlockOutput:
+ """
+ Encode the input text by converting special characters to escape sequences.
+
+ Args:
+ input_data: The input containing the text to encode.
+ **kwargs: Additional keyword arguments (unused).
+
+ Yields:
+ The encoded text with escape sequences, or an error message if encoding fails.
+ """
+ try:
+ encoded_text = codecs.encode(input_data.text, "unicode_escape").decode(
+ "utf-8"
+ )
+ yield "encoded_text", encoded_text
+ except Exception as e:
+ yield "error", f"Encoding error: {str(e)}"
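For reference, the block's core transform is a plain codecs round trip; a quick sketch independent of the block machinery (the decode direction is what the TextDecoderBlock mentioned in the docstring performs):

import codecs

raw = "Hello\nWorld!\tδΈη"
encoded = codecs.encode(raw, "unicode_escape").decode("utf-8")
print(encoded)  # Hello\nWorld!\t\u4e16\u754c -- escapes become literal text on one line

# decoding inverts the transform and restores the original string
assert codecs.decode(encoded, "unicode_escape") == raw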
diff --git a/autogpt_platform/backend/backend/blocks/llm.py b/autogpt_platform/backend/backend/blocks/llm.py
index 54295da1f1..be2b85949e 100644
--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -115,6 +115,7 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
CLAUDE_4_5_OPUS = "claude-opus-4-5-20251101"
CLAUDE_4_5_SONNET = "claude-sonnet-4-5-20250929"
CLAUDE_4_5_HAIKU = "claude-haiku-4-5-20251001"
+ CLAUDE_4_6_OPUS = "claude-opus-4-6"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
# AI/ML API models
AIML_API_QWEN2_5_72B = "Qwen/Qwen2.5-72B-Instruct-Turbo"
@@ -270,6 +271,9 @@ MODEL_METADATA = {
LlmModel.CLAUDE_4_SONNET: ModelMetadata(
"anthropic", 200000, 64000, "Claude Sonnet 4", "Anthropic", "Anthropic", 2
), # claude-4-sonnet-20250514
+ LlmModel.CLAUDE_4_6_OPUS: ModelMetadata(
+ "anthropic", 200000, 128000, "Claude Opus 4.6", "Anthropic", "Anthropic", 3
+ ), # claude-opus-4-6
LlmModel.CLAUDE_4_5_OPUS: ModelMetadata(
"anthropic", 200000, 64000, "Claude Opus 4.5", "Anthropic", "Anthropic", 3
), # claude-opus-4-5-20251101
diff --git a/autogpt_platform/backend/backend/blocks/media.py b/autogpt_platform/backend/backend/blocks/media.py
deleted file mode 100644
index a8d145bc64..0000000000
--- a/autogpt_platform/backend/backend/blocks/media.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import os
-import tempfile
-from typing import Optional
-
-from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.video.fx.Loop import Loop
-from moviepy.video.io.VideoFileClip import VideoFileClip
-
-from backend.data.block import (
- Block,
- BlockCategory,
- BlockOutput,
- BlockSchemaInput,
- BlockSchemaOutput,
-)
-from backend.data.execution import ExecutionContext
-from backend.data.model import SchemaField
-from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
-
-
-class MediaDurationBlock(Block):
-
- class Input(BlockSchemaInput):
- media_in: MediaFileType = SchemaField(
- description="Media input (URL, data URI, or local path)."
- )
- is_video: bool = SchemaField(
- description="Whether the media is a video (True) or audio (False).",
- default=True,
- )
-
- class Output(BlockSchemaOutput):
- duration: float = SchemaField(
- description="Duration of the media file (in seconds)."
- )
-
- def __init__(self):
- super().__init__(
- id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
- description="Block to get the duration of a media file.",
- categories={BlockCategory.MULTIMEDIA},
- input_schema=MediaDurationBlock.Input,
- output_schema=MediaDurationBlock.Output,
- )
-
- async def run(
- self,
- input_data: Input,
- *,
- execution_context: ExecutionContext,
- **kwargs,
- ) -> BlockOutput:
- # 1) Store the input media locally
- local_media_path = await store_media_file(
- file=input_data.media_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
- assert execution_context.graph_exec_id is not None
- media_abspath = get_exec_file_path(
- execution_context.graph_exec_id, local_media_path
- )
-
- # 2) Load the clip
- if input_data.is_video:
- clip = VideoFileClip(media_abspath)
- else:
- clip = AudioFileClip(media_abspath)
-
- yield "duration", clip.duration
-
-
-class LoopVideoBlock(Block):
- """
- Block for looping (repeating) a video clip until a given duration or number of loops.
- """
-
- class Input(BlockSchemaInput):
- video_in: MediaFileType = SchemaField(
- description="The input video (can be a URL, data URI, or local path)."
- )
- # Provide EITHER a `duration` or `n_loops` or both. We'll demonstrate `duration`.
- duration: Optional[float] = SchemaField(
- description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
- default=None,
- ge=0.0,
- )
- n_loops: Optional[int] = SchemaField(
- description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
- default=None,
- ge=1,
- )
-
- class Output(BlockSchemaOutput):
- video_out: str = SchemaField(
- description="Looped video returned either as a relative path or a data URI."
- )
-
- def __init__(self):
- super().__init__(
- id="8bf9eef6-5451-4213-b265-25306446e94b",
- description="Block to loop a video to a given duration or number of repeats.",
- categories={BlockCategory.MULTIMEDIA},
- input_schema=LoopVideoBlock.Input,
- output_schema=LoopVideoBlock.Output,
- )
-
- async def run(
- self,
- input_data: Input,
- *,
- execution_context: ExecutionContext,
- **kwargs,
- ) -> BlockOutput:
- assert execution_context.graph_exec_id is not None
- assert execution_context.node_exec_id is not None
- graph_exec_id = execution_context.graph_exec_id
- node_exec_id = execution_context.node_exec_id
-
- # 1) Store the input video locally
- local_video_path = await store_media_file(
- file=input_data.video_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
- input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
-
- # 2) Load the clip
- clip = VideoFileClip(input_abspath)
-
- # 3) Apply the loop effect
- looped_clip = clip
- if input_data.duration:
- # Loop until we reach the specified duration
- looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)])
- elif input_data.n_loops:
- looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)])
- else:
- raise ValueError("Either 'duration' or 'n_loops' must be provided.")
-
- assert isinstance(looped_clip, VideoFileClip)
-
- # 4) Save the looped output
- output_filename = MediaFileType(
- f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
- )
- output_abspath = get_exec_file_path(graph_exec_id, output_filename)
-
- looped_clip = looped_clip.with_audio(clip.audio)
- looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
- # Return output - for_block_output returns workspace:// if available, else data URI
- video_out = await store_media_file(
- file=output_filename,
- execution_context=execution_context,
- return_format="for_block_output",
- )
-
- yield "video_out", video_out
-
-
-class AddAudioToVideoBlock(Block):
- """
- Block that adds (attaches) an audio track to an existing video.
- Optionally scale the volume of the new track.
- """
-
- class Input(BlockSchemaInput):
- video_in: MediaFileType = SchemaField(
- description="Video input (URL, data URI, or local path)."
- )
- audio_in: MediaFileType = SchemaField(
- description="Audio input (URL, data URI, or local path)."
- )
- volume: float = SchemaField(
- description="Volume scale for the newly attached audio track (1.0 = original).",
- default=1.0,
- )
-
- class Output(BlockSchemaOutput):
- video_out: MediaFileType = SchemaField(
- description="Final video (with attached audio), as a path or data URI."
- )
-
- def __init__(self):
- super().__init__(
- id="3503748d-62b6-4425-91d6-725b064af509",
- description="Block to attach an audio file to a video file using moviepy.",
- categories={BlockCategory.MULTIMEDIA},
- input_schema=AddAudioToVideoBlock.Input,
- output_schema=AddAudioToVideoBlock.Output,
- )
-
- async def run(
- self,
- input_data: Input,
- *,
- execution_context: ExecutionContext,
- **kwargs,
- ) -> BlockOutput:
- assert execution_context.graph_exec_id is not None
- assert execution_context.node_exec_id is not None
- graph_exec_id = execution_context.graph_exec_id
- node_exec_id = execution_context.node_exec_id
-
- # 1) Store the inputs locally
- local_video_path = await store_media_file(
- file=input_data.video_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
- local_audio_path = await store_media_file(
- file=input_data.audio_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
-
- abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id)
- video_abspath = os.path.join(abs_temp_dir, local_video_path)
- audio_abspath = os.path.join(abs_temp_dir, local_audio_path)
-
- # 2) Load video + audio with moviepy
- video_clip = VideoFileClip(video_abspath)
- audio_clip = AudioFileClip(audio_abspath)
- # Optionally scale volume
- if input_data.volume != 1.0:
- audio_clip = audio_clip.with_volume_scaled(input_data.volume)
-
- # 3) Attach the new audio track
- final_clip = video_clip.with_audio(audio_clip)
-
- # 4) Write to output file
- output_filename = MediaFileType(
- f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
- )
- output_abspath = os.path.join(abs_temp_dir, output_filename)
- final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
- # 5) Return output - for_block_output returns workspace:// if available, else data URI
- video_out = await store_media_file(
- file=output_filename,
- execution_context=execution_context,
- return_format="for_block_output",
- )
-
- yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py b/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py
new file mode 100644
index 0000000000..1e9b9fed4f
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py
@@ -0,0 +1,77 @@
+import pytest
+
+from backend.blocks.encoder_block import TextEncoderBlock
+
+
+@pytest.mark.asyncio
+async def test_text_encoder_basic():
+ """Test basic encoding of newlines and special characters."""
+ block = TextEncoderBlock()
+ result = []
+ async for output in block.run(TextEncoderBlock.Input(text="Hello\nWorld")):
+ result.append(output)
+
+ assert len(result) == 1
+ assert result[0][0] == "encoded_text"
+ assert result[0][1] == "Hello\\nWorld"
+
+
+@pytest.mark.asyncio
+async def test_text_encoder_multiple_escapes():
+ """Test encoding of multiple escape sequences."""
+ block = TextEncoderBlock()
+ result = []
+ async for output in block.run(
+ TextEncoderBlock.Input(text="Line1\nLine2\tTabbed\rCarriage")
+ ):
+ result.append(output)
+
+ assert len(result) == 1
+ assert result[0][0] == "encoded_text"
+ assert "\\n" in result[0][1]
+ assert "\\t" in result[0][1]
+ assert "\\r" in result[0][1]
+
+
+@pytest.mark.asyncio
+async def test_text_encoder_unicode():
+ """Test that unicode characters are handled correctly."""
+ block = TextEncoderBlock()
+ result = []
+ async for output in block.run(TextEncoderBlock.Input(text="Hello δΈη\n")):
+ result.append(output)
+
+ assert len(result) == 1
+ assert result[0][0] == "encoded_text"
+    # Unicode characters are escaped as \uXXXX sequences (δΈ -> \u4e16)
+    assert "\\u4e16" in result[0][1] and "\\n" in result[0][1]
+
+
+@pytest.mark.asyncio
+async def test_text_encoder_empty_string():
+ """Test encoding of an empty string."""
+ block = TextEncoderBlock()
+ result = []
+ async for output in block.run(TextEncoderBlock.Input(text="")):
+ result.append(output)
+
+ assert len(result) == 1
+ assert result[0][0] == "encoded_text"
+ assert result[0][1] == ""
+
+
+@pytest.mark.asyncio
+async def test_text_encoder_error_handling():
+ """Test that encoding errors are handled gracefully."""
+ from unittest.mock import patch
+
+ block = TextEncoderBlock()
+ result = []
+
+ with patch("codecs.encode", side_effect=Exception("Mocked encoding error")):
+ async for output in block.run(TextEncoderBlock.Input(text="test")):
+ result.append(output)
+
+ assert len(result) == 1
+ assert result[0][0] == "error"
+ assert "Mocked encoding error" in result[0][1]
diff --git a/autogpt_platform/backend/backend/blocks/video/__init__.py b/autogpt_platform/backend/backend/blocks/video/__init__.py
new file mode 100644
index 0000000000..4974ae8a87
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/__init__.py
@@ -0,0 +1,37 @@
+"""Video editing blocks for AutoGPT Platform.
+
+This module provides blocks for:
+- Downloading videos from URLs (YouTube, Vimeo, news sites, direct links)
+- Clipping/trimming video segments
+- Concatenating multiple videos
+- Adding text overlays
+- Adding AI-generated narration
+- Getting media duration
+- Looping videos
+- Adding audio to videos
+
+Dependencies:
+- yt-dlp: For video downloading
+- moviepy: For video editing operations
+- elevenlabs: For AI narration (optional)
+"""
+
+from backend.blocks.video.add_audio import AddAudioToVideoBlock
+from backend.blocks.video.clip import VideoClipBlock
+from backend.blocks.video.concat import VideoConcatBlock
+from backend.blocks.video.download import VideoDownloadBlock
+from backend.blocks.video.duration import MediaDurationBlock
+from backend.blocks.video.loop import LoopVideoBlock
+from backend.blocks.video.narration import VideoNarrationBlock
+from backend.blocks.video.text_overlay import VideoTextOverlayBlock
+
+__all__ = [
+ "AddAudioToVideoBlock",
+ "LoopVideoBlock",
+ "MediaDurationBlock",
+ "VideoClipBlock",
+ "VideoConcatBlock",
+ "VideoDownloadBlock",
+ "VideoNarrationBlock",
+ "VideoTextOverlayBlock",
+]
diff --git a/autogpt_platform/backend/backend/blocks/video/_utils.py b/autogpt_platform/backend/backend/blocks/video/_utils.py
new file mode 100644
index 0000000000..9ebf195078
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/_utils.py
@@ -0,0 +1,131 @@
+"""Shared utilities for video blocks."""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Known operation tags added by video blocks
+_VIDEO_OPS = (
+ r"(?:clip|overlay|narrated|looped|concat|audio_attached|with_audio|narration)"
+)
+
+# Matches: {node_exec_id}_{operation}_ where node_exec_id contains a UUID
+_BLOCK_PREFIX_RE = re.compile(
+ r"^[a-zA-Z0-9_-]*"
+ r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+ r"[a-zA-Z0-9_-]*"
+ r"_" + _VIDEO_OPS + r"_"
+)
+
+# Matches: a lone {node_exec_id}_ prefix (no operation keyword, e.g. download output)
+_UUID_PREFIX_RE = re.compile(
+ r"^[a-zA-Z0-9_-]*"
+ r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+ r"[a-zA-Z0-9_-]*_"
+)
+
+
+def extract_source_name(input_path: str, max_length: int = 50) -> str:
+ """Extract the original source filename by stripping block-generated prefixes.
+
+ Iteratively removes {node_exec_id}_{operation}_ prefixes that accumulate
+ when chaining video blocks, recovering the original human-readable name.
+
+ Safe for plain filenames (no UUID -> no stripping).
+ Falls back to "video" if everything is stripped.
+ """
+ stem = Path(input_path).stem
+
+ # Pass 1: strip {node_exec_id}_{operation}_ prefixes iteratively
+ while _BLOCK_PREFIX_RE.match(stem):
+ stem = _BLOCK_PREFIX_RE.sub("", stem, count=1)
+
+ # Pass 2: strip a lone {node_exec_id}_ prefix (e.g. from download block)
+ if _UUID_PREFIX_RE.match(stem):
+ stem = _UUID_PREFIX_RE.sub("", stem, count=1)
+
+ if not stem:
+ return "video"
+
+ return stem[:max_length]
+
+
+def get_video_codecs(output_path: str) -> tuple[str, str]:
+ """Get appropriate video and audio codecs based on output file extension.
+
+ Args:
+ output_path: Path to the output file (used to determine extension)
+
+ Returns:
+ Tuple of (video_codec, audio_codec)
+
+ Codec mappings:
+ - .mp4: H.264 + AAC (universal compatibility)
+ - .webm: VP8 + Vorbis (web streaming)
+ - .mkv: H.264 + AAC (container supports many codecs)
+ - .mov: H.264 + AAC (Apple QuickTime, widely compatible)
+ - .m4v: H.264 + AAC (Apple iTunes/devices)
+ - .avi: MPEG-4 + MP3 (legacy Windows)
+ """
+ ext = os.path.splitext(output_path)[1].lower()
+
+ codec_map: dict[str, tuple[str, str]] = {
+ ".mp4": ("libx264", "aac"),
+ ".webm": ("libvpx", "libvorbis"),
+ ".mkv": ("libx264", "aac"),
+ ".mov": ("libx264", "aac"),
+ ".m4v": ("libx264", "aac"),
+ ".avi": ("mpeg4", "libmp3lame"),
+ }
+
+ return codec_map.get(ext, ("libx264", "aac"))
+
+
+def strip_chapters_inplace(video_path: str) -> None:
+ """Strip chapter metadata from a media file in-place using ffmpeg.
+
+ MoviePy 2.x crashes with IndexError when parsing files with embedded
+ chapter metadata (https://github.com/Zulko/moviepy/issues/2419).
+ This strips chapters without re-encoding.
+
+ Args:
+ video_path: Absolute path to the media file to strip chapters from.
+ """
+ base, ext = os.path.splitext(video_path)
+ tmp_path = base + ".tmp" + ext
+ try:
+ result = subprocess.run(
+ [
+ "ffmpeg",
+ "-y",
+ "-i",
+ video_path,
+ "-map_chapters",
+ "-1",
+ "-codec",
+ "copy",
+ tmp_path,
+ ],
+ capture_output=True,
+ text=True,
+ timeout=300,
+ )
+ if result.returncode != 0:
+ logger.warning(
+ "ffmpeg chapter strip failed (rc=%d): %s",
+ result.returncode,
+ result.stderr,
+ )
+ return
+ os.replace(tmp_path, video_path)
+ except FileNotFoundError:
+ logger.warning("ffmpeg not found; skipping chapter strip")
+ finally:
+ if os.path.exists(tmp_path):
+ os.unlink(tmp_path)
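A usage sketch for the helpers above (the UUID is illustrative; any lowercase UUID-shaped token triggers the prefix stripping):

from backend.blocks.video._utils import extract_source_name, get_video_codecs

# one chained-block prefix: {node_exec_id}_clip_ is stripped off
chained = "d290f1ee-6c54-4b01-90e6-d701748f0851_clip_beach_sunset.mp4"
assert extract_source_name(chained) == "beach_sunset"

# plain filenames contain no UUID, so nothing is stripped
assert extract_source_name("holiday.mp4") == "holiday"

# codec selection falls back to mp4 defaults for unknown extensions
assert get_video_codecs("out.webm") == ("libvpx", "libvorbis")
assert get_video_codecs("out.xyz") == ("libx264", "aac")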
diff --git a/autogpt_platform/backend/backend/blocks/video/add_audio.py b/autogpt_platform/backend/backend/blocks/video/add_audio.py
new file mode 100644
index 0000000000..ebd4ab94f2
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/add_audio.py
@@ -0,0 +1,113 @@
+"""AddAudioToVideoBlock - Attach an audio track to a video file."""
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import extract_source_name, strip_chapters_inplace
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class AddAudioToVideoBlock(Block):
+ """Add (attach) an audio track to an existing video."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="Video input (URL, data URI, or local path)."
+ )
+ audio_in: MediaFileType = SchemaField(
+ description="Audio input (URL, data URI, or local path)."
+ )
+ volume: float = SchemaField(
+ description="Volume scale for the newly attached audio track (1.0 = original).",
+ default=1.0,
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Final video (with attached audio), as a path or data URI."
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="3503748d-62b6-4425-91d6-725b064af509",
+ description="Block to attach an audio file to a video file using moviepy.",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=AddAudioToVideoBlock.Input,
+ output_schema=AddAudioToVideoBlock.Output,
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ **kwargs,
+ ) -> BlockOutput:
+ assert execution_context.graph_exec_id is not None
+ assert execution_context.node_exec_id is not None
+ graph_exec_id = execution_context.graph_exec_id
+ node_exec_id = execution_context.node_exec_id
+
+ # 1) Store the inputs locally
+ local_video_path = await store_media_file(
+ file=input_data.video_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+ local_audio_path = await store_media_file(
+ file=input_data.audio_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+ audio_abspath = get_exec_file_path(graph_exec_id, local_audio_path)
+
+ # 2) Load video + audio with moviepy
+ strip_chapters_inplace(video_abspath)
+ strip_chapters_inplace(audio_abspath)
+ video_clip = None
+ audio_clip = None
+ final_clip = None
+ try:
+ video_clip = VideoFileClip(video_abspath)
+ audio_clip = AudioFileClip(audio_abspath)
+ # Optionally scale volume
+ if input_data.volume != 1.0:
+ audio_clip = audio_clip.with_volume_scaled(input_data.volume)
+
+ # 3) Attach the new audio track
+ final_clip = video_clip.with_audio(audio_clip)
+
+ # 4) Write to output file
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_with_audio_{source}.mp4")
+ output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+ final_clip.write_videofile(
+ output_abspath, codec="libx264", audio_codec="aac"
+ )
+ finally:
+ if final_clip:
+ final_clip.close()
+ if audio_clip:
+ audio_clip.close()
+ if video_clip:
+ video_clip.close()
+
+ # 5) Return output - for_block_output returns workspace:// if available, else data URI
+ video_out = await store_media_file(
+ file=output_filename,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/video/clip.py b/autogpt_platform/backend/backend/blocks/video/clip.py
new file mode 100644
index 0000000000..05deea6530
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/clip.py
@@ -0,0 +1,167 @@
+"""VideoClipBlock - Extract a segment from a video file."""
+
+from typing import Literal
+
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoClipBlock(Block):
+ """Extract a time segment from a video."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="Input video (URL, data URI, or local path)"
+ )
+ start_time: float = SchemaField(description="Start time in seconds", ge=0.0)
+ end_time: float = SchemaField(description="End time in seconds", ge=0.0)
+ output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+ description="Output format", default="mp4", advanced=True
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Clipped video file (path or data URI)"
+ )
+ duration: float = SchemaField(description="Clip duration in seconds")
+
+ def __init__(self):
+ super().__init__(
+ id="8f539119-e580-4d86-ad41-86fbcb22abb1",
+ description="Extract a time segment from a video",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ test_input={
+ "video_in": "/tmp/test.mp4",
+ "start_time": 0.0,
+ "end_time": 10.0,
+ },
+ test_output=[("video_out", str), ("duration", float)],
+ test_mock={
+ "_clip_video": lambda *args: 10.0,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "clip_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _clip_video(
+ self,
+ video_abspath: str,
+ output_abspath: str,
+ start_time: float,
+ end_time: float,
+ ) -> float:
+ """Extract a clip from a video. Extracted for testability."""
+ clip = None
+ subclip = None
+ try:
+ strip_chapters_inplace(video_abspath)
+ clip = VideoFileClip(video_abspath)
+ subclip = clip.subclipped(start_time, end_time)
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ subclip.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+ return subclip.duration
+ finally:
+ if subclip:
+ subclip.close()
+ if clip:
+ clip.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ # Validate time range
+ if input_data.end_time <= input_data.start_time:
+ raise BlockExecutionError(
+ message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store the input video locally
+ local_video_path = await self._store_input_video(
+ execution_context, input_data.video_in
+ )
+ video_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_video_path
+ )
+
+ # Build output path
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(
+ f"{node_exec_id}_clip_{source}.{input_data.output_format}"
+ )
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ duration = self._clip_video(
+ video_abspath,
+ output_abspath,
+ input_data.start_time,
+ input_data.end_time,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+
+ yield "video_out", video_out
+ yield "duration", duration
+
+ except BlockExecutionError:
+ raise
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to clip video: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
diff --git a/autogpt_platform/backend/backend/blocks/video/concat.py b/autogpt_platform/backend/backend/blocks/video/concat.py
new file mode 100644
index 0000000000..b49854fb40
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/concat.py
@@ -0,0 +1,227 @@
+"""VideoConcatBlock - Concatenate multiple video clips into one."""
+
+from typing import Literal
+
+from moviepy import concatenate_videoclips
+from moviepy.video.fx import CrossFadeIn, CrossFadeOut, FadeIn, FadeOut
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoConcatBlock(Block):
+ """Merge multiple video clips into one continuous video."""
+
+ class Input(BlockSchemaInput):
+ videos: list[MediaFileType] = SchemaField(
+ description="List of video files to concatenate (in order)"
+ )
+ transition: Literal["none", "crossfade", "fade_black"] = SchemaField(
+ description="Transition between clips", default="none"
+ )
+ transition_duration: int = SchemaField(
+ description="Transition duration in seconds",
+ default=1,
+ ge=0,
+ advanced=True,
+ )
+ output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+ description="Output format", default="mp4", advanced=True
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Concatenated video file (path or data URI)"
+ )
+ total_duration: float = SchemaField(description="Total duration in seconds")
+
+ def __init__(self):
+ super().__init__(
+ id="9b0f531a-1118-487f-aeec-3fa63ea8900a",
+ description="Merge multiple video clips into one continuous video",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ test_input={
+ "videos": ["/tmp/a.mp4", "/tmp/b.mp4"],
+ },
+ test_output=[
+ ("video_out", str),
+ ("total_duration", float),
+ ],
+ test_mock={
+ "_concat_videos": lambda *args: 20.0,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "concat_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _concat_videos(
+ self,
+ video_abspaths: list[str],
+ output_abspath: str,
+ transition: str,
+ transition_duration: int,
+ ) -> float:
+ """Concatenate videos. Extracted for testability.
+
+ Returns:
+ Total duration of the concatenated video.
+ """
+ clips = []
+ faded_clips = []
+ final = None
+ try:
+ # Load clips
+ for v in video_abspaths:
+ strip_chapters_inplace(v)
+ clips.append(VideoFileClip(v))
+
+ # Validate transition_duration against shortest clip
+ if transition in {"crossfade", "fade_black"} and transition_duration > 0:
+ min_duration = min(c.duration for c in clips)
+ if transition_duration >= min_duration:
+ raise BlockExecutionError(
+ message=(
+ f"transition_duration ({transition_duration}s) must be "
+ f"shorter than the shortest clip ({min_duration:.2f}s)"
+ ),
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ if transition == "crossfade":
+ for i, clip in enumerate(clips):
+ effects = []
+ if i > 0:
+ effects.append(CrossFadeIn(transition_duration))
+ if i < len(clips) - 1:
+ effects.append(CrossFadeOut(transition_duration))
+ if effects:
+ clip = clip.with_effects(effects)
+ faded_clips.append(clip)
+ final = concatenate_videoclips(
+ faded_clips,
+ method="compose",
+ padding=-transition_duration,
+ )
+ elif transition == "fade_black":
+ for clip in clips:
+ faded = clip.with_effects(
+ [FadeIn(transition_duration), FadeOut(transition_duration)]
+ )
+ faded_clips.append(faded)
+ final = concatenate_videoclips(faded_clips)
+ else:
+ final = concatenate_videoclips(clips)
+
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ final.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+
+ return final.duration
+ finally:
+ if final:
+ final.close()
+ for clip in faded_clips:
+ clip.close()
+ for clip in clips:
+ clip.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ # Validate minimum clips
+ if len(input_data.videos) < 2:
+ raise BlockExecutionError(
+ message="At least 2 videos are required for concatenation",
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store all input videos locally
+ video_abspaths = []
+ for video in input_data.videos:
+ local_path = await self._store_input_video(execution_context, video)
+ video_abspaths.append(
+ get_exec_file_path(execution_context.graph_exec_id, local_path)
+ )
+
+ # Build output path
+ source = (
+ extract_source_name(video_abspaths[0]) if video_abspaths else "video"
+ )
+ output_filename = MediaFileType(
+ f"{node_exec_id}_concat_{source}.{input_data.output_format}"
+ )
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ total_duration = self._concat_videos(
+ video_abspaths,
+ output_abspath,
+ input_data.transition,
+ input_data.transition_duration,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+
+ yield "video_out", video_out
+ yield "total_duration", total_duration
+
+ except BlockExecutionError:
+ raise
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to concatenate videos: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
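On the crossfade branch above: concatenate_videoclips(..., method="compose", padding=-transition_duration) overlaps each adjacent pair of clips by the transition duration, so the total length shrinks by one transition per join. A back-of-envelope check of the expected duration (assuming MoviePy's negative-padding overlap behavior):

def expected_crossfade_total(durations: list[float], transition: float) -> float:
    # each of the len(durations) - 1 joins overlaps by `transition` seconds
    return sum(durations) - (len(durations) - 1) * transition

# three 10s clips with a 1s crossfade -> 28s, not 30s
assert expected_crossfade_total([10.0, 10.0, 10.0], 1.0) == 28.0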
diff --git a/autogpt_platform/backend/backend/blocks/video/download.py b/autogpt_platform/backend/backend/blocks/video/download.py
new file mode 100644
index 0000000000..4046d5df42
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/download.py
@@ -0,0 +1,172 @@
+"""VideoDownloadBlock - Download video from URL (YouTube, Vimeo, news sites, direct links)."""
+
+import os
+import typing
+from typing import Literal
+
+import yt_dlp
+
+if typing.TYPE_CHECKING:
+ from yt_dlp import _Params
+
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoDownloadBlock(Block):
+ """Download video from URL using yt-dlp."""
+
+ class Input(BlockSchemaInput):
+ url: str = SchemaField(
+ description="URL of the video to download (YouTube, Vimeo, direct link, etc.)",
+ placeholder="https://www.youtube.com/watch?v=...",
+ )
+ quality: Literal["best", "1080p", "720p", "480p", "audio_only"] = SchemaField(
+ description="Video quality preference", default="720p"
+ )
+ output_format: Literal["mp4", "webm", "mkv"] = SchemaField(
+ description="Output video format", default="mp4", advanced=True
+ )
+
+ class Output(BlockSchemaOutput):
+ video_file: MediaFileType = SchemaField(
+ description="Downloaded video (path or data URI)"
+ )
+ duration: float = SchemaField(description="Video duration in seconds")
+ title: str = SchemaField(description="Video title from source")
+ source_url: str = SchemaField(description="Original source URL")
+
+ def __init__(self):
+ super().__init__(
+ id="c35daabb-cd60-493b-b9ad-51f1fe4b50c4",
+ description="Download video from URL (YouTube, Vimeo, news sites, direct links)",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ disabled=True, # Disable until we can sandbox yt-dlp and handle security implications
+ test_input={
+ "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+ "quality": "480p",
+ },
+ test_output=[
+ ("video_file", str),
+ ("duration", float),
+ ("title", str),
+ ("source_url", str),
+ ],
+ test_mock={
+ "_download_video": lambda *args: (
+ "video.mp4",
+ 212.0,
+ "Test Video",
+ ),
+ "_store_output_video": lambda *args, **kwargs: "video.mp4",
+ },
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _get_format_string(self, quality: str) -> str:
+ formats = {
+ "best": "bestvideo+bestaudio/best",
+ "1080p": "bestvideo[height<=1080]+bestaudio/best[height<=1080]",
+ "720p": "bestvideo[height<=720]+bestaudio/best[height<=720]",
+ "480p": "bestvideo[height<=480]+bestaudio/best[height<=480]",
+ "audio_only": "bestaudio/best",
+ }
+ return formats.get(quality, formats["720p"])
+
+ def _download_video(
+ self,
+ url: str,
+ quality: str,
+ output_format: str,
+ output_dir: str,
+ node_exec_id: str,
+ ) -> tuple[str, float, str]:
+ """Download video. Extracted for testability."""
+ output_template = os.path.join(
+ output_dir, f"{node_exec_id}_%(title).50s.%(ext)s"
+ )
+
+ ydl_opts: "_Params" = {
+ "format": f"{self._get_format_string(quality)}/best",
+ "outtmpl": output_template,
+ "merge_output_format": output_format,
+ "quiet": True,
+ "no_warnings": True,
+ }
+
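+ # Merging separate video and audio streams (per "merge_output_format")
+ # requires ffmpeg to be available on PATH.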
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+ info = ydl.extract_info(url, download=True)
+ video_path = ydl.prepare_filename(info)
+
+ # yt-dlp may merge into a container other than the one prepare_filename()
+ # reports; swap the extension to match the requested output format
+ if not video_path.endswith(f".{output_format}"):
+ video_path = video_path.rsplit(".", 1)[0] + f".{output_format}"
+
+ # Return just the filename, not the full path
+ filename = os.path.basename(video_path)
+
+ return (
+ filename,
+ info.get("duration") or 0.0,
+ info.get("title") or "Unknown",
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Get the exec file directory
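+ # (an empty relative path resolves to this graph run's root exec directory)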
+ output_dir = get_exec_file_path(execution_context.graph_exec_id, "")
+ os.makedirs(output_dir, exist_ok=True)
+
+ filename, duration, title = self._download_video(
+ input_data.url,
+ input_data.quality,
+ input_data.output_format,
+ output_dir,
+ node_exec_id,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, MediaFileType(filename)
+ )
+
+ yield "video_file", video_out
+ yield "duration", duration
+ yield "title", title
+ yield "source_url", input_data.url
+
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to download video: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
diff --git a/autogpt_platform/backend/backend/blocks/video/duration.py b/autogpt_platform/backend/backend/blocks/video/duration.py
new file mode 100644
index 0000000000..9e05d35b00
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/duration.py
@@ -0,0 +1,77 @@
+"""MediaDurationBlock - Get the duration of a media file."""
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import strip_chapters_inplace
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class MediaDurationBlock(Block):
+ """Get the duration of a media file (video or audio)."""
+
+ class Input(BlockSchemaInput):
+ media_in: MediaFileType = SchemaField(
+ description="Media input (URL, data URI, or local path)."
+ )
+ is_video: bool = SchemaField(
+ description="Whether the media is a video (True) or audio (False).",
+ default=True,
+ )
+
+ class Output(BlockSchemaOutput):
+ duration: float = SchemaField(
+ description="Duration of the media file (in seconds)."
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
+ description="Block to get the duration of a media file.",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=MediaDurationBlock.Input,
+ output_schema=MediaDurationBlock.Output,
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ **kwargs,
+ ) -> BlockOutput:
+ # 1) Store the input media locally
+ local_media_path = await store_media_file(
+ file=input_data.media_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+ assert execution_context.graph_exec_id is not None
+ media_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_media_path
+ )
+
+ # 2) Strip chapters to avoid MoviePy crash, then load the clip
+ strip_chapters_inplace(media_abspath)
+ clip = None
+ try:
+ if input_data.is_video:
+ clip = VideoFileClip(media_abspath)
+ else:
+ clip = AudioFileClip(media_abspath)
+
+ duration = clip.duration
+ finally:
+ if clip:
+ clip.close()
+
+ yield "duration", duration
diff --git a/autogpt_platform/backend/backend/blocks/video/loop.py b/autogpt_platform/backend/backend/blocks/video/loop.py
new file mode 100644
index 0000000000..461610f713
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/loop.py
@@ -0,0 +1,115 @@
+"""LoopVideoBlock - Loop a video to a given duration or number of repeats."""
+
+from typing import Optional
+
+from moviepy.video.fx.Loop import Loop
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import extract_source_name, strip_chapters_inplace
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class LoopVideoBlock(Block):
+ """Loop (repeat) a video clip until a given duration or number of loops."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="The input video (can be a URL, data URI, or local path)."
+ )
+ duration: Optional[float] = SchemaField(
+ description="Target duration (in seconds) to loop the video to. Either duration or n_loops must be provided.",
+ default=None,
+ ge=0.0,
+ le=3600.0, # Max 1 hour to prevent disk exhaustion
+ )
+ n_loops: Optional[int] = SchemaField(
+ description="Number of times to repeat the video. Either n_loops or duration must be provided.",
+ default=None,
+ ge=1,
+ le=10, # Max 10 loops to prevent disk exhaustion
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Looped video returned either as a relative path or a data URI."
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="8bf9eef6-5451-4213-b265-25306446e94b",
+ description="Block to loop a video to a given duration or number of repeats.",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=LoopVideoBlock.Input,
+ output_schema=LoopVideoBlock.Output,
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ **kwargs,
+ ) -> BlockOutput:
+ assert execution_context.graph_exec_id is not None
+ assert execution_context.node_exec_id is not None
+ graph_exec_id = execution_context.graph_exec_id
+ node_exec_id = execution_context.node_exec_id
+
+ # 1) Store the input video locally
+ local_video_path = await store_media_file(
+ file=input_data.video_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+ input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+ # 2) Load the clip
+ strip_chapters_inplace(input_abspath)
+ clip = None
+ looped_clip = None
+ try:
+ clip = VideoFileClip(input_abspath)
+
+ # 3) Apply the loop effect
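+ # MoviePy's Loop effect: Loop(duration=d) repeats the clip until the
+ # output reaches d seconds (truncating the final pass), while Loop(n=k)
+ # plays the clip exactly k times.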
+ if input_data.duration:
+ # Loop until we reach the specified duration
+ looped_clip = clip.with_effects([Loop(duration=input_data.duration)])
+ elif input_data.n_loops:
+ looped_clip = clip.with_effects([Loop(n=input_data.n_loops)])
+ else:
+ raise ValueError("Either 'duration' or 'n_loops' must be provided.")
+
+ assert isinstance(looped_clip, VideoFileClip)
+
+ # 4) Save the looped output
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_looped_{source}.mp4")
+ output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+ looped_clip = looped_clip.with_audio(clip.audio)
+ looped_clip.write_videofile(
+ output_abspath, codec="libx264", audio_codec="aac"
+ )
+ finally:
+ if looped_clip:
+ looped_clip.close()
+ if clip:
+ clip.close()
+
+ # Return output - for_block_output returns workspace:// if available, else data URI
+ video_out = await store_media_file(
+ file=output_filename,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/video/narration.py b/autogpt_platform/backend/backend/blocks/video/narration.py
new file mode 100644
index 0000000000..adf41753c8
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/narration.py
@@ -0,0 +1,267 @@
+"""VideoNarrationBlock - Generate AI voice narration and add to video."""
+
+import os
+from typing import Literal
+
+from elevenlabs import ElevenLabs
+from moviepy import CompositeAudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.elevenlabs._auth import (
+ TEST_CREDENTIALS,
+ TEST_CREDENTIALS_INPUT,
+ ElevenLabsCredentials,
+ ElevenLabsCredentialsInput,
+)
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import CredentialsField, SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoNarrationBlock(Block):
+ """Generate AI narration and add to video."""
+
+ class Input(BlockSchemaInput):
+ credentials: ElevenLabsCredentialsInput = CredentialsField(
+ description="ElevenLabs API key for voice synthesis"
+ )
+ video_in: MediaFileType = SchemaField(
+ description="Input video (URL, data URI, or local path)"
+ )
+ script: str = SchemaField(description="Narration script text")
+ voice_id: str = SchemaField(
+ description="ElevenLabs voice ID", default="21m00Tcm4TlvDq8ikWAM" # Rachel
+ )
+ model_id: Literal[
+ "eleven_multilingual_v2",
+ "eleven_flash_v2_5",
+ "eleven_turbo_v2_5",
+ "eleven_turbo_v2",
+ ] = SchemaField(
+ description="ElevenLabs TTS model",
+ default="eleven_multilingual_v2",
+ )
+ mix_mode: Literal["replace", "mix", "ducking"] = SchemaField(
+ description="How to combine with original audio. 'ducking' applies stronger attenuation than 'mix'.",
+ default="ducking",
+ )
+ narration_volume: float = SchemaField(
+ description="Narration volume (0.0 to 2.0)",
+ default=1.0,
+ ge=0.0,
+ le=2.0,
+ advanced=True,
+ )
+ original_volume: float = SchemaField(
+ description="Original audio volume when mixing (0.0 to 1.0)",
+ default=0.3,
+ ge=0.0,
+ le=1.0,
+ advanced=True,
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Video with narration (path or data URI)"
+ )
+ audio_file: MediaFileType = SchemaField(
+ description="Generated audio file (path or data URI)"
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="3d036b53-859c-4b17-9826-ca340f736e0e",
+ description="Generate AI narration and add to video",
+ categories={BlockCategory.MULTIMEDIA, BlockCategory.AI},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ test_input={
+ "video_in": "/tmp/test.mp4",
+ "script": "Hello world",
+ "credentials": TEST_CREDENTIALS_INPUT,
+ },
+ test_credentials=TEST_CREDENTIALS,
+ test_output=[("video_out", str), ("audio_file", str)],
+ test_mock={
+ "_generate_narration_audio": lambda *args: b"mock audio content",
+ "_add_narration_to_video": lambda *args: None,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "narrated_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _generate_narration_audio(
+ self, api_key: str, script: str, voice_id: str, model_id: str
+ ) -> bytes:
+ """Generate narration audio via ElevenLabs API."""
+ client = ElevenLabs(api_key=api_key)
+ audio_generator = client.text_to_speech.convert(
+ voice_id=voice_id,
+ text=script,
+ model_id=model_id,
+ )
+ # The SDK returns a generator, collect all chunks
+ return b"".join(audio_generator)
+
+ def _add_narration_to_video(
+ self,
+ video_abspath: str,
+ audio_abspath: str,
+ output_abspath: str,
+ mix_mode: str,
+ narration_volume: float,
+ original_volume: float,
+ ) -> None:
+ """Add narration audio to video. Extracted for testability."""
+ video = None
+ final = None
+ narration_original = None
+ narration_scaled = None
+ original = None
+
+ try:
+ strip_chapters_inplace(video_abspath)
+ video = VideoFileClip(video_abspath)
+ narration_original = AudioFileClip(audio_abspath)
+ narration_scaled = narration_original.with_volume_scaled(narration_volume)
+ narration = narration_scaled
+
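+ # "replace" drops the original track entirely; "mix" keeps it at
+ # original_volume; "ducking" attenuates it further (original_volume * 0.3)
+ # so the narration dominates.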
+ if mix_mode == "replace":
+ final_audio = narration
+ elif mix_mode == "mix":
+ if video.audio:
+ original = video.audio.with_volume_scaled(original_volume)
+ final_audio = CompositeAudioClip([original, narration])
+ else:
+ final_audio = narration
+ else: # ducking - apply stronger attenuation
+ if video.audio:
+ # Ducking uses a much lower volume for original audio
+ ducking_volume = original_volume * 0.3
+ original = video.audio.with_volume_scaled(ducking_volume)
+ final_audio = CompositeAudioClip([original, narration])
+ else:
+ final_audio = narration
+
+ final = video.with_audio(final_audio)
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ final.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+
+ finally:
+ if original:
+ original.close()
+ if narration_scaled:
+ narration_scaled.close()
+ if narration_original:
+ narration_original.close()
+ if final:
+ final.close()
+ if video:
+ video.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ credentials: ElevenLabsCredentials,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store the input video locally
+ local_video_path = await self._store_input_video(
+ execution_context, input_data.video_in
+ )
+ video_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_video_path
+ )
+
+ # Generate narration audio via ElevenLabs
+ audio_content = self._generate_narration_audio(
+ credentials.api_key.get_secret_value(),
+ input_data.script,
+ input_data.voice_id,
+ input_data.model_id,
+ )
+
+ # Save audio to exec file path
+ audio_filename = MediaFileType(f"{node_exec_id}_narration.mp3")
+ audio_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, audio_filename
+ )
+ os.makedirs(os.path.dirname(audio_abspath), exist_ok=True)
+ with open(audio_abspath, "wb") as f:
+ f.write(audio_content)
+
+ # Add narration to video
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_narrated_{source}.mp4")
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ self._add_narration_to_video(
+ video_abspath,
+ audio_abspath,
+ output_abspath,
+ input_data.mix_mode,
+ input_data.narration_volume,
+ input_data.original_volume,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+ audio_out = await self._store_output_video(
+ execution_context, audio_filename
+ )
+
+ yield "video_out", video_out
+ yield "audio_file", audio_out
+
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to add narration: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
diff --git a/autogpt_platform/backend/backend/blocks/video/text_overlay.py b/autogpt_platform/backend/backend/blocks/video/text_overlay.py
new file mode 100644
index 0000000000..cb7cfe0420
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/text_overlay.py
@@ -0,0 +1,231 @@
+"""VideoTextOverlayBlock - Add text overlay to video."""
+
+from typing import Literal
+
+from moviepy import CompositeVideoClip, TextClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoTextOverlayBlock(Block):
+ """Add text overlay/caption to video."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="Input video (URL, data URI, or local path)"
+ )
+ text: str = SchemaField(description="Text to overlay on video")
+ position: Literal[
+ "top",
+ "center",
+ "bottom",
+ "top-left",
+ "top-right",
+ "bottom-left",
+ "bottom-right",
+ ] = SchemaField(description="Position of text on screen", default="bottom")
+ start_time: float | None = SchemaField(
+ description="When to show text (seconds). None = entire video",
+ default=None,
+ advanced=True,
+ )
+ end_time: float | None = SchemaField(
+ description="When to hide text (seconds). None = until end",
+ default=None,
+ advanced=True,
+ )
+ font_size: int = SchemaField(
+ description="Font size", default=48, ge=12, le=200, advanced=True
+ )
+ font_color: str = SchemaField(
+ description="Font color (hex or name)", default="white", advanced=True
+ )
+ bg_color: str | None = SchemaField(
+ description="Background color behind text (None for transparent)",
+ default=None,
+ advanced=True,
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Video with text overlay (path or data URI)"
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="8ef14de6-cc90-430a-8cfa-3a003be92454",
+ description="Add text overlay/caption to video",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ disabled=True, # Disabled until the ImageMagick security policy can be locked down
+ test_input={"video_in": "/tmp/test.mp4", "text": "Hello World"},
+ test_output=[("video_out", str)],
+ test_mock={
+ "_add_text_overlay": lambda *args: None,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "overlay_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _add_text_overlay(
+ self,
+ video_abspath: str,
+ output_abspath: str,
+ text: str,
+ position: str,
+ start_time: float | None,
+ end_time: float | None,
+ font_size: int,
+ font_color: str,
+ bg_color: str | None,
+ ) -> None:
+ """Add text overlay to video. Extracted for testability."""
+ video = None
+ final = None
+ txt_clip = None
+ try:
+ strip_chapters_inplace(video_abspath)
+ video = VideoFileClip(video_abspath)
+
+ txt_clip = TextClip(
+ text=text,
+ font_size=font_size,
+ color=font_color,
+ bg_color=bg_color,
+ )
+
+ # Position mapping
+ pos_map = {
+ "top": ("center", "top"),
+ "center": ("center", "center"),
+ "bottom": ("center", "bottom"),
+ "top-left": ("left", "top"),
+ "top-right": ("right", "top"),
+ "bottom-left": ("left", "bottom"),
+ "bottom-right": ("right", "bottom"),
+ }
+
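+ # with_position() accepts (horizontal, vertical) alignment tuples such
+ # as ("center", "bottom").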
+ txt_clip = txt_clip.with_position(pos_map[position])
+
+ # Set timing
+ start = start_time if start_time is not None else 0
+ end = end_time if end_time is not None else video.duration
+ duration = max(0, end - start)
+ txt_clip = txt_clip.with_start(start).with_end(end).with_duration(duration)
+
+ final = CompositeVideoClip([video, txt_clip])
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ final.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+
+ finally:
+ if txt_clip:
+ txt_clip.close()
+ if final:
+ final.close()
+ if video:
+ video.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ # Validate time range if both are provided
+ if (
+ input_data.start_time is not None
+ and input_data.end_time is not None
+ and input_data.end_time <= input_data.start_time
+ ):
+ raise BlockExecutionError(
+ message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store the input video locally
+ local_video_path = await self._store_input_video(
+ execution_context, input_data.video_in
+ )
+ video_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_video_path
+ )
+
+ # Build output path
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_overlay_{source}.mp4")
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ self._add_text_overlay(
+ video_abspath,
+ output_abspath,
+ input_data.text,
+ input_data.position,
+ input_data.start_time,
+ input_data.end_time,
+ input_data.font_size,
+ input_data.font_color,
+ input_data.bg_color,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+
+ yield "video_out", video_out
+
+ except BlockExecutionError:
+ raise
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to add text overlay: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
diff --git a/autogpt_platform/backend/backend/blocks/youtube.py b/autogpt_platform/backend/backend/blocks/youtube.py
index e79be3e99b..6d81a86b4c 100644
--- a/autogpt_platform/backend/backend/blocks/youtube.py
+++ b/autogpt_platform/backend/backend/blocks/youtube.py
@@ -165,10 +165,13 @@ class TranscribeYoutubeVideoBlock(Block):
credentials: WebshareProxyCredentials,
**kwargs,
) -> BlockOutput:
- video_id = self.extract_video_id(input_data.youtube_url)
- yield "video_id", video_id
- transcript = self.get_transcript(video_id, credentials)
- transcript_text = self.format_transcript(transcript=transcript)
-
- yield "transcript", transcript_text
+ try:
+ video_id = self.extract_video_id(input_data.youtube_url)
+ transcript = self.get_transcript(video_id, credentials)
+ transcript_text = self.format_transcript(transcript=transcript)
+ # Only yield after all operations succeed
+ yield "video_id", video_id
+ yield "transcript", transcript_text
+ except Exception as e:
+ yield "error", str(e)
diff --git a/autogpt_platform/backend/backend/data/block_cost_config.py b/autogpt_platform/backend/backend/data/block_cost_config.py
index f46cc726f0..ec35afa401 100644
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -36,12 +36,14 @@ from backend.blocks.replicate.replicate_block import ReplicateModelBlock
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock
+from backend.blocks.video.narration import VideoNarrationBlock
from backend.data.block import Block, BlockCost, BlockCostType
from backend.integrations.credentials_store import (
aiml_api_credentials,
anthropic_credentials,
apollo_credentials,
did_credentials,
+ elevenlabs_credentials,
enrichlayer_credentials,
groq_credentials,
ideogram_credentials,
@@ -78,6 +80,7 @@ MODEL_COST: dict[LlmModel, int] = {
LlmModel.CLAUDE_4_1_OPUS: 21,
LlmModel.CLAUDE_4_OPUS: 21,
LlmModel.CLAUDE_4_SONNET: 5,
+ LlmModel.CLAUDE_4_6_OPUS: 14,
LlmModel.CLAUDE_4_5_HAIKU: 4,
LlmModel.CLAUDE_4_5_OPUS: 14,
LlmModel.CLAUDE_4_5_SONNET: 9,
@@ -639,4 +642,16 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
},
),
],
+ VideoNarrationBlock: [
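+ # Cost applies only when the platform-managed ElevenLabs credentials are
+ # used; user-supplied API keys are not billed platform credits.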
+ BlockCost(
+ cost_amount=5, # ElevenLabs TTS cost
+ cost_filter={
+ "credentials": {
+ "id": elevenlabs_credentials.id,
+ "provider": elevenlabs_credentials.provider,
+ "type": elevenlabs_credentials.type,
+ }
+ },
+ )
+ ],
}
diff --git a/autogpt_platform/backend/backend/data/credit_test.py b/autogpt_platform/backend/backend/data/credit_test.py
index 391a373b86..2b10c62882 100644
--- a/autogpt_platform/backend/backend/data/credit_test.py
+++ b/autogpt_platform/backend/backend/data/credit_test.py
@@ -134,6 +134,16 @@ async def test_block_credit_reset(server: SpinTestServer):
month1 = datetime.now(timezone.utc).replace(month=1, day=1)
user_credit.time_now = lambda: month1
+ # IMPORTANT: Set updatedAt to December of the previous year so it falls in
+ # a different month than the mocked month1 (January). This fixes a timing
+ # bug: when the test runs in early February, an updatedAt of ~35 days ago
+ # would land in January, match the mocked month1, and prevent the refill
+ # from triggering.
+ dec_previous_year = month1.replace(year=month1.year - 1, month=12, day=15)
+ await UserBalance.prisma().update(
+ where={"userId": DEFAULT_USER_ID},
+ data={"updatedAt": dec_previous_year},
+ )
+
# First call in month 1 should trigger refill
balance = await user_credit.get_credits(DEFAULT_USER_ID)
assert balance == REFILL_VALUE # Should get 1000 credits
diff --git a/autogpt_platform/backend/backend/integrations/credentials_store.py b/autogpt_platform/backend/backend/integrations/credentials_store.py
index 40a6f7269c..384405b0c7 100644
--- a/autogpt_platform/backend/backend/integrations/credentials_store.py
+++ b/autogpt_platform/backend/backend/integrations/credentials_store.py
@@ -224,6 +224,14 @@ openweathermap_credentials = APIKeyCredentials(
expires_at=None,
)
+elevenlabs_credentials = APIKeyCredentials(
+ id="f4a8b6c2-3d1e-4f5a-9b8c-7d6e5f4a3b2c",
+ provider="elevenlabs",
+ api_key=SecretStr(settings.secrets.elevenlabs_api_key),
+ title="Use Credits for ElevenLabs",
+ expires_at=None,
+)
+
DEFAULT_CREDENTIALS = [
ollama_credentials,
revid_credentials,
@@ -252,6 +260,7 @@ DEFAULT_CREDENTIALS = [
v0_credentials,
webshare_proxy_credentials,
openweathermap_credentials,
+ elevenlabs_credentials,
]
SYSTEM_CREDENTIAL_IDS = {cred.id for cred in DEFAULT_CREDENTIALS}
@@ -366,6 +375,8 @@ class IntegrationCredentialsStore:
all_credentials.append(webshare_proxy_credentials)
if settings.secrets.openweathermap_api_key:
all_credentials.append(openweathermap_credentials)
+ if settings.secrets.elevenlabs_api_key:
+ all_credentials.append(elevenlabs_credentials)
return all_credentials
async def get_creds_by_id(
diff --git a/autogpt_platform/backend/backend/integrations/providers.py b/autogpt_platform/backend/backend/integrations/providers.py
index 3af5006ca4..8a0d6fd183 100644
--- a/autogpt_platform/backend/backend/integrations/providers.py
+++ b/autogpt_platform/backend/backend/integrations/providers.py
@@ -18,6 +18,7 @@ class ProviderName(str, Enum):
DISCORD = "discord"
D_ID = "d_id"
E2B = "e2b"
+ ELEVENLABS = "elevenlabs"
FAL = "fal"
GITHUB = "github"
GOOGLE = "google"
diff --git a/autogpt_platform/backend/backend/util/file.py b/autogpt_platform/backend/backend/util/file.py
index baa9225629..1b8dbdea82 100644
--- a/autogpt_platform/backend/backend/util/file.py
+++ b/autogpt_platform/backend/backend/util/file.py
@@ -8,6 +8,8 @@ from pathlib import Path
from typing import TYPE_CHECKING, Literal
from urllib.parse import urlparse
+from pydantic import BaseModel
+
from backend.util.cloud_storage import get_cloud_storage_handler
from backend.util.request import Requests
from backend.util.settings import Config
@@ -17,6 +19,35 @@ from backend.util.virus_scanner import scan_content_safe
if TYPE_CHECKING:
from backend.data.execution import ExecutionContext
+
+class WorkspaceUri(BaseModel):
+ """Parsed workspace:// URI."""
+
+ file_ref: str # File ID or path (e.g. "abc123" or "/path/to/file.txt")
+ mime_type: str | None = None # MIME type from fragment (e.g. "video/mp4")
+ is_path: bool = False # True if file_ref is a path (starts with "/")
+
+
+def parse_workspace_uri(uri: str) -> WorkspaceUri:
+ """Parse a workspace:// URI into its components.
+
+ Examples:
+ "workspace://abc123" β WorkspaceUri(file_ref="abc123", mime_type=None, is_path=False)
+ "workspace://abc123#video/mp4" β WorkspaceUri(file_ref="abc123", mime_type="video/mp4", is_path=False)
+ "workspace:///path/to/file.txt" β WorkspaceUri(file_ref="/path/to/file.txt", mime_type=None, is_path=True)
+ """
+ raw = uri.removeprefix("workspace://")
+ mime_type: str | None = None
+ if "#" in raw:
+ raw, fragment = raw.split("#", 1)
+ mime_type = fragment or None
+ return WorkspaceUri(
+ file_ref=raw,
+ mime_type=mime_type,
+ is_path=raw.startswith("/"),
+ )
+
+
# Return format options for store_media_file
# - "for_local_processing": Returns local file path - use with ffmpeg, MoviePy, PIL, etc.
# - "for_external_api": Returns data URI (base64) - use when sending content to external APIs
@@ -183,22 +214,20 @@ async def store_media_file(
"This file type is only available in CoPilot sessions."
)
- # Parse workspace reference
- # workspace://abc123 - by file ID
- # workspace:///path/to/file.txt - by virtual path
- file_ref = file[12:] # Remove "workspace://"
+ # Parse workspace reference (strips #mimeType fragment from file ID)
+ ws = parse_workspace_uri(file)
- if file_ref.startswith("/"):
- # Path reference
- workspace_content = await workspace_manager.read_file(file_ref)
- file_info = await workspace_manager.get_file_info_by_path(file_ref)
+ if ws.is_path:
+ # Path reference: workspace:///path/to/file.txt
+ workspace_content = await workspace_manager.read_file(ws.file_ref)
+ file_info = await workspace_manager.get_file_info_by_path(ws.file_ref)
filename = sanitize_filename(
file_info.name if file_info else f"{uuid.uuid4()}.bin"
)
else:
- # ID reference
- workspace_content = await workspace_manager.read_file_by_id(file_ref)
- file_info = await workspace_manager.get_file_info(file_ref)
+ # ID reference: workspace://abc123 or workspace://abc123#video/mp4
+ workspace_content = await workspace_manager.read_file_by_id(ws.file_ref)
+ file_info = await workspace_manager.get_file_info(ws.file_ref)
filename = sanitize_filename(
file_info.name if file_info else f"{uuid.uuid4()}.bin"
)
@@ -334,7 +363,21 @@ async def store_media_file(
# Don't re-save if input was already from workspace
if is_from_workspace:
- # Return original workspace reference
+ # Return the original workspace reference, ensuring it carries a MIME type fragment
+ ws = parse_workspace_uri(file)
+ if not ws.mime_type:
+ # Add MIME type fragment if missing (older refs without it)
+ try:
+ if ws.is_path:
+ info = await workspace_manager.get_file_info_by_path(
+ ws.file_ref
+ )
+ else:
+ info = await workspace_manager.get_file_info(ws.file_ref)
+ if info:
+ return MediaFileType(f"{file}#{info.mimeType}")
+ except Exception:
+ pass
return MediaFileType(file)
# Save new content to workspace
@@ -346,7 +389,7 @@ async def store_media_file(
filename=filename,
overwrite=True,
)
- return MediaFileType(f"workspace://{file_record.id}")
+ return MediaFileType(f"workspace://{file_record.id}#{file_record.mimeType}")
else:
raise ValueError(f"Invalid return_format: {return_format}")
diff --git a/autogpt_platform/backend/backend/util/settings.py b/autogpt_platform/backend/backend/util/settings.py
index aa28a4c9ac..50b7428160 100644
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -656,6 +656,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
e2b_api_key: str = Field(default="", description="E2B API key")
nvidia_api_key: str = Field(default="", description="Nvidia API key")
mem0_api_key: str = Field(default="", description="Mem0 API key")
+ elevenlabs_api_key: str = Field(default="", description="ElevenLabs API key")
linear_client_id: str = Field(default="", description="Linear client ID")
linear_client_secret: str = Field(default="", description="Linear client secret")
diff --git a/autogpt_platform/backend/backend/util/workspace.py b/autogpt_platform/backend/backend/util/workspace.py
index a2f1a61b9e..86413b640a 100644
--- a/autogpt_platform/backend/backend/util/workspace.py
+++ b/autogpt_platform/backend/backend/util/workspace.py
@@ -22,6 +22,7 @@ from backend.data.workspace import (
soft_delete_workspace_file,
)
from backend.util.settings import Config
+from backend.util.virus_scanner import scan_content_safe
from backend.util.workspace_storage import compute_file_checksum, get_workspace_storage
logger = logging.getLogger(__name__)
@@ -187,6 +188,9 @@ class WorkspaceManager:
f"{Config().max_file_size_mb}MB limit"
)
+ # Virus scan content before persisting (defense in depth)
+ await scan_content_safe(content, filename=filename)
+
# Determine path with session scoping
if path is None:
path = f"/{filename}"
diff --git a/autogpt_platform/backend/poetry.lock b/autogpt_platform/backend/poetry.lock
index 91ac358ade..61da8c974f 100644
--- a/autogpt_platform/backend/poetry.lock
+++ b/autogpt_platform/backend/poetry.lock
@@ -1169,6 +1169,29 @@ attrs = ">=21.3.0"
e2b = ">=1.5.4,<2.0.0"
httpx = ">=0.20.0,<1.0.0"
+[[package]]
+name = "elevenlabs"
+version = "1.59.0"
+description = ""
+optional = false
+python-versions = "<4.0,>=3.8"
+groups = ["main"]
+files = [
+ {file = "elevenlabs-1.59.0-py3-none-any.whl", hash = "sha256:468145db81a0bc867708b4a8619699f75583e9481b395ec1339d0b443da771ed"},
+ {file = "elevenlabs-1.59.0.tar.gz", hash = "sha256:16e735bd594e86d415dd445d249c8cc28b09996cfd627fbc10102c0a84698859"},
+]
+
+[package.dependencies]
+httpx = ">=0.21.2"
+pydantic = ">=1.9.2"
+pydantic-core = ">=2.18.2,<3.0.0"
+requests = ">=2.20"
+typing_extensions = ">=4.0.0"
+websockets = ">=11.0"
+
+[package.extras]
+pyaudio = ["pyaudio (>=0.2.14)"]
+
[[package]]
name = "email-validator"
version = "2.2.0"
@@ -7361,6 +7384,28 @@ files = [
defusedxml = ">=0.7.1,<0.8.0"
requests = "*"
+[[package]]
+name = "yt-dlp"
+version = "2025.12.8"
+description = "A feature-rich command-line audio/video downloader"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+files = [
+ {file = "yt_dlp-2025.12.8-py3-none-any.whl", hash = "sha256:36e2584342e409cfbfa0b5e61448a1c5189e345cf4564294456ee509e7d3e065"},
+ {file = "yt_dlp-2025.12.8.tar.gz", hash = "sha256:b773c81bb6b71cb2c111cfb859f453c7a71cf2ef44eff234ff155877184c3e4f"},
+]
+
+[package.extras]
+build = ["build", "hatchling (>=1.27.0)", "pip", "setuptools (>=71.0.2)", "wheel"]
+curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || >=0.10.dev0,<0.14) ; implementation_name == \"cpython\""]
+default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=2.0.2,<3)", "websockets (>=13.0)", "yt-dlp-ejs (==0.3.2)"]
+dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.14.0,<0.15.0)"]
+pyinstaller = ["pyinstaller (>=6.17.0)"]
+secretstorage = ["cffi", "secretstorage"]
+static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.14.0,<0.15.0)"]
+test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
+
[[package]]
name = "zerobouncesdk"
version = "1.1.2"
@@ -7512,4 +7557,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
-content-hash = "ee5742dc1a9df50dfc06d4b26a1682cbb2b25cab6b79ce5625ec272f93e4f4bf"
+content-hash = "8239323f9ae6713224dffd1fe8ba8b449fe88b6c3c7a90940294a74f43a0387a"
diff --git a/autogpt_platform/backend/pyproject.toml b/autogpt_platform/backend/pyproject.toml
index fe263e47c0..24aea39f33 100644
--- a/autogpt_platform/backend/pyproject.toml
+++ b/autogpt_platform/backend/pyproject.toml
@@ -20,6 +20,7 @@ click = "^8.2.0"
cryptography = "^45.0"
discord-py = "^2.5.2"
e2b-code-interpreter = "^1.5.2"
+elevenlabs = "^1.50.0"
fastapi = "^0.116.1"
feedparser = "^6.0.11"
flake8 = "^7.3.0"
@@ -71,6 +72,7 @@ tweepy = "^4.16.0"
uvicorn = { extras = ["standard"], version = "^0.35.0" }
websockets = "^15.0"
youtube-transcript-api = "^1.2.1"
+yt-dlp = "2025.12.08"
zerobouncesdk = "^1.1.2"
# NOTE: please insert new dependencies in their alphabetical location
pytest-snapshot = "^0.9.0"
diff --git a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx
index 4213711447..c58bdac642 100644
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx
@@ -1,6 +1,6 @@
import { beautifyString } from "@/lib/utils";
import { Clipboard, Maximize2 } from "lucide-react";
-import React, { useState } from "react";
+import React, { useMemo, useState } from "react";
import { Button } from "../../../../../components/__legacy__/ui/button";
import { ContentRenderer } from "../../../../../components/__legacy__/ui/render";
import {
@@ -11,6 +11,12 @@ import {
TableHeader,
TableRow,
} from "../../../../../components/__legacy__/ui/table";
+import type { OutputMetadata } from "@/components/contextual/OutputRenderers";
+import {
+ globalRegistry,
+ OutputItem,
+} from "@/components/contextual/OutputRenderers";
+import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import { useToast } from "../../../../../components/molecules/Toast/use-toast";
import ExpandableOutputDialog from "./ExpandableOutputDialog";
@@ -26,6 +32,9 @@ export default function DataTable({
data,
}: DataTableProps) {
const { toast } = useToast();
+ const enableEnhancedOutputHandling = useGetFlag(
+ Flag.ENABLE_ENHANCED_OUTPUT_HANDLING,
+ );
const [expandedDialog, setExpandedDialog] = useState<{
isOpen: boolean;
execId: string;
@@ -33,6 +42,15 @@ export default function DataTable({
data: any[];
} | null>(null);
+ // Prepare renderers for each item when enhanced mode is enabled
+ const getItemRenderer = useMemo(() => {
+ if (!enableEnhancedOutputHandling) return null;
+ return (item: unknown) => {
+ const metadata: OutputMetadata = {};
+ return globalRegistry.getRenderer(item, metadata);
+ };
+ }, [enableEnhancedOutputHandling]);
+
const copyData = (pin: string, data: string) => {
navigator.clipboard.writeText(data).then(() => {
toast({
@@ -102,15 +120,31 @@ export default function DataTable({