diff --git a/autogpt_platform/backend/.env.default b/autogpt_platform/backend/.env.default
index b393f13017..fa52ba812a 100644
--- a/autogpt_platform/backend/.env.default
+++ b/autogpt_platform/backend/.env.default
@@ -152,6 +152,7 @@ REPLICATE_API_KEY=
REVID_API_KEY=
SCREENSHOTONE_API_KEY=
UNREAL_SPEECH_API_KEY=
+ELEVENLABS_API_KEY=
# Data & Search Services
E2B_API_KEY=
diff --git a/autogpt_platform/backend/Dockerfile b/autogpt_platform/backend/Dockerfile
index 103226d079..9bd455e490 100644
--- a/autogpt_platform/backend/Dockerfile
+++ b/autogpt_platform/backend/Dockerfile
@@ -62,10 +62,12 @@ ENV POETRY_HOME=/opt/poetry \
DEBIAN_FRONTEND=noninteractive
ENV PATH=/opt/poetry/bin:$PATH
-# Install Python without upgrading system-managed packages
+# Install Python, FFmpeg, and ImageMagick (required for video processing blocks)
RUN apt-get update && apt-get install -y \
python3.13 \
python3-pip \
+ ffmpeg \
+ imagemagick \
&& rm -rf /var/lib/apt/lists/*
# Copy only necessary files from builder
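Both new packages are runtime dependencies of the blocks introduced below: `strip_chapters_inplace` shells out to `ffmpeg`, and moviepy's `TextClip` renders text through ImageMagick. A container sanity check could look roughly like this (illustrative only; `convert` is ImageMagick 6's classic CLI, which is what Debian's `imagemagick` package ships):

```python
import subprocess

# Illustrative check that the image's new dependencies are on PATH.
for tool in ("ffmpeg", "convert"):
    try:
        subprocess.run([tool, "-version"], capture_output=True, check=True)
        print(tool, "OK")
    except (FileNotFoundError, subprocess.CalledProcessError):
        print(tool, "MISSING")
```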
diff --git a/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
new file mode 100644
index 0000000000..b823627b43
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
@@ -0,0 +1,28 @@
+"""ElevenLabs integration blocks - test credentials and shared utilities."""
+
+from typing import Literal
+
+from pydantic import SecretStr
+
+from backend.data.model import APIKeyCredentials, CredentialsMetaInput
+from backend.integrations.providers import ProviderName
+
+TEST_CREDENTIALS = APIKeyCredentials(
+ id="01234567-89ab-cdef-0123-456789abcdef",
+ provider="elevenlabs",
+ api_key=SecretStr("mock-elevenlabs-api-key"),
+ title="Mock ElevenLabs API key",
+ expires_at=None,
+)
+
+TEST_CREDENTIALS_INPUT = {
+ "provider": TEST_CREDENTIALS.provider,
+ "id": TEST_CREDENTIALS.id,
+ "type": TEST_CREDENTIALS.type,
+ "title": TEST_CREDENTIALS.title,
+}
+
+ElevenLabsCredentials = APIKeyCredentials
+ElevenLabsCredentialsInput = CredentialsMetaInput[
+ Literal[ProviderName.ELEVENLABS], Literal["api_key"]
+]
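For orientation, a block opts into these credentials the same way `VideoNarrationBlock` does later in this diff. A minimal sketch of the pattern (the `ExampleInput` class and `use_key` helper are illustrative, not part of this change):

```python
from backend.blocks.elevenlabs._auth import (
    ElevenLabsCredentials,
    ElevenLabsCredentialsInput,
)
from backend.data.block import BlockSchemaInput
from backend.data.model import CredentialsField


class ExampleInput(BlockSchemaInput):
    # Renders a credential picker constrained to provider "elevenlabs"
    # and credential type "api_key".
    credentials: ElevenLabsCredentialsInput = CredentialsField(
        description="ElevenLabs API key"
    )


def use_key(credentials: ElevenLabsCredentials) -> str:
    # The platform injects the resolved APIKeyCredentials at run time;
    # the raw key is unwrapped from its SecretStr like so:
    return credentials.api_key.get_secret_value()
```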
diff --git a/autogpt_platform/backend/backend/blocks/media.py b/autogpt_platform/backend/backend/blocks/media.py
deleted file mode 100644
index a8d145bc64..0000000000
--- a/autogpt_platform/backend/backend/blocks/media.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import os
-import tempfile
-from typing import Optional
-
-from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.video.fx.Loop import Loop
-from moviepy.video.io.VideoFileClip import VideoFileClip
-
-from backend.data.block import (
- Block,
- BlockCategory,
- BlockOutput,
- BlockSchemaInput,
- BlockSchemaOutput,
-)
-from backend.data.execution import ExecutionContext
-from backend.data.model import SchemaField
-from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
-
-
-class MediaDurationBlock(Block):
-
- class Input(BlockSchemaInput):
- media_in: MediaFileType = SchemaField(
- description="Media input (URL, data URI, or local path)."
- )
- is_video: bool = SchemaField(
- description="Whether the media is a video (True) or audio (False).",
- default=True,
- )
-
- class Output(BlockSchemaOutput):
- duration: float = SchemaField(
- description="Duration of the media file (in seconds)."
- )
-
- def __init__(self):
- super().__init__(
- id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
- description="Block to get the duration of a media file.",
- categories={BlockCategory.MULTIMEDIA},
- input_schema=MediaDurationBlock.Input,
- output_schema=MediaDurationBlock.Output,
- )
-
- async def run(
- self,
- input_data: Input,
- *,
- execution_context: ExecutionContext,
- **kwargs,
- ) -> BlockOutput:
- # 1) Store the input media locally
- local_media_path = await store_media_file(
- file=input_data.media_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
- assert execution_context.graph_exec_id is not None
- media_abspath = get_exec_file_path(
- execution_context.graph_exec_id, local_media_path
- )
-
- # 2) Load the clip
- if input_data.is_video:
- clip = VideoFileClip(media_abspath)
- else:
- clip = AudioFileClip(media_abspath)
-
- yield "duration", clip.duration
-
-
-class LoopVideoBlock(Block):
- """
- Block for looping (repeating) a video clip until a given duration or number of loops.
- """
-
- class Input(BlockSchemaInput):
- video_in: MediaFileType = SchemaField(
- description="The input video (can be a URL, data URI, or local path)."
- )
- # Provide EITHER a `duration` or `n_loops` or both. We'll demonstrate `duration`.
- duration: Optional[float] = SchemaField(
- description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
- default=None,
- ge=0.0,
- )
- n_loops: Optional[int] = SchemaField(
- description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
- default=None,
- ge=1,
- )
-
- class Output(BlockSchemaOutput):
- video_out: str = SchemaField(
- description="Looped video returned either as a relative path or a data URI."
- )
-
- def __init__(self):
- super().__init__(
- id="8bf9eef6-5451-4213-b265-25306446e94b",
- description="Block to loop a video to a given duration or number of repeats.",
- categories={BlockCategory.MULTIMEDIA},
- input_schema=LoopVideoBlock.Input,
- output_schema=LoopVideoBlock.Output,
- )
-
- async def run(
- self,
- input_data: Input,
- *,
- execution_context: ExecutionContext,
- **kwargs,
- ) -> BlockOutput:
- assert execution_context.graph_exec_id is not None
- assert execution_context.node_exec_id is not None
- graph_exec_id = execution_context.graph_exec_id
- node_exec_id = execution_context.node_exec_id
-
- # 1) Store the input video locally
- local_video_path = await store_media_file(
- file=input_data.video_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
- input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
-
- # 2) Load the clip
- clip = VideoFileClip(input_abspath)
-
- # 3) Apply the loop effect
- looped_clip = clip
- if input_data.duration:
- # Loop until we reach the specified duration
- looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)])
- elif input_data.n_loops:
- looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)])
- else:
- raise ValueError("Either 'duration' or 'n_loops' must be provided.")
-
- assert isinstance(looped_clip, VideoFileClip)
-
- # 4) Save the looped output
- output_filename = MediaFileType(
- f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
- )
- output_abspath = get_exec_file_path(graph_exec_id, output_filename)
-
- looped_clip = looped_clip.with_audio(clip.audio)
- looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
- # Return output - for_block_output returns workspace:// if available, else data URI
- video_out = await store_media_file(
- file=output_filename,
- execution_context=execution_context,
- return_format="for_block_output",
- )
-
- yield "video_out", video_out
-
-
-class AddAudioToVideoBlock(Block):
- """
- Block that adds (attaches) an audio track to an existing video.
- Optionally scale the volume of the new track.
- """
-
- class Input(BlockSchemaInput):
- video_in: MediaFileType = SchemaField(
- description="Video input (URL, data URI, or local path)."
- )
- audio_in: MediaFileType = SchemaField(
- description="Audio input (URL, data URI, or local path)."
- )
- volume: float = SchemaField(
- description="Volume scale for the newly attached audio track (1.0 = original).",
- default=1.0,
- )
-
- class Output(BlockSchemaOutput):
- video_out: MediaFileType = SchemaField(
- description="Final video (with attached audio), as a path or data URI."
- )
-
- def __init__(self):
- super().__init__(
- id="3503748d-62b6-4425-91d6-725b064af509",
- description="Block to attach an audio file to a video file using moviepy.",
- categories={BlockCategory.MULTIMEDIA},
- input_schema=AddAudioToVideoBlock.Input,
- output_schema=AddAudioToVideoBlock.Output,
- )
-
- async def run(
- self,
- input_data: Input,
- *,
- execution_context: ExecutionContext,
- **kwargs,
- ) -> BlockOutput:
- assert execution_context.graph_exec_id is not None
- assert execution_context.node_exec_id is not None
- graph_exec_id = execution_context.graph_exec_id
- node_exec_id = execution_context.node_exec_id
-
- # 1) Store the inputs locally
- local_video_path = await store_media_file(
- file=input_data.video_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
- local_audio_path = await store_media_file(
- file=input_data.audio_in,
- execution_context=execution_context,
- return_format="for_local_processing",
- )
-
- abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id)
- video_abspath = os.path.join(abs_temp_dir, local_video_path)
- audio_abspath = os.path.join(abs_temp_dir, local_audio_path)
-
- # 2) Load video + audio with moviepy
- video_clip = VideoFileClip(video_abspath)
- audio_clip = AudioFileClip(audio_abspath)
- # Optionally scale volume
- if input_data.volume != 1.0:
- audio_clip = audio_clip.with_volume_scaled(input_data.volume)
-
- # 3) Attach the new audio track
- final_clip = video_clip.with_audio(audio_clip)
-
- # 4) Write to output file
- output_filename = MediaFileType(
- f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
- )
- output_abspath = os.path.join(abs_temp_dir, output_filename)
- final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
- # 5) Return output - for_block_output returns workspace:// if available, else data URI
- video_out = await store_media_file(
- file=output_filename,
- execution_context=execution_context,
- return_format="for_block_output",
- )
-
- yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/video/__init__.py b/autogpt_platform/backend/backend/blocks/video/__init__.py
new file mode 100644
index 0000000000..4974ae8a87
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/__init__.py
@@ -0,0 +1,37 @@
+"""Video editing blocks for AutoGPT Platform.
+
+This module provides blocks for:
+- Downloading videos from URLs (YouTube, Vimeo, news sites, direct links)
+- Clipping/trimming video segments
+- Concatenating multiple videos
+- Adding text overlays
+- Adding AI-generated narration
+- Getting media duration
+- Looping videos
+- Adding audio to videos
+
+Dependencies:
+- yt-dlp: For video downloading
+- moviepy: For video editing operations
+- elevenlabs: For AI narration (optional)
+"""
+
+from backend.blocks.video.add_audio import AddAudioToVideoBlock
+from backend.blocks.video.clip import VideoClipBlock
+from backend.blocks.video.concat import VideoConcatBlock
+from backend.blocks.video.download import VideoDownloadBlock
+from backend.blocks.video.duration import MediaDurationBlock
+from backend.blocks.video.loop import LoopVideoBlock
+from backend.blocks.video.narration import VideoNarrationBlock
+from backend.blocks.video.text_overlay import VideoTextOverlayBlock
+
+__all__ = [
+ "AddAudioToVideoBlock",
+ "LoopVideoBlock",
+ "MediaDurationBlock",
+ "VideoClipBlock",
+ "VideoConcatBlock",
+ "VideoDownloadBlock",
+ "VideoNarrationBlock",
+ "VideoTextOverlayBlock",
+]
diff --git a/autogpt_platform/backend/backend/blocks/video/_utils.py b/autogpt_platform/backend/backend/blocks/video/_utils.py
new file mode 100644
index 0000000000..9ebf195078
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/_utils.py
@@ -0,0 +1,131 @@
+"""Shared utilities for video blocks."""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Known operation tags added by video blocks
+_VIDEO_OPS = (
+ r"(?:clip|overlay|narrated|looped|concat|audio_attached|with_audio|narration)"
+)
+
+# Matches: {node_exec_id}_{operation}_ where node_exec_id contains a UUID
+_BLOCK_PREFIX_RE = re.compile(
+ r"^[a-zA-Z0-9_-]*"
+ r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+ r"[a-zA-Z0-9_-]*"
+ r"_" + _VIDEO_OPS + r"_"
+)
+
+# Matches: a lone {node_exec_id}_ prefix (no operation keyword, e.g. download output)
+_UUID_PREFIX_RE = re.compile(
+ r"^[a-zA-Z0-9_-]*"
+ r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+ r"[a-zA-Z0-9_-]*_"
+)
+
+
+def extract_source_name(input_path: str, max_length: int = 50) -> str:
+ """Extract the original source filename by stripping block-generated prefixes.
+
+ Iteratively removes {node_exec_id}_{operation}_ prefixes that accumulate
+ when chaining video blocks, recovering the original human-readable name.
+
+ Safe for plain filenames (no UUID -> no stripping).
+ Falls back to "video" if everything is stripped.
+ """
+ stem = Path(input_path).stem
+
+ # Pass 1: strip {node_exec_id}_{operation}_ prefixes iteratively
+ while _BLOCK_PREFIX_RE.match(stem):
+ stem = _BLOCK_PREFIX_RE.sub("", stem, count=1)
+
+ # Pass 2: strip a lone {node_exec_id}_ prefix (e.g. from download block)
+ if _UUID_PREFIX_RE.match(stem):
+ stem = _UUID_PREFIX_RE.sub("", stem, count=1)
+
+ if not stem:
+ return "video"
+
+ return stem[:max_length]
+
+
+def get_video_codecs(output_path: str) -> tuple[str, str]:
+ """Get appropriate video and audio codecs based on output file extension.
+
+ Args:
+ output_path: Path to the output file (used to determine extension)
+
+ Returns:
+ Tuple of (video_codec, audio_codec)
+
+ Codec mappings:
+ - .mp4: H.264 + AAC (universal compatibility)
+ - .webm: VP8 + Vorbis (web streaming)
+ - .mkv: H.264 + AAC (container supports many codecs)
+ - .mov: H.264 + AAC (Apple QuickTime, widely compatible)
+ - .m4v: H.264 + AAC (Apple iTunes/devices)
+ - .avi: MPEG-4 + MP3 (legacy Windows)
+ """
+ ext = os.path.splitext(output_path)[1].lower()
+
+ codec_map: dict[str, tuple[str, str]] = {
+ ".mp4": ("libx264", "aac"),
+ ".webm": ("libvpx", "libvorbis"),
+ ".mkv": ("libx264", "aac"),
+ ".mov": ("libx264", "aac"),
+ ".m4v": ("libx264", "aac"),
+ ".avi": ("mpeg4", "libmp3lame"),
+ }
+
+ return codec_map.get(ext, ("libx264", "aac"))
+
+
+def strip_chapters_inplace(video_path: str) -> None:
+ """Strip chapter metadata from a media file in-place using ffmpeg.
+
+ MoviePy 2.x crashes with IndexError when parsing files with embedded
+ chapter metadata (https://github.com/Zulko/moviepy/issues/2419).
+ This strips chapters without re-encoding.
+
+ Args:
+ video_path: Absolute path to the media file to strip chapters from.
+ """
+ base, ext = os.path.splitext(video_path)
+ tmp_path = base + ".tmp" + ext
+ try:
+ result = subprocess.run(
+ [
+ "ffmpeg",
+ "-y",
+ "-i",
+ video_path,
+ "-map_chapters",
+ "-1",
+ "-codec",
+ "copy",
+ tmp_path,
+ ],
+ capture_output=True,
+ text=True,
+ timeout=300,
+ )
+ if result.returncode != 0:
+ logger.warning(
+ "ffmpeg chapter strip failed (rc=%d): %s",
+ result.returncode,
+ result.stderr,
+ )
+ return
+ os.replace(tmp_path, video_path)
+ except FileNotFoundError:
+ logger.warning("ffmpeg not found; skipping chapter strip")
+ finally:
+ if os.path.exists(tmp_path):
+ os.unlink(tmp_path)
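To make the two-pass stripping concrete, here is how `extract_source_name` behaves on a few representative names, plus the codec fallback (the UUID below is fabricated):

```python
from backend.blocks.video._utils import extract_source_name, get_video_codecs

UUID = "0f8fad5b-d9cb-469f-a165-70867728950e"  # fabricated node_exec_id

# Pass 1: strips a {node_exec_id}_{operation}_ prefix added by a video block.
assert extract_source_name(f"{UUID}_clip_breaking_news.mp4") == "breaking_news"

# Pass 2: strips a lone {node_exec_id}_ prefix (e.g. the download block's output).
assert extract_source_name(f"{UUID}_Interview.mp4") == "Interview"

# No UUID means no stripping; the stem passes through (truncated to max_length).
assert extract_source_name("breaking_news.mp4") == "breaking_news"

# Codec selection keys purely on the output extension, defaulting to H.264 + AAC.
assert get_video_codecs("out.webm") == ("libvpx", "libvorbis")
assert get_video_codecs("out.xyz") == ("libx264", "aac")
```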
diff --git a/autogpt_platform/backend/backend/blocks/video/add_audio.py b/autogpt_platform/backend/backend/blocks/video/add_audio.py
new file mode 100644
index 0000000000..ebd4ab94f2
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/add_audio.py
@@ -0,0 +1,113 @@
+"""AddAudioToVideoBlock - Attach an audio track to a video file."""
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import extract_source_name, strip_chapters_inplace
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class AddAudioToVideoBlock(Block):
+ """Add (attach) an audio track to an existing video."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="Video input (URL, data URI, or local path)."
+ )
+ audio_in: MediaFileType = SchemaField(
+ description="Audio input (URL, data URI, or local path)."
+ )
+ volume: float = SchemaField(
+ description="Volume scale for the newly attached audio track (1.0 = original).",
+ default=1.0,
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Final video (with attached audio), as a path or data URI."
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="3503748d-62b6-4425-91d6-725b064af509",
+ description="Block to attach an audio file to a video file using moviepy.",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=AddAudioToVideoBlock.Input,
+ output_schema=AddAudioToVideoBlock.Output,
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ **kwargs,
+ ) -> BlockOutput:
+ assert execution_context.graph_exec_id is not None
+ assert execution_context.node_exec_id is not None
+ graph_exec_id = execution_context.graph_exec_id
+ node_exec_id = execution_context.node_exec_id
+
+ # 1) Store the inputs locally
+ local_video_path = await store_media_file(
+ file=input_data.video_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+ local_audio_path = await store_media_file(
+ file=input_data.audio_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+ audio_abspath = get_exec_file_path(graph_exec_id, local_audio_path)
+
+ # 2) Load video + audio with moviepy
+ strip_chapters_inplace(video_abspath)
+ strip_chapters_inplace(audio_abspath)
+ video_clip = None
+ audio_clip = None
+ final_clip = None
+ try:
+ video_clip = VideoFileClip(video_abspath)
+ audio_clip = AudioFileClip(audio_abspath)
+ # Optionally scale volume
+ if input_data.volume != 1.0:
+ audio_clip = audio_clip.with_volume_scaled(input_data.volume)
+
+ # 3) Attach the new audio track
+ final_clip = video_clip.with_audio(audio_clip)
+
+ # 4) Write to output file
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_with_audio_{source}.mp4")
+ output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+ final_clip.write_videofile(
+ output_abspath, codec="libx264", audio_codec="aac"
+ )
+ finally:
+ if final_clip:
+ final_clip.close()
+ if audio_clip:
+ audio_clip.close()
+ if video_clip:
+ video_clip.close()
+
+ # 5) Return output - for_block_output returns workspace:// if available, else data URI
+ video_out = await store_media_file(
+ file=output_filename,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/video/clip.py b/autogpt_platform/backend/backend/blocks/video/clip.py
new file mode 100644
index 0000000000..05deea6530
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/clip.py
@@ -0,0 +1,167 @@
+"""VideoClipBlock - Extract a segment from a video file."""
+
+from typing import Literal
+
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoClipBlock(Block):
+ """Extract a time segment from a video."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="Input video (URL, data URI, or local path)"
+ )
+ start_time: float = SchemaField(description="Start time in seconds", ge=0.0)
+ end_time: float = SchemaField(description="End time in seconds", ge=0.0)
+ output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+ description="Output format", default="mp4", advanced=True
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Clipped video file (path or data URI)"
+ )
+ duration: float = SchemaField(description="Clip duration in seconds")
+
+ def __init__(self):
+ super().__init__(
+ id="8f539119-e580-4d86-ad41-86fbcb22abb1",
+ description="Extract a time segment from a video",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ test_input={
+ "video_in": "/tmp/test.mp4",
+ "start_time": 0.0,
+ "end_time": 10.0,
+ },
+ test_output=[("video_out", str), ("duration", float)],
+ test_mock={
+ "_clip_video": lambda *args: 10.0,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "clip_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _clip_video(
+ self,
+ video_abspath: str,
+ output_abspath: str,
+ start_time: float,
+ end_time: float,
+ ) -> float:
+ """Extract a clip from a video. Extracted for testability."""
+ clip = None
+ subclip = None
+ try:
+ strip_chapters_inplace(video_abspath)
+ clip = VideoFileClip(video_abspath)
+ subclip = clip.subclipped(start_time, end_time)
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ subclip.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+ return subclip.duration
+ finally:
+ if subclip:
+ subclip.close()
+ if clip:
+ clip.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ # Validate time range
+ if input_data.end_time <= input_data.start_time:
+ raise BlockExecutionError(
+ message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store the input video locally
+ local_video_path = await self._store_input_video(
+ execution_context, input_data.video_in
+ )
+ video_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_video_path
+ )
+
+ # Build output path
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(
+ f"{node_exec_id}_clip_{source}.{input_data.output_format}"
+ )
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ duration = self._clip_video(
+ video_abspath,
+ output_abspath,
+ input_data.start_time,
+ input_data.end_time,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+
+ yield "video_out", video_out
+ yield "duration", duration
+
+ except BlockExecutionError:
+ raise
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to clip video: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
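The `_store_input_video` / `_store_output_video` / `_clip_video` indirection exists so the `test_mock` entries above can stand in for file storage and the moviepy work. Conceptually the harness swaps the methods out per instance, roughly like this (a simplification, not the actual test runner):

```python
# Rough sketch of what test_mock amounts to (simplified):
block = VideoClipBlock()
block._clip_video = lambda *args: 10.0  # skip moviepy/ffmpeg entirely
block._store_input_video = lambda *args, **kwargs: "test.mp4"
block._store_output_video = lambda *args, **kwargs: "clip_test.mp4"
# Executing the block now exercises only the time-range validation and
# output wiring checked by test_output: ("video_out", str), ("duration", float).
```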
diff --git a/autogpt_platform/backend/backend/blocks/video/concat.py b/autogpt_platform/backend/backend/blocks/video/concat.py
new file mode 100644
index 0000000000..b49854fb40
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/concat.py
@@ -0,0 +1,227 @@
+"""VideoConcatBlock - Concatenate multiple video clips into one."""
+
+from typing import Literal
+
+from moviepy import concatenate_videoclips
+from moviepy.video.fx import CrossFadeIn, CrossFadeOut, FadeIn, FadeOut
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoConcatBlock(Block):
+ """Merge multiple video clips into one continuous video."""
+
+ class Input(BlockSchemaInput):
+ videos: list[MediaFileType] = SchemaField(
+ description="List of video files to concatenate (in order)"
+ )
+ transition: Literal["none", "crossfade", "fade_black"] = SchemaField(
+ description="Transition between clips", default="none"
+ )
+ transition_duration: int = SchemaField(
+ description="Transition duration in seconds",
+ default=1,
+ ge=0,
+ advanced=True,
+ )
+ output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+ description="Output format", default="mp4", advanced=True
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Concatenated video file (path or data URI)"
+ )
+ total_duration: float = SchemaField(description="Total duration in seconds")
+
+ def __init__(self):
+ super().__init__(
+ id="9b0f531a-1118-487f-aeec-3fa63ea8900a",
+ description="Merge multiple video clips into one continuous video",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ test_input={
+ "videos": ["/tmp/a.mp4", "/tmp/b.mp4"],
+ },
+ test_output=[
+ ("video_out", str),
+ ("total_duration", float),
+ ],
+ test_mock={
+ "_concat_videos": lambda *args: 20.0,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "concat_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _concat_videos(
+ self,
+ video_abspaths: list[str],
+ output_abspath: str,
+ transition: str,
+ transition_duration: int,
+ ) -> float:
+ """Concatenate videos. Extracted for testability.
+
+ Returns:
+ Total duration of the concatenated video.
+ """
+ clips = []
+ faded_clips = []
+ final = None
+ try:
+ # Load clips
+ for v in video_abspaths:
+ strip_chapters_inplace(v)
+ clips.append(VideoFileClip(v))
+
+ # Validate transition_duration against shortest clip
+ if transition in {"crossfade", "fade_black"} and transition_duration > 0:
+ min_duration = min(c.duration for c in clips)
+ if transition_duration >= min_duration:
+ raise BlockExecutionError(
+ message=(
+ f"transition_duration ({transition_duration}s) must be "
+ f"shorter than the shortest clip ({min_duration:.2f}s)"
+ ),
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ if transition == "crossfade":
+ for i, clip in enumerate(clips):
+ effects = []
+ if i > 0:
+ effects.append(CrossFadeIn(transition_duration))
+ if i < len(clips) - 1:
+ effects.append(CrossFadeOut(transition_duration))
+ if effects:
+ clip = clip.with_effects(effects)
+ faded_clips.append(clip)
+ final = concatenate_videoclips(
+ faded_clips,
+ method="compose",
+ padding=-transition_duration,
+ )
+ elif transition == "fade_black":
+ for clip in clips:
+ faded = clip.with_effects(
+ [FadeIn(transition_duration), FadeOut(transition_duration)]
+ )
+ faded_clips.append(faded)
+ final = concatenate_videoclips(faded_clips)
+ else:
+ final = concatenate_videoclips(clips)
+
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ final.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+
+ return final.duration
+ finally:
+ if final:
+ final.close()
+ for clip in faded_clips:
+ clip.close()
+ for clip in clips:
+ clip.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ # Validate minimum clips
+ if len(input_data.videos) < 2:
+ raise BlockExecutionError(
+ message="At least 2 videos are required for concatenation",
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store all input videos locally
+ video_abspaths = []
+ for video in input_data.videos:
+ local_path = await self._store_input_video(execution_context, video)
+ video_abspaths.append(
+ get_exec_file_path(execution_context.graph_exec_id, local_path)
+ )
+
+ # Build output path
+ source = (
+ extract_source_name(video_abspaths[0]) if video_abspaths else "video"
+ )
+ output_filename = MediaFileType(
+ f"{node_exec_id}_concat_{source}.{input_data.output_format}"
+ )
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ total_duration = self._concat_videos(
+ video_abspaths,
+ output_abspath,
+ input_data.transition,
+ input_data.transition_duration,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+
+ yield "video_out", video_out
+ yield "total_duration", total_duration
+
+ except BlockExecutionError:
+ raise
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to concatenate videos: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
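One subtlety worth noting: with `method="compose"` and negative `padding`, moviepy overlaps each adjacent pair of clips by `transition_duration`, so the crossfaded total is shorter than the sum of the inputs. A quick check of the arithmetic (numbers are illustrative):

```python
# Expected total duration of a crossfaded concat (illustrative numbers):
clip_durations = [12.0, 8.0, 15.0]
transition_duration = 1  # seconds of overlap at each of the n-1 joints

crossfade_total = sum(clip_durations) - (len(clip_durations) - 1) * transition_duration
assert crossfade_total == 33.0  # 35.0 raw, minus 2 joints x 1s overlap

# "fade_black" and "none" concatenate without overlap, so their total stays 35.0.
```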
diff --git a/autogpt_platform/backend/backend/blocks/video/download.py b/autogpt_platform/backend/backend/blocks/video/download.py
new file mode 100644
index 0000000000..4046d5df42
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/download.py
@@ -0,0 +1,172 @@
+"""VideoDownloadBlock - Download video from URL (YouTube, Vimeo, news sites, direct links)."""
+
+import os
+import typing
+from typing import Literal
+
+import yt_dlp
+
+if typing.TYPE_CHECKING:
+ from yt_dlp import _Params
+
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoDownloadBlock(Block):
+ """Download video from URL using yt-dlp."""
+
+ class Input(BlockSchemaInput):
+ url: str = SchemaField(
+ description="URL of the video to download (YouTube, Vimeo, direct link, etc.)",
+ placeholder="https://www.youtube.com/watch?v=...",
+ )
+ quality: Literal["best", "1080p", "720p", "480p", "audio_only"] = SchemaField(
+ description="Video quality preference", default="720p"
+ )
+ output_format: Literal["mp4", "webm", "mkv"] = SchemaField(
+ description="Output video format", default="mp4", advanced=True
+ )
+
+ class Output(BlockSchemaOutput):
+ video_file: MediaFileType = SchemaField(
+ description="Downloaded video (path or data URI)"
+ )
+ duration: float = SchemaField(description="Video duration in seconds")
+ title: str = SchemaField(description="Video title from source")
+ source_url: str = SchemaField(description="Original source URL")
+
+ def __init__(self):
+ super().__init__(
+ id="c35daabb-cd60-493b-b9ad-51f1fe4b50c4",
+ description="Download video from URL (YouTube, Vimeo, news sites, direct links)",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ disabled=True,  # Disabled until we can sandbox yt-dlp and address its security implications
+ test_input={
+ "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+ "quality": "480p",
+ },
+ test_output=[
+ ("video_file", str),
+ ("duration", float),
+ ("title", str),
+ ("source_url", str),
+ ],
+ test_mock={
+ "_download_video": lambda *args: (
+ "video.mp4",
+ 212.0,
+ "Test Video",
+ ),
+ "_store_output_video": lambda *args, **kwargs: "video.mp4",
+ },
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _get_format_string(self, quality: str) -> str:
+ formats = {
+ "best": "bestvideo+bestaudio/best",
+ "1080p": "bestvideo[height<=1080]+bestaudio/best[height<=1080]",
+ "720p": "bestvideo[height<=720]+bestaudio/best[height<=720]",
+ "480p": "bestvideo[height<=480]+bestaudio/best[height<=480]",
+ "audio_only": "bestaudio/best",
+ }
+ return formats.get(quality, formats["720p"])
+
+ def _download_video(
+ self,
+ url: str,
+ quality: str,
+ output_format: str,
+ output_dir: str,
+ node_exec_id: str,
+ ) -> tuple[str, float, str]:
+ """Download video. Extracted for testability."""
+ output_template = os.path.join(
+ output_dir, f"{node_exec_id}_%(title).50s.%(ext)s"
+ )
+
+ ydl_opts: "_Params" = {
+ "format": f"{self._get_format_string(quality)}/best",
+ "outtmpl": output_template,
+ "merge_output_format": output_format,
+ "quiet": True,
+ "no_warnings": True,
+ }
+
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+ info = ydl.extract_info(url, download=True)
+ video_path = ydl.prepare_filename(info)
+
+ # Handle format conversion in filename
+ if not video_path.endswith(f".{output_format}"):
+ video_path = video_path.rsplit(".", 1)[0] + f".{output_format}"
+
+ # Return just the filename, not the full path
+ filename = os.path.basename(video_path)
+
+ return (
+ filename,
+ info.get("duration") or 0.0,
+ info.get("title") or "Unknown",
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Get the exec file directory
+ output_dir = get_exec_file_path(execution_context.graph_exec_id, "")
+ os.makedirs(output_dir, exist_ok=True)
+
+ filename, duration, title = self._download_video(
+ input_data.url,
+ input_data.quality,
+ input_data.output_format,
+ output_dir,
+ node_exec_id,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, MediaFileType(filename)
+ )
+
+ yield "video_file", video_out
+ yield "duration", duration
+ yield "title", title
+ yield "source_url", input_data.url
+
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to download video: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
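For reference, the quality presets expand to yt-dlp format selectors, and `_download_video` appends one final `/best` fallback on top. For the default `720p` preset the resulting option is:

```python
# What ydl_opts["format"] ends up as for quality="720p":
fmt = "bestvideo[height<=720]+bestaudio/best[height<=720]" + "/best"
# Read left to right: prefer separate <=720p video plus best audio, else a
# combined <=720p stream, else whatever single "best" format the site offers.
assert fmt == "bestvideo[height<=720]+bestaudio/best[height<=720]/best"
```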
diff --git a/autogpt_platform/backend/backend/blocks/video/duration.py b/autogpt_platform/backend/backend/blocks/video/duration.py
new file mode 100644
index 0000000000..9e05d35b00
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/duration.py
@@ -0,0 +1,77 @@
+"""MediaDurationBlock - Get the duration of a media file."""
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import strip_chapters_inplace
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class MediaDurationBlock(Block):
+ """Get the duration of a media file (video or audio)."""
+
+ class Input(BlockSchemaInput):
+ media_in: MediaFileType = SchemaField(
+ description="Media input (URL, data URI, or local path)."
+ )
+ is_video: bool = SchemaField(
+ description="Whether the media is a video (True) or audio (False).",
+ default=True,
+ )
+
+ class Output(BlockSchemaOutput):
+ duration: float = SchemaField(
+ description="Duration of the media file (in seconds)."
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
+ description="Block to get the duration of a media file.",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=MediaDurationBlock.Input,
+ output_schema=MediaDurationBlock.Output,
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ **kwargs,
+ ) -> BlockOutput:
+ # 1) Store the input media locally
+ local_media_path = await store_media_file(
+ file=input_data.media_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+ assert execution_context.graph_exec_id is not None
+ media_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_media_path
+ )
+
+ # 2) Strip chapters to avoid MoviePy crash, then load the clip
+ strip_chapters_inplace(media_abspath)
+ clip = None
+ try:
+ if input_data.is_video:
+ clip = VideoFileClip(media_abspath)
+ else:
+ clip = AudioFileClip(media_abspath)
+
+ duration = clip.duration
+ finally:
+ if clip:
+ clip.close()
+
+ yield "duration", duration
diff --git a/autogpt_platform/backend/backend/blocks/video/loop.py b/autogpt_platform/backend/backend/blocks/video/loop.py
new file mode 100644
index 0000000000..461610f713
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/loop.py
@@ -0,0 +1,115 @@
+"""LoopVideoBlock - Loop a video to a given duration or number of repeats."""
+
+from typing import Optional
+
+from moviepy.video.fx.Loop import Loop
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import extract_source_name, strip_chapters_inplace
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class LoopVideoBlock(Block):
+ """Loop (repeat) a video clip until a given duration or number of loops."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="The input video (can be a URL, data URI, or local path)."
+ )
+ duration: Optional[float] = SchemaField(
+ description="Target duration (in seconds) to loop the video to. Either duration or n_loops must be provided.",
+ default=None,
+ ge=0.0,
+ le=3600.0, # Max 1 hour to prevent disk exhaustion
+ )
+ n_loops: Optional[int] = SchemaField(
+ description="Number of times to repeat the video. Either n_loops or duration must be provided.",
+ default=None,
+ ge=1,
+ le=10, # Max 10 loops to prevent disk exhaustion
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Looped video returned either as a relative path or a data URI."
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="8bf9eef6-5451-4213-b265-25306446e94b",
+ description="Block to loop a video to a given duration or number of repeats.",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=LoopVideoBlock.Input,
+ output_schema=LoopVideoBlock.Output,
+ )
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ **kwargs,
+ ) -> BlockOutput:
+ assert execution_context.graph_exec_id is not None
+ assert execution_context.node_exec_id is not None
+ graph_exec_id = execution_context.graph_exec_id
+ node_exec_id = execution_context.node_exec_id
+
+ # 1) Store the input video locally
+ local_video_path = await store_media_file(
+ file=input_data.video_in,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+ input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+ # 2) Load the clip
+ strip_chapters_inplace(input_abspath)
+ clip = None
+ looped_clip = None
+ try:
+ clip = VideoFileClip(input_abspath)
+
+ # 3) Apply the loop effect
+ if input_data.duration:
+ # Loop until we reach the specified duration
+ looped_clip = clip.with_effects([Loop(duration=input_data.duration)])
+ elif input_data.n_loops:
+ looped_clip = clip.with_effects([Loop(n=input_data.n_loops)])
+ else:
+ raise ValueError("Either 'duration' or 'n_loops' must be provided.")
+
+ assert isinstance(looped_clip, VideoFileClip)
+
+ # 4) Save the looped output
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_looped_{source}.mp4")
+ output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+ looped_clip = looped_clip.with_audio(clip.audio)
+ looped_clip.write_videofile(
+ output_abspath, codec="libx264", audio_codec="aac"
+ )
+ finally:
+ if looped_clip:
+ looped_clip.close()
+ if clip:
+ clip.close()
+
+ # Return output - for_block_output returns workspace:// if available, else data URI
+ video_out = await store_media_file(
+ file=output_filename,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/video/narration.py b/autogpt_platform/backend/backend/blocks/video/narration.py
new file mode 100644
index 0000000000..adf41753c8
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/narration.py
@@ -0,0 +1,267 @@
+"""VideoNarrationBlock - Generate AI voice narration and add to video."""
+
+import os
+from typing import Literal
+
+from elevenlabs import ElevenLabs
+from moviepy import CompositeAudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.elevenlabs._auth import (
+ TEST_CREDENTIALS,
+ TEST_CREDENTIALS_INPUT,
+ ElevenLabsCredentials,
+ ElevenLabsCredentialsInput,
+)
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import CredentialsField, SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoNarrationBlock(Block):
+ """Generate AI narration and add to video."""
+
+ class Input(BlockSchemaInput):
+ credentials: ElevenLabsCredentialsInput = CredentialsField(
+ description="ElevenLabs API key for voice synthesis"
+ )
+ video_in: MediaFileType = SchemaField(
+ description="Input video (URL, data URI, or local path)"
+ )
+ script: str = SchemaField(description="Narration script text")
+ voice_id: str = SchemaField(
+ description="ElevenLabs voice ID", default="21m00Tcm4TlvDq8ikWAM" # Rachel
+ )
+ model_id: Literal[
+ "eleven_multilingual_v2",
+ "eleven_flash_v2_5",
+ "eleven_turbo_v2_5",
+ "eleven_turbo_v2",
+ ] = SchemaField(
+ description="ElevenLabs TTS model",
+ default="eleven_multilingual_v2",
+ )
+ mix_mode: Literal["replace", "mix", "ducking"] = SchemaField(
+ description="How to combine with original audio. 'ducking' applies stronger attenuation than 'mix'.",
+ default="ducking",
+ )
+ narration_volume: float = SchemaField(
+ description="Narration volume (0.0 to 2.0)",
+ default=1.0,
+ ge=0.0,
+ le=2.0,
+ advanced=True,
+ )
+ original_volume: float = SchemaField(
+ description="Original audio volume when mixing (0.0 to 1.0)",
+ default=0.3,
+ ge=0.0,
+ le=1.0,
+ advanced=True,
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Video with narration (path or data URI)"
+ )
+ audio_file: MediaFileType = SchemaField(
+ description="Generated audio file (path or data URI)"
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="3d036b53-859c-4b17-9826-ca340f736e0e",
+ description="Generate AI narration and add to video",
+ categories={BlockCategory.MULTIMEDIA, BlockCategory.AI},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ test_input={
+ "video_in": "/tmp/test.mp4",
+ "script": "Hello world",
+ "credentials": TEST_CREDENTIALS_INPUT,
+ },
+ test_credentials=TEST_CREDENTIALS,
+ test_output=[("video_out", str), ("audio_file", str)],
+ test_mock={
+ "_generate_narration_audio": lambda *args: b"mock audio content",
+ "_add_narration_to_video": lambda *args: None,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "narrated_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _generate_narration_audio(
+ self, api_key: str, script: str, voice_id: str, model_id: str
+ ) -> bytes:
+ """Generate narration audio via ElevenLabs API."""
+ client = ElevenLabs(api_key=api_key)
+ audio_generator = client.text_to_speech.convert(
+ voice_id=voice_id,
+ text=script,
+ model_id=model_id,
+ )
+ # The SDK returns a generator; collect all chunks
+ return b"".join(audio_generator)
+
+ def _add_narration_to_video(
+ self,
+ video_abspath: str,
+ audio_abspath: str,
+ output_abspath: str,
+ mix_mode: str,
+ narration_volume: float,
+ original_volume: float,
+ ) -> None:
+ """Add narration audio to video. Extracted for testability."""
+ video = None
+ final = None
+ narration_original = None
+ narration_scaled = None
+ original = None
+
+ try:
+ strip_chapters_inplace(video_abspath)
+ video = VideoFileClip(video_abspath)
+ narration_original = AudioFileClip(audio_abspath)
+ narration_scaled = narration_original.with_volume_scaled(narration_volume)
+ narration = narration_scaled
+
+ if mix_mode == "replace":
+ final_audio = narration
+ elif mix_mode == "mix":
+ if video.audio:
+ original = video.audio.with_volume_scaled(original_volume)
+ final_audio = CompositeAudioClip([original, narration])
+ else:
+ final_audio = narration
+ else: # ducking - apply stronger attenuation
+ if video.audio:
+ # Ducking uses a much lower volume for original audio
+ ducking_volume = original_volume * 0.3
+ original = video.audio.with_volume_scaled(ducking_volume)
+ final_audio = CompositeAudioClip([original, narration])
+ else:
+ final_audio = narration
+
+ final = video.with_audio(final_audio)
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ final.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+
+ finally:
+ if original:
+ original.close()
+ if narration_scaled:
+ narration_scaled.close()
+ if narration_original:
+ narration_original.close()
+ if final:
+ final.close()
+ if video:
+ video.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ credentials: ElevenLabsCredentials,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store the input video locally
+ local_video_path = await self._store_input_video(
+ execution_context, input_data.video_in
+ )
+ video_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_video_path
+ )
+
+ # Generate narration audio via ElevenLabs
+ audio_content = self._generate_narration_audio(
+ credentials.api_key.get_secret_value(),
+ input_data.script,
+ input_data.voice_id,
+ input_data.model_id,
+ )
+
+ # Save audio to exec file path
+ audio_filename = MediaFileType(f"{node_exec_id}_narration.mp3")
+ audio_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, audio_filename
+ )
+ os.makedirs(os.path.dirname(audio_abspath), exist_ok=True)
+ with open(audio_abspath, "wb") as f:
+ f.write(audio_content)
+
+ # Add narration to video
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_narrated_{source}.mp4")
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ self._add_narration_to_video(
+ video_abspath,
+ audio_abspath,
+ output_abspath,
+ input_data.mix_mode,
+ input_data.narration_volume,
+ input_data.original_volume,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+ audio_out = await self._store_output_video(
+ execution_context, audio_filename
+ )
+
+ yield "video_out", video_out
+ yield "audio_file", audio_out
+
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to add narration: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
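The three mix modes differ only in what happens to the original track. With the defaults (`narration_volume=1.0`, `original_volume=0.3`), the effective scaling works out as follows (illustrative):

```python
# Effective volume applied to the original audio track, per mix mode:
original_volume = 0.3  # the schema default

mix = original_volume            # "mix":     0.3x original audio
ducking = original_volume * 0.3  # "ducking": 0.09x (the extra 0.3 factor is hard-coded)
replace = 0.0                    # "replace": original track dropped entirely

assert abs(ducking - 0.09) < 1e-9
```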
diff --git a/autogpt_platform/backend/backend/blocks/video/text_overlay.py b/autogpt_platform/backend/backend/blocks/video/text_overlay.py
new file mode 100644
index 0000000000..cb7cfe0420
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/text_overlay.py
@@ -0,0 +1,231 @@
+"""VideoTextOverlayBlock - Add text overlay to video."""
+
+from typing import Literal
+
+from moviepy import CompositeVideoClip, TextClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import (
+ extract_source_name,
+ get_video_codecs,
+ strip_chapters_inplace,
+)
+from backend.data.block import (
+ Block,
+ BlockCategory,
+ BlockOutput,
+ BlockSchemaInput,
+ BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoTextOverlayBlock(Block):
+ """Add text overlay/caption to video."""
+
+ class Input(BlockSchemaInput):
+ video_in: MediaFileType = SchemaField(
+ description="Input video (URL, data URI, or local path)"
+ )
+ text: str = SchemaField(description="Text to overlay on video")
+ position: Literal[
+ "top",
+ "center",
+ "bottom",
+ "top-left",
+ "top-right",
+ "bottom-left",
+ "bottom-right",
+ ] = SchemaField(description="Position of text on screen", default="bottom")
+ start_time: float | None = SchemaField(
+ description="When to show text (seconds). None = entire video",
+ default=None,
+ advanced=True,
+ )
+ end_time: float | None = SchemaField(
+ description="When to hide text (seconds). None = until end",
+ default=None,
+ advanced=True,
+ )
+ font_size: int = SchemaField(
+ description="Font size", default=48, ge=12, le=200, advanced=True
+ )
+ font_color: str = SchemaField(
+ description="Font color (hex or name)", default="white", advanced=True
+ )
+ bg_color: str | None = SchemaField(
+ description="Background color behind text (None for transparent)",
+ default=None,
+ advanced=True,
+ )
+
+ class Output(BlockSchemaOutput):
+ video_out: MediaFileType = SchemaField(
+ description="Video with text overlay (path or data URI)"
+ )
+
+ def __init__(self):
+ super().__init__(
+ id="8ef14de6-cc90-430a-8cfa-3a003be92454",
+ description="Add text overlay/caption to video",
+ categories={BlockCategory.MULTIMEDIA},
+ input_schema=self.Input,
+ output_schema=self.Output,
+ disabled=True,  # Disabled until we can lock down the ImageMagick security policy
+ test_input={"video_in": "/tmp/test.mp4", "text": "Hello World"},
+ test_output=[("video_out", str)],
+ test_mock={
+ "_add_text_overlay": lambda *args: None,
+ "_store_input_video": lambda *args, **kwargs: "test.mp4",
+ "_store_output_video": lambda *args, **kwargs: "overlay_test.mp4",
+ },
+ )
+
+ async def _store_input_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store input video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_local_processing",
+ )
+
+ async def _store_output_video(
+ self, execution_context: ExecutionContext, file: MediaFileType
+ ) -> MediaFileType:
+ """Store output video. Extracted for testability."""
+ return await store_media_file(
+ file=file,
+ execution_context=execution_context,
+ return_format="for_block_output",
+ )
+
+ def _add_text_overlay(
+ self,
+ video_abspath: str,
+ output_abspath: str,
+ text: str,
+ position: str,
+ start_time: float | None,
+ end_time: float | None,
+ font_size: int,
+ font_color: str,
+ bg_color: str | None,
+ ) -> None:
+ """Add text overlay to video. Extracted for testability."""
+ video = None
+ final = None
+ txt_clip = None
+ try:
+ strip_chapters_inplace(video_abspath)
+ video = VideoFileClip(video_abspath)
+
+ txt_clip = TextClip(
+ text=text,
+ font_size=font_size,
+ color=font_color,
+ bg_color=bg_color,
+ )
+
+ # Position mapping
+ pos_map = {
+ "top": ("center", "top"),
+ "center": ("center", "center"),
+ "bottom": ("center", "bottom"),
+ "top-left": ("left", "top"),
+ "top-right": ("right", "top"),
+ "bottom-left": ("left", "bottom"),
+ "bottom-right": ("right", "bottom"),
+ }
+
+ txt_clip = txt_clip.with_position(pos_map[position])
+
+ # Set timing
+ start = start_time or 0
+ end = end_time or video.duration
+ duration = max(0, end - start)
+ txt_clip = txt_clip.with_start(start).with_end(end).with_duration(duration)
+
+ final = CompositeVideoClip([video, txt_clip])
+ video_codec, audio_codec = get_video_codecs(output_abspath)
+ final.write_videofile(
+ output_abspath, codec=video_codec, audio_codec=audio_codec
+ )
+
+ finally:
+ if txt_clip:
+ txt_clip.close()
+ if final:
+ final.close()
+ if video:
+ video.close()
+
+ async def run(
+ self,
+ input_data: Input,
+ *,
+ execution_context: ExecutionContext,
+ node_exec_id: str,
+ **kwargs,
+ ) -> BlockOutput:
+ # Validate time range if both are provided
+ if (
+ input_data.start_time is not None
+ and input_data.end_time is not None
+ and input_data.end_time <= input_data.start_time
+ ):
+ raise BlockExecutionError(
+ message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+ block_name=self.name,
+ block_id=str(self.id),
+ )
+
+ try:
+ assert execution_context.graph_exec_id is not None
+
+ # Store the input video locally
+ local_video_path = await self._store_input_video(
+ execution_context, input_data.video_in
+ )
+ video_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, local_video_path
+ )
+
+ # Build output path
+ source = extract_source_name(local_video_path)
+ output_filename = MediaFileType(f"{node_exec_id}_overlay_{source}.mp4")
+ output_abspath = get_exec_file_path(
+ execution_context.graph_exec_id, output_filename
+ )
+
+ self._add_text_overlay(
+ video_abspath,
+ output_abspath,
+ input_data.text,
+ input_data.position,
+ input_data.start_time,
+ input_data.end_time,
+ input_data.font_size,
+ input_data.font_color,
+ input_data.bg_color,
+ )
+
+ # Return as workspace path or data URI based on context
+ video_out = await self._store_output_video(
+ execution_context, output_filename
+ )
+
+ yield "video_out", video_out
+
+ except BlockExecutionError:
+ raise
+ except Exception as e:
+ raise BlockExecutionError(
+ message=f"Failed to add text overlay: {e}",
+ block_name=self.name,
+ block_id=str(self.id),
+ ) from e
diff --git a/autogpt_platform/backend/backend/data/block_cost_config.py b/autogpt_platform/backend/backend/data/block_cost_config.py
index 590f09cb41..ec35afa401 100644
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -36,12 +36,14 @@ from backend.blocks.replicate.replicate_block import ReplicateModelBlock
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock
+from backend.blocks.video.narration import VideoNarrationBlock
from backend.data.block import Block, BlockCost, BlockCostType
from backend.integrations.credentials_store import (
aiml_api_credentials,
anthropic_credentials,
apollo_credentials,
did_credentials,
+ elevenlabs_credentials,
enrichlayer_credentials,
groq_credentials,
ideogram_credentials,
@@ -640,4 +642,16 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
},
),
],
+ VideoNarrationBlock: [
+ BlockCost(
+ cost_amount=5, # ElevenLabs TTS cost
+ cost_filter={
+ "credentials": {
+ "id": elevenlabs_credentials.id,
+ "provider": elevenlabs_credentials.provider,
+ "type": elevenlabs_credentials.type,
+ }
+ },
+ )
+ ],
}
diff --git a/autogpt_platform/backend/backend/integrations/credentials_store.py b/autogpt_platform/backend/backend/integrations/credentials_store.py
index 40a6f7269c..384405b0c7 100644
--- a/autogpt_platform/backend/backend/integrations/credentials_store.py
+++ b/autogpt_platform/backend/backend/integrations/credentials_store.py
@@ -224,6 +224,14 @@ openweathermap_credentials = APIKeyCredentials(
expires_at=None,
)
+elevenlabs_credentials = APIKeyCredentials(
+ id="f4a8b6c2-3d1e-4f5a-9b8c-7d6e5f4a3b2c",
+ provider="elevenlabs",
+ api_key=SecretStr(settings.secrets.elevenlabs_api_key),
+ title="Use Credits for ElevenLabs",
+ expires_at=None,
+)
+
DEFAULT_CREDENTIALS = [
ollama_credentials,
revid_credentials,
@@ -252,6 +260,7 @@ DEFAULT_CREDENTIALS = [
v0_credentials,
webshare_proxy_credentials,
openweathermap_credentials,
+ elevenlabs_credentials,
]
SYSTEM_CREDENTIAL_IDS = {cred.id for cred in DEFAULT_CREDENTIALS}
@@ -366,6 +375,8 @@ class IntegrationCredentialsStore:
all_credentials.append(webshare_proxy_credentials)
if settings.secrets.openweathermap_api_key:
all_credentials.append(openweathermap_credentials)
+ if settings.secrets.elevenlabs_api_key:
+ all_credentials.append(elevenlabs_credentials)
return all_credentials
async def get_creds_by_id(
diff --git a/autogpt_platform/backend/backend/integrations/providers.py b/autogpt_platform/backend/backend/integrations/providers.py
index 3af5006ca4..8a0d6fd183 100644
--- a/autogpt_platform/backend/backend/integrations/providers.py
+++ b/autogpt_platform/backend/backend/integrations/providers.py
@@ -18,6 +18,7 @@ class ProviderName(str, Enum):
DISCORD = "discord"
D_ID = "d_id"
E2B = "e2b"
+ ELEVENLABS = "elevenlabs"
FAL = "fal"
GITHUB = "github"
GOOGLE = "google"
diff --git a/autogpt_platform/backend/backend/util/file.py b/autogpt_platform/backend/backend/util/file.py
index baa9225629..1b8dbdea82 100644
--- a/autogpt_platform/backend/backend/util/file.py
+++ b/autogpt_platform/backend/backend/util/file.py
@@ -8,6 +8,8 @@ from pathlib import Path
from typing import TYPE_CHECKING, Literal
from urllib.parse import urlparse
+from pydantic import BaseModel
+
from backend.util.cloud_storage import get_cloud_storage_handler
from backend.util.request import Requests
from backend.util.settings import Config
@@ -17,6 +19,35 @@ from backend.util.virus_scanner import scan_content_safe
if TYPE_CHECKING:
from backend.data.execution import ExecutionContext
+
+class WorkspaceUri(BaseModel):
+ """Parsed workspace:// URI."""
+
+ file_ref: str # File ID or path (e.g. "abc123" or "/path/to/file.txt")
+ mime_type: str | None = None # MIME type from fragment (e.g. "video/mp4")
+ is_path: bool = False # True if file_ref is a path (starts with "/")
+
+
+def parse_workspace_uri(uri: str) -> WorkspaceUri:
+ """Parse a workspace:// URI into its components.
+
+ Examples:
+ "workspace://abc123" → WorkspaceUri(file_ref="abc123", mime_type=None, is_path=False)
+ "workspace://abc123#video/mp4" → WorkspaceUri(file_ref="abc123", mime_type="video/mp4", is_path=False)
+ "workspace:///path/to/file.txt" → WorkspaceUri(file_ref="/path/to/file.txt", mime_type=None, is_path=True)
+ """
+ raw = uri.removeprefix("workspace://")
+ mime_type: str | None = None
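+ # A trailing "#<mime>" fragment is an optional content-type hint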
+ if "#" in raw:
+ raw, fragment = raw.split("#", 1)
+ mime_type = fragment or None
+ return WorkspaceUri(
+ file_ref=raw,
+ mime_type=mime_type,
+ is_path=raw.startswith("/"),
+ )
+
+
# Return format options for store_media_file
# - "for_local_processing": Returns local file path - use with ffmpeg, MoviePy, PIL, etc.
# - "for_external_api": Returns data URI (base64) - use when sending content to external APIs
@@ -183,22 +214,20 @@ async def store_media_file(
"This file type is only available in CoPilot sessions."
)
- # Parse workspace reference
- # workspace://abc123 - by file ID
- # workspace:///path/to/file.txt - by virtual path
- file_ref = file[12:] # Remove "workspace://"
+ # Parse workspace reference (strips #mimeType fragment from file ID)
+ ws = parse_workspace_uri(file)
- if file_ref.startswith("/"):
- # Path reference
- workspace_content = await workspace_manager.read_file(file_ref)
- file_info = await workspace_manager.get_file_info_by_path(file_ref)
+ if ws.is_path:
+ # Path reference: workspace:///path/to/file.txt
+ workspace_content = await workspace_manager.read_file(ws.file_ref)
+ file_info = await workspace_manager.get_file_info_by_path(ws.file_ref)
filename = sanitize_filename(
file_info.name if file_info else f"{uuid.uuid4()}.bin"
)
else:
- # ID reference
- workspace_content = await workspace_manager.read_file_by_id(file_ref)
- file_info = await workspace_manager.get_file_info(file_ref)
+ # ID reference: workspace://abc123 or workspace://abc123#video/mp4
+ workspace_content = await workspace_manager.read_file_by_id(ws.file_ref)
+ file_info = await workspace_manager.get_file_info(ws.file_ref)
filename = sanitize_filename(
file_info.name if file_info else f"{uuid.uuid4()}.bin"
)
@@ -334,7 +363,21 @@ async def store_media_file(
# Don't re-save if input was already from workspace
if is_from_workspace:
- # Return original workspace reference
+ # Return original workspace reference, ensuring MIME type fragment
+ ws = parse_workspace_uri(file)
+ if not ws.mime_type:
+ # Add MIME type fragment if missing (older refs without it)
+ try:
+ if ws.is_path:
+ info = await workspace_manager.get_file_info_by_path(
+ ws.file_ref
+ )
+ else:
+ info = await workspace_manager.get_file_info(ws.file_ref)
+ if info:
+ return MediaFileType(f"{file}#{info.mimeType}")
+ except Exception:
+ # Best-effort enrichment; fall back to the original reference
+ pass
return MediaFileType(file)
# Save new content to workspace
@@ -346,7 +389,7 @@ async def store_media_file(
filename=filename,
overwrite=True,
)
- return MediaFileType(f"workspace://{file_record.id}")
+ return MediaFileType(f"workspace://{file_record.id}#{file_record.mimeType}")
else:
raise ValueError(f"Invalid return_format: {return_format}")
diff --git a/autogpt_platform/backend/backend/util/settings.py b/autogpt_platform/backend/backend/util/settings.py
index aa28a4c9ac..50b7428160 100644
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -656,6 +656,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
e2b_api_key: str = Field(default="", description="E2B API key")
nvidia_api_key: str = Field(default="", description="Nvidia API key")
mem0_api_key: str = Field(default="", description="Mem0 API key")
+ elevenlabs_api_key: str = Field(default="", description="ElevenLabs API key")
linear_client_id: str = Field(default="", description="Linear client ID")
linear_client_secret: str = Field(default="", description="Linear client secret")
diff --git a/autogpt_platform/backend/poetry.lock b/autogpt_platform/backend/poetry.lock
index 91ac358ade..61da8c974f 100644
--- a/autogpt_platform/backend/poetry.lock
+++ b/autogpt_platform/backend/poetry.lock
@@ -1169,6 +1169,29 @@ attrs = ">=21.3.0"
e2b = ">=1.5.4,<2.0.0"
httpx = ">=0.20.0,<1.0.0"
+[[package]]
+name = "elevenlabs"
+version = "1.59.0"
+description = ""
+optional = false
+python-versions = "<4.0,>=3.8"
+groups = ["main"]
+files = [
+ {file = "elevenlabs-1.59.0-py3-none-any.whl", hash = "sha256:468145db81a0bc867708b4a8619699f75583e9481b395ec1339d0b443da771ed"},
+ {file = "elevenlabs-1.59.0.tar.gz", hash = "sha256:16e735bd594e86d415dd445d249c8cc28b09996cfd627fbc10102c0a84698859"},
+]
+
+[package.dependencies]
+httpx = ">=0.21.2"
+pydantic = ">=1.9.2"
+pydantic-core = ">=2.18.2,<3.0.0"
+requests = ">=2.20"
+typing_extensions = ">=4.0.0"
+websockets = ">=11.0"
+
+[package.extras]
+pyaudio = ["pyaudio (>=0.2.14)"]
+
[[package]]
name = "email-validator"
version = "2.2.0"
@@ -7361,6 +7384,28 @@ files = [
defusedxml = ">=0.7.1,<0.8.0"
requests = "*"
+[[package]]
+name = "yt-dlp"
+version = "2025.12.8"
+description = "A feature-rich command-line audio/video downloader"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+files = [
+ {file = "yt_dlp-2025.12.8-py3-none-any.whl", hash = "sha256:36e2584342e409cfbfa0b5e61448a1c5189e345cf4564294456ee509e7d3e065"},
+ {file = "yt_dlp-2025.12.8.tar.gz", hash = "sha256:b773c81bb6b71cb2c111cfb859f453c7a71cf2ef44eff234ff155877184c3e4f"},
+]
+
+[package.extras]
+build = ["build", "hatchling (>=1.27.0)", "pip", "setuptools (>=71.0.2)", "wheel"]
+curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || >=0.10.dev0,<0.14) ; implementation_name == \"cpython\""]
+default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=2.0.2,<3)", "websockets (>=13.0)", "yt-dlp-ejs (==0.3.2)"]
+dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.14.0,<0.15.0)"]
+pyinstaller = ["pyinstaller (>=6.17.0)"]
+secretstorage = ["cffi", "secretstorage"]
+static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.14.0,<0.15.0)"]
+test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
+
[[package]]
name = "zerobouncesdk"
version = "1.1.2"
@@ -7512,4 +7557,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
-content-hash = "ee5742dc1a9df50dfc06d4b26a1682cbb2b25cab6b79ce5625ec272f93e4f4bf"
+content-hash = "8239323f9ae6713224dffd1fe8ba8b449fe88b6c3c7a90940294a74f43a0387a"
diff --git a/autogpt_platform/backend/pyproject.toml b/autogpt_platform/backend/pyproject.toml
index fe263e47c0..24aea39f33 100644
--- a/autogpt_platform/backend/pyproject.toml
+++ b/autogpt_platform/backend/pyproject.toml
@@ -20,6 +20,7 @@ click = "^8.2.0"
cryptography = "^45.0"
discord-py = "^2.5.2"
e2b-code-interpreter = "^1.5.2"
+elevenlabs = "^1.50.0"
fastapi = "^0.116.1"
feedparser = "^6.0.11"
flake8 = "^7.3.0"
@@ -71,6 +72,7 @@ tweepy = "^4.16.0"
uvicorn = { extras = ["standard"], version = "^0.35.0" }
websockets = "^15.0"
youtube-transcript-api = "^1.2.1"
+yt-dlp = "2025.12.08"
zerobouncesdk = "^1.1.2"
# NOTE: please insert new dependencies in their alphabetical location
pytest-snapshot = "^0.9.0"
diff --git a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx
index 4213711447..c58bdac642 100644
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx
@@ -1,6 +1,6 @@
import { beautifyString } from "@/lib/utils";
import { Clipboard, Maximize2 } from "lucide-react";
-import React, { useState } from "react";
+import React, { useMemo, useState } from "react";
import { Button } from "../../../../../components/__legacy__/ui/button";
import { ContentRenderer } from "../../../../../components/__legacy__/ui/render";
import {
@@ -11,6 +11,12 @@ import {
TableHeader,
TableRow,
} from "../../../../../components/__legacy__/ui/table";
+import type { OutputMetadata } from "@/components/contextual/OutputRenderers";
+import {
+ globalRegistry,
+ OutputItem,
+} from "@/components/contextual/OutputRenderers";
+import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import { useToast } from "../../../../../components/molecules/Toast/use-toast";
import ExpandableOutputDialog from "./ExpandableOutputDialog";
@@ -26,6 +32,9 @@ export default function DataTable({
data,
}: DataTableProps) {
const { toast } = useToast();
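+ // Feature flag gates the enhanced renderer pipeline below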
+ const enableEnhancedOutputHandling = useGetFlag(
+ Flag.ENABLE_ENHANCED_OUTPUT_HANDLING,
+ );
const [expandedDialog, setExpandedDialog] = useState<{
isOpen: boolean;
execId: string;
@@ -33,6 +42,15 @@ export default function DataTable({
data: any[];
} | null>(null);
+ // Prepare renderers for each item when enhanced mode is enabled
+ const getItemRenderer = useMemo(() => {
+ if (!enableEnhancedOutputHandling) return null;
+ return (item: unknown) => {
+ const metadata: OutputMetadata = {};
+ return globalRegistry.getRenderer(item, metadata);
+ };
+ }, [enableEnhancedOutputHandling]);
+
const copyData = (pin: string, data: string) => {
navigator.clipboard.writeText(data).then(() => {
toast({
@@ -102,15 +120,31 @@ export default function DataTable({