Merge branch 'dev' into feature/video-editing-blocks

feat(video): refactor video storage methods for improved testability across blocks
Merge branch 'feature/video-editing-blocks' of https://github.com/Significant-Gravitas/AutoGPT into feature/video-editing-blocks
2026-01-23 14:08:02 -05:00 · 2026-01-23 12:39:34 -06:00 · 2026-01-23 12:36:28 -06:00 · 2026-01-23 12:16:34 -06:00 · 2026-01-23 12:15:59 -06:00 · 2026-01-23 01:43:25 +00:00
21 changed files with 1484 additions and 254 deletions
--- a/autogpt_platform/backend/.env.default
+++ b/autogpt_platform/backend/.env.default
@@ -152,6 +152,7 @@ REPLICATE_API_KEY=
 REVID_API_KEY=
 SCREENSHOTONE_API_KEY=
 UNREAL_SPEECH_API_KEY=
+ELEVENLABS_API_KEY=

 # Data & Search Services
 E2B_API_KEY=
--- a/autogpt_platform/backend/backend/api/features/oauth_test.py
+++ b/autogpt_platform/backend/backend/api/features/oauth_test.py
@@ -165,7 +165,7 @@ async def client(server, test_user: str) -> AsyncGenerator[httpx.AsyncClient, No


@pytest.mark.asyncio(loop_scope="session")
-async def test_authorize_creates_code_in_database_test(
+async def test_authorize_creates_code_in_database(
    client: httpx.AsyncClient,
    test_user: str,
    test_oauth_app: dict,
--- a/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
+++ b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
@@ -0,0 +1,32 @@
+"""ElevenLabs integration blocks - test credentials and shared utilities."""
+
+from typing import Literal
+
+from pydantic import SecretStr
+
+from backend.data.model import APIKeyCredentials, CredentialsMetaInput
+from backend.integrations.providers import ProviderName
+
+# Placeholder API-key credentials; the key is an obvious mock value.
+TEST_CREDENTIALS = APIKeyCredentials(
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    provider="elevenlabs",
+    api_key=SecretStr("mock-elevenlabs-api-key"),
+    title="Mock ElevenLabs API key",
+    expires_at=None,
+)
+
+# Metadata-only view of TEST_CREDENTIALS; the API key itself is left out.
+TEST_CREDENTIALS_INPUT = {
+    "provider": TEST_CREDENTIALS.provider,
+    "id": TEST_CREDENTIALS.id,
+    "type": TEST_CREDENTIALS.type,
+    "title": TEST_CREDENTIALS.title,
+}
+
+# ElevenLabs credentials are plain API-key credentials.
+ElevenLabsCredentials = APIKeyCredentials
+# Credentials input restricted to the ElevenLabs provider and the "api_key" type.
+ElevenLabsCredentialsInput = CredentialsMetaInput[
+    Literal[ProviderName.ELEVENLABS], Literal["api_key"]
+]
--- a/autogpt_platform/backend/backend/blocks/media.py
+++ b/autogpt_platform/backend/backend/blocks/media.py
@@ -1,251 +0,0 @@
-import os
-import tempfile
-from typing import Literal, Optional
-
-from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.video.fx.Loop import Loop
-from moviepy.video.io.VideoFileClip import VideoFileClip
-
-from backend.data.block import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
-from backend.data.model import SchemaField
-from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
-
-
-class MediaDurationBlock(Block):
-
-    class Input(BlockSchemaInput):
-        media_in: MediaFileType = SchemaField(
-            description="Media input (URL, data URI, or local path)."
-        )
-        is_video: bool = SchemaField(
-            description="Whether the media is a video (True) or audio (False).",
-            default=True,
-        )
-
-    class Output(BlockSchemaOutput):
-        duration: float = SchemaField(
-            description="Duration of the media file (in seconds)."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
-            description="Block to get the duration of a media file.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=MediaDurationBlock.Input,
-            output_schema=MediaDurationBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        graph_exec_id: str,
-        user_id: str,
-        **kwargs,
-    ) -> BlockOutput:
-        # 1) Store the input media locally
-        local_media_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.media_in,
-            user_id=user_id,
-            return_content=False,
-        )
-        media_abspath = get_exec_file_path(graph_exec_id, local_media_path)
-
-        # 2) Load the clip
-        if input_data.is_video:
-            clip = VideoFileClip(media_abspath)
-        else:
-            clip = AudioFileClip(media_abspath)
-
-        yield "duration", clip.duration
-
-
-class LoopVideoBlock(Block):
-    """
-    Block for looping (repeating) a video clip until a given duration or number of loops.
-    """
-
-    class Input(BlockSchemaInput):
-        video_in: MediaFileType = SchemaField(
-            description="The input video (can be a URL, data URI, or local path)."
-        )
-        # Provide EITHER a `duration` or `n_loops` or both. We'll demonstrate `duration`.
-        duration: Optional[float] = SchemaField(
-            description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
-            default=None,
-            ge=0.0,
-        )
-        n_loops: Optional[int] = SchemaField(
-            description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
-            default=None,
-            ge=1,
-        )
-        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
-            description="How to return the output video. Either a relative path or base64 data URI.",
-            default="file_path",
-        )
-
-    class Output(BlockSchemaOutput):
-        video_out: str = SchemaField(
-            description="Looped video returned either as a relative path or a data URI."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="8bf9eef6-5451-4213-b265-25306446e94b",
-            description="Block to loop a video to a given duration or number of repeats.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=LoopVideoBlock.Input,
-            output_schema=LoopVideoBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        node_exec_id: str,
-        graph_exec_id: str,
-        user_id: str,
-        **kwargs,
-    ) -> BlockOutput:
-        # 1) Store the input video locally
-        local_video_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.video_in,
-            user_id=user_id,
-            return_content=False,
-        )
-        input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
-
-        # 2) Load the clip
-        clip = VideoFileClip(input_abspath)
-
-        # 3) Apply the loop effect
-        looped_clip = clip
-        if input_data.duration:
-            # Loop until we reach the specified duration
-            looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)])
-        elif input_data.n_loops:
-            looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)])
-        else:
-            raise ValueError("Either 'duration' or 'n_loops' must be provided.")
-
-        assert isinstance(looped_clip, VideoFileClip)
-
-        # 4) Save the looped output
-        output_filename = MediaFileType(
-            f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
-        )
-        output_abspath = get_exec_file_path(graph_exec_id, output_filename)
-
-        looped_clip = looped_clip.with_audio(clip.audio)
-        looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
-        # Return as data URI
-        video_out = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=output_filename,
-            user_id=user_id,
-            return_content=input_data.output_return_type == "data_uri",
-        )
-
-        yield "video_out", video_out
-
-
-class AddAudioToVideoBlock(Block):
-    """
-    Block that adds (attaches) an audio track to an existing video.
-    Optionally scale the volume of the new track.
-    """
-
-    class Input(BlockSchemaInput):
-        video_in: MediaFileType = SchemaField(
-            description="Video input (URL, data URI, or local path)."
-        )
-        audio_in: MediaFileType = SchemaField(
-            description="Audio input (URL, data URI, or local path)."
-        )
-        volume: float = SchemaField(
-            description="Volume scale for the newly attached audio track (1.0 = original).",
-            default=1.0,
-        )
-        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
-            description="Return the final output as a relative path or base64 data URI.",
-            default="file_path",
-        )
-
-    class Output(BlockSchemaOutput):
-        video_out: MediaFileType = SchemaField(
-            description="Final video (with attached audio), as a path or data URI."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="3503748d-62b6-4425-91d6-725b064af509",
-            description="Block to attach an audio file to a video file using moviepy.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=AddAudioToVideoBlock.Input,
-            output_schema=AddAudioToVideoBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        node_exec_id: str,
-        graph_exec_id: str,
-        user_id: str,
-        **kwargs,
-    ) -> BlockOutput:
-        # 1) Store the inputs locally
-        local_video_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.video_in,
-            user_id=user_id,
-            return_content=False,
-        )
-        local_audio_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.audio_in,
-            user_id=user_id,
-            return_content=False,
-        )
-
-        abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id)
-        video_abspath = os.path.join(abs_temp_dir, local_video_path)
-        audio_abspath = os.path.join(abs_temp_dir, local_audio_path)
-
-        # 2) Load video + audio with moviepy
-        video_clip = VideoFileClip(video_abspath)
-        audio_clip = AudioFileClip(audio_abspath)
-        # Optionally scale volume
-        if input_data.volume != 1.0:
-            audio_clip = audio_clip.with_volume_scaled(input_data.volume)
-
-        # 3) Attach the new audio track
-        final_clip = video_clip.with_audio(audio_clip)
-
-        # 4) Write to output file
-        output_filename = MediaFileType(
-            f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
-        )
-        output_abspath = os.path.join(abs_temp_dir, output_filename)
-        final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
-        # 5) Return either path or data URI
-        video_out = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=output_filename,
-            user_id=user_id,
-            return_content=input_data.output_return_type == "data_uri",
-        )
-
-        yield "video_out", video_out
--- a/autogpt_platform/backend/backend/blocks/video/__init__.py
+++ b/autogpt_platform/backend/backend/blocks/video/__init__.py
@@ -0,0 +1,38 @@
+"""Video editing blocks for AutoGPT Platform.
+
+This module provides blocks for:
+- Downloading videos from URLs (YouTube, Vimeo, news sites, direct links)
+- Clipping/trimming video segments
+- Concatenating multiple videos
+- Adding text overlays
+- Adding AI-generated narration
+- Getting media duration
+- Looping videos
+- Adding audio to videos
+
+Dependencies:
+- yt-dlp: For video downloading
+- moviepy: For video editing operations
+- requests: For API calls (narration block)
+"""
+
+from backend.blocks.video.add_audio import AddAudioToVideoBlock
+from backend.blocks.video.clip import VideoClipBlock
+from backend.blocks.video.concat import VideoConcatBlock
+from backend.blocks.video.download import VideoDownloadBlock
+from backend.blocks.video.duration import MediaDurationBlock
+from backend.blocks.video.loop import LoopVideoBlock
+from backend.blocks.video.narration import VideoNarrationBlock
+from backend.blocks.video.text_overlay import VideoTextOverlayBlock
+
+# Public API of the package; names are listed alphabetically.
+__all__ = [
+    "AddAudioToVideoBlock",
+    "LoopVideoBlock",
+    "MediaDurationBlock",
+    "VideoClipBlock",
+    "VideoConcatBlock",
+    "VideoDownloadBlock",
+    "VideoNarrationBlock",
+    "VideoTextOverlayBlock",
+]
--- a/autogpt_platform/backend/backend/blocks/video/add_audio.py
+++ b/autogpt_platform/backend/backend/blocks/video/add_audio.py
@@ -0,0 +1,119 @@
+"""AddAudioToVideoBlock - Attach an audio track to a video."""
+
+import os
+from typing import Literal
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class AddAudioToVideoBlock(Block):
+    """Attach a separately supplied audio track to a video.
+
+    The new track can be volume-scaled before it is attached, and the result
+    is returned as a relative path or a data URI.
+    """
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="Video input (URL, data URI, or local path)."
+        )
+        audio_in: MediaFileType = SchemaField(
+            description="Audio input (URL, data URI, or local path)."
+        )
+        volume: float = SchemaField(
+            description="Volume scale for the newly attached audio track (1.0 = original).",
+            default=1.0,
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the final output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Final video (with attached audio), as a path or data URI."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="3503748d-62b6-4425-91d6-725b064af509",
+            description="Block to attach an audio file to a video file using moviepy.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=AddAudioToVideoBlock.Input,
+            output_schema=AddAudioToVideoBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Store both inputs, combine them with moviepy, and yield ``video_out``."""
+        # Persist the video, then the audio, so moviepy can open both by path.
+        stored_video = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.video_in,
+            user_id=user_id,
+            return_content=False,
+        )
+        stored_audio = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.audio_in,
+            user_id=user_id,
+            return_content=False,
+        )
+        video_path = get_exec_file_path(graph_exec_id, stored_video)
+        audio_path = get_exec_file_path(graph_exec_id, stored_audio)
+
+        # Pre-declare every handle so the cleanup can tell what was opened.
+        source_video = source_audio = scaled_audio = muxed = None
+        try:
+            source_video = VideoFileClip(video_path)
+            source_audio = AudioFileClip(audio_path)
+
+            # A scaled copy is only created for a non-default volume.
+            if input_data.volume == 1.0:
+                track = source_audio
+            else:
+                scaled_audio = source_audio.with_volume_scaled(input_data.volume)
+                track = scaled_audio
+
+            muxed = source_video.with_audio(track)
+
+            # Output name: node execution id + marker + stored video's base name.
+            result_name = MediaFileType(
+                f"{node_exec_id}_audio_attached_{os.path.basename(stored_video)}"
+            )
+            muxed.write_videofile(
+                get_exec_file_path(graph_exec_id, result_name),
+                codec="libx264",
+                audio_codec="aac",
+            )
+
+            as_data_uri = input_data.output_return_type == "data_uri"
+            yield "video_out", await store_media_file(
+                graph_exec_id=graph_exec_id,
+                file=result_name,
+                user_id=user_id,
+                return_content=as_data_uri,
+            )
+        finally:
+            # Close in reverse order of creation; unopened handles stay None.
+            for handle in (muxed, scaled_audio, source_audio, source_video):
+                if handle:
+                    handle.close()
--- a/autogpt_platform/backend/backend/blocks/video/clip.py
+++ b/autogpt_platform/backend/backend/blocks/video/clip.py
@@ -0,0 +1,201 @@
+"""VideoClipBlock - Extract a segment from a video file."""
+
+import os
+from typing import Literal
+
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoClipBlock(Block):
+    """Extract a time segment from a video.
+
+    The input is stored through ``store_media_file``, the segment between
+    ``start_time`` and ``end_time`` is written to a new file with the
+    ``output_format`` extension, and that file is returned as a path or a
+    data URI.
+    """
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="Input video (URL, data URI, or local path)"
+        )
+        start_time: float = SchemaField(description="Start time in seconds", ge=0.0)
+        end_time: float = SchemaField(description="End time in seconds", ge=0.0)
+        output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+            description="Output format", default="mp4", advanced=True
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Clipped video file (path or data URI)"
+        )
+        duration: float = SchemaField(description="Clip duration in seconds")
+
+    def __init__(self):
+        super().__init__(
+            id="8f539119-e580-4d86-ad41-86fbcb22abb1",
+            description="Extract a time segment from a video",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={
+                "video_in": "/tmp/test.mp4",
+                "start_time": 0.0,
+                "end_time": 10.0,
+            },
+            test_output=[("video_out", str), ("duration", float)],
+            # Stubs for the storage and clipping helpers, which were extracted
+            # for testability.
+            test_mock={
+                "_clip_video": lambda *args: 10.0,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "clip_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store input video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store output video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _clip_video(
+        self,
+        video_abspath: str,
+        output_abspath: str,
+        start_time: float,
+        end_time: float,
+    ) -> float:
+        """Write the segment between ``start_time`` and ``end_time`` to a new file.
+
+        Extracted for testability.
+
+        The codecs follow the container implied by ``output_abspath``: WebM
+        only accepts VP8/VP9/AV1 video with Vorbis/Opus audio, so it gets
+        libvpx/libvorbis; every other extension keeps libx264/aac.
+
+        Returns:
+            Duration of the written clip in seconds.
+        """
+        # libx264/aac cannot be muxed into WebM; switch codecs for that container.
+        if output_abspath.lower().endswith(".webm"):
+            video_codec, audio_codec = "libvpx", "libvorbis"
+        else:
+            video_codec, audio_codec = "libx264", "aac"
+
+        clip = None
+        subclip = None
+        try:
+            clip = VideoFileClip(video_abspath)
+            subclip = clip.subclipped(start_time, end_time)
+            subclip.write_videofile(
+                output_abspath, codec=video_codec, audio_codec=audio_codec
+            )
+            return subclip.duration
+        finally:
+            # Close the derived subclip first, then the source clip.
+            if subclip:
+                subclip.close()
+            if clip:
+                clip.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Clip ``video_in`` and yield ``video_out`` followed by ``duration``.
+
+        Raises:
+            BlockExecutionError: If ``end_time`` is not greater than
+                ``start_time``, or if storing or clipping the video fails.
+        """
+        # Validate time range
+        if input_data.end_time <= input_data.start_time:
+            raise BlockExecutionError(
+                message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+                block_name=self.name,
+                block_id=str(self.id),
+            )
+
+        try:
+            # Store the input video locally
+            local_video_path = await self._store_input_video(
+                graph_exec_id, input_data.video_in, user_id
+            )
+            video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+            # Build output path
+            output_filename = MediaFileType(
+                f"{node_exec_id}_clip_{os.path.basename(local_video_path)}"
+            )
+            # Ensure correct extension
+            base, _ = os.path.splitext(output_filename)
+            output_filename = MediaFileType(f"{base}.{input_data.output_format}")
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            duration = self._clip_video(
+                video_abspath,
+                output_abspath,
+                input_data.start_time,
+                input_data.end_time,
+            )
+
+            # Return as data URI or path
+            video_out = await self._store_output_video(
+                graph_exec_id,
+                output_filename,
+                user_id,
+                input_data.output_return_type == "data_uri",
+            )
+
+            yield "video_out", video_out
+            yield "duration", duration
+
+        except BlockExecutionError:
+            raise
+        except Exception as e:
+            raise BlockExecutionError(
+                message=f"Failed to clip video: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/blocks/video/concat.py
+++ b/autogpt_platform/backend/backend/blocks/video/concat.py
@@ -0,0 +1,239 @@
+"""VideoConcatBlock - Concatenate multiple video clips into one."""
+
+from typing import Literal
+
+from moviepy import concatenate_videoclips
+from moviepy.video.fx import CrossFadeIn, CrossFadeOut, FadeIn, FadeOut
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoConcatBlock(Block):
+    """Merge multiple video clips into one continuous video.
+
+    Each input is stored through ``store_media_file``, the clips are joined in
+    list order with the selected transition, and the result is returned as a
+    path or a data URI.
+    """
+
+    class Input(BlockSchemaInput):
+        videos: list[MediaFileType] = SchemaField(
+            description="List of video files to concatenate (in order)"
+        )
+        transition: Literal["none", "crossfade", "fade_black"] = SchemaField(
+            description="Transition between clips", default="none"
+        )
+        transition_duration: int = SchemaField(
+            description="Transition duration in seconds",
+            default=1,
+            ge=0,
+            advanced=True,
+        )
+        output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+            description="Output format", default="mp4", advanced=True
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Concatenated video file (path or data URI)"
+        )
+        total_duration: float = SchemaField(description="Total duration in seconds")
+
+    def __init__(self):
+        super().__init__(
+            id="9b0f531a-1118-487f-aeec-3fa63ea8900a",
+            description="Merge multiple video clips into one continuous video",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={"videos": ["/tmp/a.mp4", "/tmp/b.mp4"]},
+            test_output=[("video_out", str), ("total_duration", float)],
+            # Stubs for the storage and concatenation helpers, which were
+            # extracted for testability.
+            test_mock={
+                "_concat_videos": lambda *args: 20.0,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "concat_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store input video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store output video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _concat_videos(
+        self,
+        video_abspaths: list[str],
+        output_abspath: str,
+        transition: str,
+        transition_duration: int,
+    ) -> float:
+        """Concatenate videos and write the result. Extracted for testability.
+
+        ``"crossfade"`` overlaps neighbouring clips by ``transition_duration``
+        seconds, ``"fade_black"`` fades every clip in and out, and any other
+        value joins the clips back to back.
+
+        The codecs follow the container implied by ``output_abspath``: WebM
+        only accepts VP8/VP9/AV1 video with Vorbis/Opus audio, so it gets
+        libvpx/libvorbis; every other extension keeps libx264/aac.
+
+        Returns:
+            Duration of the written video in seconds.
+        """
+        # libx264/aac cannot be muxed into WebM; switch codecs for that container.
+        if output_abspath.lower().endswith(".webm"):
+            video_codec, audio_codec = "libvpx", "libvorbis"
+        else:
+            video_codec, audio_codec = "libx264", "aac"
+
+        clips = []
+        faded_clips = []
+        final = None
+        try:
+            # Load clips one at a time so those opened before a failure are
+            # still closed by the ``finally`` block.
+            for v in video_abspaths:
+                clips.append(VideoFileClip(v))
+
+            if transition == "crossfade":
+                for i, clip in enumerate(clips):
+                    effects = []
+                    # No fade-in on the first clip and no fade-out on the last one.
+                    if i > 0:
+                        effects.append(CrossFadeIn(transition_duration))
+                    if i < len(clips) - 1:
+                        effects.append(CrossFadeOut(transition_duration))
+                    if effects:
+                        clip = clip.with_effects(effects)
+                    faded_clips.append(clip)
+                # Negative padding makes consecutive clips overlap.
+                final = concatenate_videoclips(
+                    faded_clips,
+                    method="compose",
+                    padding=-transition_duration,
+                )
+            elif transition == "fade_black":
+                for clip in clips:
+                    faded = clip.with_effects(
+                        [FadeIn(transition_duration), FadeOut(transition_duration)]
+                    )
+                    faded_clips.append(faded)
+                final = concatenate_videoclips(faded_clips)
+            else:
+                final = concatenate_videoclips(clips)
+
+            final.write_videofile(
+                output_abspath, codec=video_codec, audio_codec=audio_codec
+            )
+
+            return final.duration
+        finally:
+            # Close the composite first, then the derived and source clips.
+            if final:
+                final.close()
+            for clip in faded_clips:
+                clip.close()
+            for clip in clips:
+                clip.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Join ``videos`` and yield ``video_out`` then ``total_duration``.
+
+        Raises:
+            BlockExecutionError: If fewer than two videos are given, or if
+                storing or concatenating the videos fails.
+        """
+        # Validate minimum clips
+        if len(input_data.videos) < 2:
+            raise BlockExecutionError(
+                message="At least 2 videos are required for concatenation",
+                block_name=self.name,
+                block_id=str(self.id),
+            )
+
+        try:
+            # Store all input videos locally
+            video_abspaths = []
+            for video in input_data.videos:
+                local_path = await self._store_input_video(
+                    graph_exec_id, video, user_id
+                )
+                video_abspaths.append(get_exec_file_path(graph_exec_id, local_path))
+
+            # Build output path
+            output_filename = MediaFileType(
+                f"{node_exec_id}_concat.{input_data.output_format}"
+            )
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            total_duration = self._concat_videos(
+                video_abspaths,
+                output_abspath,
+                input_data.transition,
+                input_data.transition_duration,
+            )
+
+            # Return as data URI or path
+            video_out = await self._store_output_video(
+                graph_exec_id,
+                output_filename,
+                user_id,
+                input_data.output_return_type == "data_uri",
+            )
+
+            yield "video_out", video_out
+            yield "total_duration", total_duration
+
+        except BlockExecutionError:
+            raise
+        except Exception as e:
+            raise BlockExecutionError(
+                message=f"Failed to concatenate videos: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/blocks/video/download.py
+++ b/autogpt_platform/backend/backend/blocks/video/download.py
@@ -0,0 +1,202 @@
+"""VideoDownloadBlock - Download video from URL (YouTube, Vimeo, news sites, direct links)."""
+
+import os
+import typing
+from typing import Literal
+
+import yt_dlp
+
+if typing.TYPE_CHECKING:
+    from yt_dlp import _Params
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoDownloadBlock(Block):
+    """Download video from URL using yt-dlp."""
+
+    class Input(BlockSchemaInput):
+        url: str = SchemaField(
+            description="URL of the video to download (YouTube, Vimeo, direct link, etc.)",
+            placeholder="https://www.youtube.com/watch?v=...",
+        )
+        quality: Literal["best", "1080p", "720p", "480p", "audio_only"] = SchemaField(
+            description="Video quality preference", default="720p"
+        )
+        output_format: Literal["mp4", "webm", "mkv"] = SchemaField(
+            description="Output video format", default="mp4", advanced=True
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_file: MediaFileType = SchemaField(
+            description="Downloaded video (path or data URI)"
+        )
+        duration: float = SchemaField(description="Video duration in seconds")
+        title: str = SchemaField(description="Video title from source")
+        source_url: str = SchemaField(description="Original source URL")
+
+    def __init__(self):
+        super().__init__(
+            id="c35daabb-cd60-493b-b9ad-51f1fe4b50c4",
+            description="Download video from URL (YouTube, Vimeo, news sites, direct links)",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={
+                "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+                "quality": "480p",
+            },
+            test_output=[
+                ("video_file", str),
+                ("duration", float),
+                ("title", str),
+                ("source_url", str),
+            ],
+            test_mock={
+                "_download_video": lambda *args: ("video.mp4", 212.0, "Test Video"),
+                "_store_output_video": lambda *args, **kwargs: "video.mp4",
+            },
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store output video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _get_format_string(self, quality: str) -> str:
+        """Map a quality preset to a yt-dlp format selector.
+
+        Unknown presets fall back to the 720p selector.
+        """
+        formats = {
+            "best": "bestvideo+bestaudio/best",
+            "1080p": "bestvideo[height<=1080]+bestaudio/best[height<=1080]",
+            "720p": "bestvideo[height<=720]+bestaudio/best[height<=720]",
+            "480p": "bestvideo[height<=480]+bestaudio/best[height<=480]",
+            "audio_only": "bestaudio/best",
+        }
+        return formats.get(quality, formats["720p"])
+
+    def _download_video(
+        self,
+        url: str,
+        quality: str,
+        output_format: str,
+        output_dir: str,
+        node_exec_id: str,
+    ) -> tuple[str, float, str]:
+        """Download video. Extracted for testability.
+
+        Args:
+            url: Source URL handed to yt-dlp.
+            quality: Quality preset understood by `_get_format_string`.
+            output_format: Container requested when yt-dlp merges streams.
+            output_dir: Directory the download is written into.
+            node_exec_id: Prefix that keeps output names unique per node run.
+
+        Returns:
+            `(filename, duration_seconds, title)`, where `filename` is the
+            base name of the downloaded file inside `output_dir`.
+        """
+        output_template = os.path.join(
+            output_dir, f"{node_exec_id}_%(title).50s.%(ext)s"
+        )
+
+        ydl_opts: "_Params" = {
+            "format": self._get_format_string(quality),
+            "outtmpl": output_template,
+            "merge_output_format": output_format,
+            # The block emits exactly one file, so a watch URL that also
+            # carries a playlist id must not pull in the whole playlist.
+            "noplaylist": True,
+            "quiet": True,
+            "no_warnings": True,
+        }
+
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=True)
+            prepared_path = ydl.prepare_filename(info)
+
+        # When separate streams were merged, the file on disk carries the
+        # requested container extension instead of the prepared one.
+        merged_path = os.path.splitext(prepared_path)[0] + f".{output_format}"
+        # A single-stream download (e.g. "audio_only") is never merged and
+        # keeps its native extension, so only use the merged name if it exists.
+        video_path = merged_path if os.path.exists(merged_path) else prepared_path
+
+        # Return just the filename, not the full path
+        return (
+            os.path.basename(video_path),
+            float(info.get("duration") or 0.0),
+            info.get("title") or "Unknown",
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Download the requested video and emit it with its metadata.
+
+        Raises:
+            BlockExecutionError: If the download or the storage step fails.
+        """
+        try:
+            # Get the exec file directory
+            output_dir = get_exec_file_path(graph_exec_id, "")
+            os.makedirs(output_dir, exist_ok=True)
+
+            filename, duration, title = self._download_video(
+                input_data.url,
+                input_data.quality,
+                input_data.output_format,
+                output_dir,
+                node_exec_id,
+            )
+
+            # Return as data URI or path
+            video_out = await self._store_output_video(
+                graph_exec_id,
+                MediaFileType(filename),
+                user_id,
+                input_data.output_return_type == "data_uri",
+            )
+
+            yield "video_file", video_out
+            yield "duration", duration
+            yield "title", title
+            yield "source_url", input_data.url
+
+        except Exception as e:
+            raise BlockExecutionError(
+                message=f"Failed to download video: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/blocks/video/duration.py
+++ b/autogpt_platform/backend/backend/blocks/video/duration.py
@@ -0,0 +1,74 @@
+"""MediaDurationBlock - Get the duration of a media file."""
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class MediaDurationBlock(Block):
+    """Get the duration of a media file."""
+
+    class Input(BlockSchemaInput):
+        media_in: MediaFileType = SchemaField(
+            description="Media input (URL, data URI, or local path)."
+        )
+        is_video: bool = SchemaField(
+            description="Whether the media is a video (True) or audio (False).",
+            default=True,
+        )
+
+    class Output(BlockSchemaOutput):
+        duration: float = SchemaField(
+            description="Duration of the media file (in seconds)."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
+            description="Block to get the duration of a media file.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=MediaDurationBlock.Input,
+            output_schema=MediaDurationBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Store the media locally, read its duration and emit it.
+
+        The clip is closed before the result is yielded so that it is not
+        left open while downstream consumers process the output.
+        """
+        # 1) Store the input media locally
+        local_media_path = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.media_in,
+            user_id=user_id,
+            return_content=False,
+        )
+        media_abspath = get_exec_file_path(graph_exec_id, local_media_path)
+
+        # 2) Load the clip with the reader that matches the media kind
+        clip_cls = VideoFileClip if input_data.is_video else AudioFileClip
+        clip = clip_cls(media_abspath)
+        try:
+            duration = clip.duration
+        finally:
+            clip.close()
+
+        # 3) Emit only after the clip has been closed
+        yield "duration", duration
--- a/autogpt_platform/backend/backend/blocks/video/loop.py
+++ b/autogpt_platform/backend/backend/blocks/video/loop.py
@@ -0,0 +1,125 @@
+"""LoopVideoBlock - Loop a video to a given duration or number of repeats."""
+
+import os
+from typing import Literal, Optional
+
+from moviepy.video.fx.Loop import Loop
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class LoopVideoBlock(Block):
+    """Loop (repeat) a video clip until a given duration or number of loops."""
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="The input video (can be a URL, data URI, or local path)."
+        )
+        duration: Optional[float] = SchemaField(
+            description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
+            default=None,
+            ge=0.0,
+        )
+        n_loops: Optional[int] = SchemaField(
+            description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
+            default=None,
+            ge=1,
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="How to return the output video. Either a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: str = SchemaField(
+            description="Looped video returned either as a relative path or a data URI."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="8bf9eef6-5451-4213-b265-25306446e94b",
+            description="Block to loop a video to a given duration or number of repeats.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=LoopVideoBlock.Input,
+            output_schema=LoopVideoBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Loop the input video and emit the stored result.
+
+        `duration` takes precedence over `n_loops` when both are set.
+
+        Raises:
+            ValueError: If neither `duration` nor `n_loops` is provided.
+        """
+        # Reject unusable input before any media is fetched or decoded.
+        # NOTE(review): the field descriptions promise a "no looping" default
+        # when a value is omitted, but this method raises if both are omitted.
+        if input_data.duration:
+            loop_effect = Loop(duration=input_data.duration)
+        elif input_data.n_loops:
+            loop_effect = Loop(n=input_data.n_loops)
+        else:
+            raise ValueError("Either 'duration' or 'n_loops' must be provided.")
+
+        # 1) Store the input video locally
+        local_video_path = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.video_in,
+            user_id=user_id,
+            return_content=False,
+        )
+        input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+        output_filename = MediaFileType(
+            f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
+        )
+        output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+        clip: VideoFileClip | None = None
+        looped_clip: VideoFileClip | None = None
+        try:
+            # 2) Load the clip
+            clip = VideoFileClip(input_abspath)
+
+            # 3) Apply the loop effect
+            # Note: Loop effect handles both video and audio looping automatically
+            looped_clip = clip.with_effects([loop_effect])  # type: ignore[arg-type] Clip implements shallow copy that loses type info
+
+            # 4) Save the looped output
+            looped_clip.write_videofile(
+                output_abspath, codec="libx264", audio_codec="aac"
+            )
+        finally:
+            # Close the clips before the result is handed downstream.
+            if looped_clip is not None:
+                looped_clip.close()
+            if clip is not None:
+                clip.close()
+
+        # Return as data URI or path
+        video_out = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=output_filename,
+            user_id=user_id,
+            return_content=input_data.output_return_type == "data_uri",
+        )
+
+        yield "video_out", video_out
--- a/autogpt_platform/backend/backend/blocks/video/narration.py
+++ b/autogpt_platform/backend/backend/blocks/video/narration.py
@@ -0,0 +1,272 @@
+"""VideoNarrationBlock - Generate AI voice narration and add to video."""
+
+import os
+from typing import Literal
+
+from elevenlabs import ElevenLabs
+from moviepy import CompositeAudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.elevenlabs._auth import (
+    TEST_CREDENTIALS,
+    TEST_CREDENTIALS_INPUT,
+    ElevenLabsCredentials,
+    ElevenLabsCredentialsInput,
+)
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import CredentialsField, SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoNarrationBlock(Block):
+    """Generate AI narration and add to video."""
+
+    class Input(BlockSchemaInput):
+        credentials: ElevenLabsCredentialsInput = CredentialsField(
+            description="ElevenLabs API key for voice synthesis"
+        )
+        video_in: MediaFileType = SchemaField(
+            description="Input video (URL, data URI, or local path)"
+        )
+        script: str = SchemaField(description="Narration script text")
+        voice_id: str = SchemaField(
+            description="ElevenLabs voice ID", default="21m00Tcm4TlvDq8ikWAM"  # Rachel
+        )
+        mix_mode: Literal["replace", "mix", "ducking"] = SchemaField(
+            description="How to combine with original audio. 'ducking' applies stronger attenuation than 'mix'.",
+            default="ducking",
+        )
+        narration_volume: float = SchemaField(
+            description="Narration volume (0.0 to 2.0)",
+            default=1.0,
+            ge=0.0,
+            le=2.0,
+            advanced=True,
+        )
+        original_volume: float = SchemaField(
+            description="Original audio volume when mixing (0.0 to 1.0)",
+            default=0.3,
+            ge=0.0,
+            le=1.0,
+            advanced=True,
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Video with narration (path or data URI)"
+        )
+        audio_file: MediaFileType = SchemaField(
+            description="Generated audio file (path or data URI)"
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="3d036b53-859c-4b17-9826-ca340f736e0e",
+            description="Generate AI narration and add to video",
+            categories={BlockCategory.MULTIMEDIA, BlockCategory.AI},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={
+                "video_in": "/tmp/test.mp4",
+                "script": "Hello world",
+                "credentials": TEST_CREDENTIALS_INPUT,
+            },
+            test_credentials=TEST_CREDENTIALS,
+            test_output=[("video_out", str), ("audio_file", str)],
+            test_mock={
+                "_generate_narration_audio": lambda *args: b"mock audio content",
+                "_add_narration_to_video": lambda *args: None,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "narrated_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store input video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store output video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _generate_narration_audio(
+        self, api_key: str, script: str, voice_id: str
+    ) -> bytes:
+        """Generate narration audio via ElevenLabs API."""
+        client = ElevenLabs(api_key=api_key)
+        audio_generator = client.text_to_speech.convert(
+            voice_id=voice_id,
+            text=script,
+            model_id="eleven_monolingual_v1",
+        )
+        # The SDK returns a generator, collect all chunks
+        return b"".join(audio_generator)
+
+    def _add_narration_to_video(
+        self,
+        video_abspath: str,
+        audio_abspath: str,
+        output_abspath: str,
+        mix_mode: str,
+        narration_volume: float,
+        original_volume: float,
+    ) -> None:
+        """Add narration audio to video. Extracted for testability.
+
+        Args:
+            video_abspath: Absolute path of the source video.
+            audio_abspath: Absolute path of the narration audio.
+            output_abspath: Absolute path the narrated video is written to.
+            mix_mode: "replace" drops the original audio; "mix" keeps it at
+                `original_volume`; any other value ("ducking") keeps it at
+                `original_volume * 0.3`.
+            narration_volume: Volume factor applied to the narration.
+            original_volume: Volume factor applied to the original audio.
+        """
+        video = None
+        final = None
+        narration_original = None
+        narration_scaled = None
+        original = None
+
+        try:
+            video = VideoFileClip(video_abspath)
+            narration_original = AudioFileClip(audio_abspath)
+            narration_scaled = narration_original.with_volume_scaled(narration_volume)
+
+            # "replace" (or a silent source video) leaves only the narration.
+            final_audio = narration_scaled
+            if mix_mode != "replace" and video.audio:
+                # Ducking uses a much lower volume for original audio
+                background_volume = (
+                    original_volume if mix_mode == "mix" else original_volume * 0.3
+                )
+                original = video.audio.with_volume_scaled(background_volume)
+                final_audio = CompositeAudioClip([original, narration_scaled])
+
+            final = video.with_audio(final_audio)
+            final.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
+
+        finally:
+            if original:
+                original.close()
+            if narration_scaled:
+                narration_scaled.close()
+            if narration_original:
+                narration_original.close()
+            if final:
+                final.close()
+            if video:
+                video.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: ElevenLabsCredentials,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Synthesize the script, lay it over the video and emit both files.
+
+        Raises:
+            BlockExecutionError: If the script is blank or any step fails.
+        """
+        # A blank script has nothing to narrate; fail before spending an API call.
+        if not input_data.script.strip():
+            raise BlockExecutionError(
+                message="Narration script must not be empty",
+                block_name=self.name,
+                block_id=str(self.id),
+            )
+
+        try:
+            # Store the input video locally
+            local_video_path = await self._store_input_video(
+                graph_exec_id, input_data.video_in, user_id
+            )
+            video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+            # Generate narration audio via ElevenLabs
+            audio_content = self._generate_narration_audio(
+                credentials.api_key.get_secret_value(),
+                input_data.script,
+                input_data.voice_id,
+            )
+
+            # Save audio to exec file path
+            audio_filename = MediaFileType(f"{node_exec_id}_narration.mp3")
+            audio_abspath = get_exec_file_path(graph_exec_id, audio_filename)
+            os.makedirs(os.path.dirname(audio_abspath), exist_ok=True)
+            with open(audio_abspath, "wb") as f:
+                f.write(audio_content)
+
+            # Add narration to video
+            output_filename = MediaFileType(
+                f"{node_exec_id}_narrated_{os.path.basename(local_video_path)}"
+            )
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            self._add_narration_to_video(
+                video_abspath,
+                audio_abspath,
+                output_abspath,
+                input_data.mix_mode,
+                input_data.narration_volume,
+                input_data.original_volume,
+            )
+
+            # Return as data URI or path
+            return_as_data_uri = input_data.output_return_type == "data_uri"
+
+            video_out = await self._store_output_video(
+                graph_exec_id, output_filename, user_id, return_as_data_uri
+            )
+
+            audio_out = await self._store_output_video(
+                graph_exec_id, audio_filename, user_id, return_as_data_uri
+            )
+
+            yield "video_out", video_out
+            yield "audio_file", audio_out
+
+        except BlockExecutionError:
+            raise
+        except Exception as e:
+            raise BlockExecutionError(
+                message=f"Failed to add narration: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/blocks/video/text_overlay.py
+++ b/autogpt_platform/backend/backend/blocks/video/text_overlay.py
@@ -0,0 +1,241 @@
+"""VideoTextOverlayBlock - Add text overlay to video."""
+
+import os
+from typing import Literal
+
+from moviepy import CompositeVideoClip, TextClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoTextOverlayBlock(Block):
+    """Add text overlay/caption to video."""
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="Input video (URL, data URI, or local path)"
+        )
+        text: str = SchemaField(description="Text to overlay on video")
+        position: Literal[
+            "top",
+            "center",
+            "bottom",
+            "top-left",
+            "top-right",
+            "bottom-left",
+            "bottom-right",
+        ] = SchemaField(description="Position of text on screen", default="bottom")
+        start_time: float | None = SchemaField(
+            description="When to show text (seconds). None = entire video",
+            default=None,
+            advanced=True,
+        )
+        end_time: float | None = SchemaField(
+            description="When to hide text (seconds). None = until end",
+            default=None,
+            advanced=True,
+        )
+        font_size: int = SchemaField(
+            description="Font size", default=48, ge=12, le=200, advanced=True
+        )
+        font_color: str = SchemaField(
+            description="Font color (hex or name)", default="white", advanced=True
+        )
+        bg_color: str | None = SchemaField(
+            description="Background color behind text (None for transparent)",
+            default=None,
+            advanced=True,
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Video with text overlay (path or data URI)"
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="8ef14de6-cc90-430a-8cfa-3a003be92454",
+            description="Add text overlay/caption to video",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={"video_in": "/tmp/test.mp4", "text": "Hello World"},
+            test_output=[("video_out", str)],
+            test_mock={
+                "_add_text_overlay": lambda *args: None,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "overlay_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store input video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store output video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _add_text_overlay(
+        self,
+        video_abspath: str,
+        output_abspath: str,
+        text: str,
+        position: str,
+        start_time: float | None,
+        end_time: float | None,
+        font_size: int,
+        font_color: str,
+        bg_color: str | None,
+    ) -> None:
+        """Add text overlay to video. Extracted for testability.
+
+        A falsy `start_time` means 0 and a falsy `end_time` means the video
+        duration. Both are clamped to the video's own time range so the
+        overlay can never make the composite longer than the source video.
+        """
+        video = None
+        final = None
+        txt_clip = None
+        try:
+            video = VideoFileClip(video_abspath)
+
+            txt_clip = TextClip(
+                text=text,
+                font_size=font_size,
+                color=font_color,
+                bg_color=bg_color,
+            )
+
+            # Position mapping
+            pos_map = {
+                "top": ("center", "top"),
+                "center": ("center", "center"),
+                "bottom": ("center", "bottom"),
+                "top-left": ("left", "top"),
+                "top-right": ("right", "top"),
+                "bottom-left": ("left", "bottom"),
+                "bottom-right": ("right", "bottom"),
+            }
+
+            txt_clip = txt_clip.with_position(pos_map[position])
+
+            # Set timing, clamped to the video's own time range: a text clip
+            # that outlives the video would stretch the composite's duration.
+            start = min(max(start_time or 0, 0), video.duration)
+            end = min(max(end_time or video.duration, start), video.duration)
+            txt_clip = txt_clip.with_start(start).with_duration(end - start)
+
+            final = CompositeVideoClip([video, txt_clip])
+            final.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
+
+        finally:
+            if txt_clip:
+                txt_clip.close()
+            if final:
+                final.close()
+            if video:
+                video.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Render the text over the input video and emit the stored result.
+
+        Raises:
+            BlockExecutionError: If `end_time` is not after `start_time`, or
+                if storing, rendering or encoding the video fails.
+        """
+        # Validate time range if both are provided
+        if (
+            input_data.start_time is not None
+            and input_data.end_time is not None
+            and input_data.end_time <= input_data.start_time
+        ):
+            raise BlockExecutionError(
+                message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+                block_name=self.name,
+                block_id=str(self.id),
+            )
+
+        try:
+            # Store the input video locally
+            local_video_path = await self._store_input_video(
+                graph_exec_id, input_data.video_in, user_id
+            )
+            video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+            # Build output path
+            output_filename = MediaFileType(
+                f"{node_exec_id}_overlay_{os.path.basename(local_video_path)}"
+            )
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            self._add_text_overlay(
+                video_abspath,
+                output_abspath,
+                input_data.text,
+                input_data.position,
+                input_data.start_time,
+                input_data.end_time,
+                input_data.font_size,
+                input_data.font_color,
+                input_data.bg_color,
+            )
+
+            # Return as data URI or path
+            video_out = await self._store_output_video(
+                graph_exec_id,
+                output_filename,
+                user_id,
+                input_data.output_return_type == "data_uri",
+            )
+
+            yield "video_out", video_out
+
+        except BlockExecutionError:
+            raise
+        except Exception as e:
+            raise BlockExecutionError(
+                message=f"Failed to add text overlay: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -36,12 +36,14 @@ from backend.blocks.replicate.replicate_block import ReplicateModelBlock
 from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
 from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
 from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock
+from backend.blocks.video.narration import VideoNarrationBlock
 from backend.data.block import Block, BlockCost, BlockCostType
 from backend.integrations.credentials_store import (
    aiml_api_credentials,
    anthropic_credentials,
    apollo_credentials,
    did_credentials,
+    elevenlabs_credentials,
    enrichlayer_credentials,
    groq_credentials,
    ideogram_credentials,
@@ -640,4 +642,16 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
            },
        ),
    ],
+    VideoNarrationBlock: [
+        BlockCost(
+            cost_amount=5,  # ElevenLabs TTS cost
+            cost_filter={
+                "credentials": {
+                    "id": elevenlabs_credentials.id,
+                    "provider": elevenlabs_credentials.provider,
+                    "type": elevenlabs_credentials.type,
+                }
+            },
+        )
+    ],
 }
--- a/autogpt_platform/backend/backend/integrations/credentials_store.py
+++ b/autogpt_platform/backend/backend/integrations/credentials_store.py
@@ -224,6 +224,14 @@ openweathermap_credentials = APIKeyCredentials(
    expires_at=None,
 )

+elevenlabs_credentials = APIKeyCredentials(
+    id="f4a8b6c2-3d1e-4f5a-9b8c-7d6e5f4a3b2c",
+    provider="elevenlabs",
+    api_key=SecretStr(settings.secrets.elevenlabs_api_key),
+    title="Use Credits for ElevenLabs",
+    expires_at=None,
+)
+
 DEFAULT_CREDENTIALS = [
    ollama_credentials,
    revid_credentials,
@@ -252,6 +260,7 @@ DEFAULT_CREDENTIALS = [
    v0_credentials,
    webshare_proxy_credentials,
    openweathermap_credentials,
+    elevenlabs_credentials,
 ]

 SYSTEM_CREDENTIAL_IDS = {cred.id for cred in DEFAULT_CREDENTIALS}
@@ -366,6 +375,8 @@ class IntegrationCredentialsStore:
            all_credentials.append(webshare_proxy_credentials)
        if settings.secrets.openweathermap_api_key:
            all_credentials.append(openweathermap_credentials)
+        if settings.secrets.elevenlabs_api_key:
+            all_credentials.append(elevenlabs_credentials)
        return all_credentials

    async def get_creds_by_id(
--- a/autogpt_platform/backend/backend/integrations/providers.py
+++ b/autogpt_platform/backend/backend/integrations/providers.py
@@ -18,6 +18,7 @@ class ProviderName(str, Enum):
    DISCORD = "discord"
    D_ID = "d_id"
    E2B = "e2b"
+    ELEVENLABS = "elevenlabs"
    FAL = "fal"
    GITHUB = "github"
    GOOGLE = "google"
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -630,6 +630,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
    e2b_api_key: str = Field(default="", description="E2B API key")
    nvidia_api_key: str = Field(default="", description="Nvidia API key")
    mem0_api_key: str = Field(default="", description="Mem0 API key")
+    elevenlabs_api_key: str = Field(default="", description="ElevenLabs API key")

    linear_client_id: str = Field(default="", description="Linear client ID")
    linear_client_secret: str = Field(default="", description="Linear client secret")
--- a/autogpt_platform/backend/poetry.lock
+++ b/autogpt_platform/backend/poetry.lock
@@ -1169,6 +1169,29 @@ attrs = ">=21.3.0"
 e2b = ">=1.5.4,<2.0.0"
 httpx = ">=0.20.0,<1.0.0"

+[[package]]
+name = "elevenlabs"
+version = "1.59.0"
+description = ""
+optional = false
+python-versions = "<4.0,>=3.8"
+groups = ["main"]
+files = [
+    {file = "elevenlabs-1.59.0-py3-none-any.whl", hash = "sha256:468145db81a0bc867708b4a8619699f75583e9481b395ec1339d0b443da771ed"},
+    {file = "elevenlabs-1.59.0.tar.gz", hash = "sha256:16e735bd594e86d415dd445d249c8cc28b09996cfd627fbc10102c0a84698859"},
+]
+
+[package.dependencies]
+httpx = ">=0.21.2"
+pydantic = ">=1.9.2"
+pydantic-core = ">=2.18.2,<3.0.0"
+requests = ">=2.20"
+typing_extensions = ">=4.0.0"
+websockets = ">=11.0"
+
+[package.extras]
+pyaudio = ["pyaudio (>=0.2.14)"]
+
 [[package]]
 name = "email-validator"
 version = "2.2.0"
@@ -7361,6 +7384,28 @@ files = [
 defusedxml = ">=0.7.1,<0.8.0"
 requests = "*"

+[[package]]
+name = "yt-dlp"
+version = "2024.12.23"
+description = "A feature-rich command-line audio/video downloader"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "yt_dlp-2024.12.23-py3-none-any.whl", hash = "sha256:2fc08a5221a0379628ac4e7324c6c69a95b9fdfa7a7ca3187444b3b7451e38be"},
+    {file = "yt_dlp-2024.12.23.tar.gz", hash = "sha256:ac0e72b5a9017ba104b4258546201a7cedc38e8bd20727e0c63b77c829b425e9"},
+]
+
+[package.extras]
+build = ["build", "hatchling", "pip", "setuptools (>=71.0.2)", "wheel"]
+curl-cffi = ["curl-cffi (==0.5.10) ; os_name == \"nt\" and implementation_name == \"cpython\"", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2) ; os_name != \"nt\" and implementation_name == \"cpython\""]
+default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"]
+dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.8.0,<0.9.0)"]
+pyinstaller = ["pyinstaller (>=6.11.1)"]
+secretstorage = ["cffi", "secretstorage"]
+static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.8.0,<0.9.0)"]
+test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
+
 [[package]]
 name = "zerobouncesdk"
 version = "1.1.2"
@@ -7512,4 +7557,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.14"
-content-hash = "18b92e09596298c82432e4d0a85cb6d80a40b4229bee0a0c15f0529fd6cb21a4"
+content-hash = "ee24b0e885ea951eecbda5e76314d711ed5ae02f63c69fd79c11ad2e3fe5fb0f"
--- a/autogpt_platform/backend/pyproject.toml
+++ b/autogpt_platform/backend/pyproject.toml
@@ -20,6 +20,7 @@ click = "^8.2.0"
 cryptography = "^45.0"
 discord-py = "^2.5.2"
 e2b-code-interpreter = "^1.5.2"
+elevenlabs = "^1.50.0"
 fastapi = "^0.116.1"
 feedparser = "^6.0.11"
 flake8 = "^7.3.0"
@@ -71,6 +72,7 @@ tweepy = "^4.16.0"
 uvicorn = { extras = ["standard"], version = "^0.35.0" }
 websockets = "^15.0"
 youtube-transcript-api = "^1.2.1"
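+yt-dlp = "^2024.12.13"  # NOTE(review): caret on CalVer caps at <2025.0.0 (lock resolved 2024.12.23); confirm, or relax to ">=" and re-lock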
 zerobouncesdk = "^1.1.2"
 # NOTE: please insert new dependencies in their alphabetical location
 pytest-snapshot = "^0.9.0"
--- a/autogpt_platform/frontend/src/components/contextual/CredentialsInput/helpers.ts
+++ b/autogpt_platform/frontend/src/components/contextual/CredentialsInput/helpers.ts
@@ -26,6 +26,7 @@ export const providerIcons: Partial<
  nvidia: fallbackIcon,
  discord: FaDiscord,
  d_id: fallbackIcon,
+  elevenlabs: fallbackIcon,
  google_maps: FaGoogle,
  jina: fallbackIcon,
  ideogram: fallbackIcon,
--- a/backend/blocks/video/init.py
+++ b/backend/blocks/video/init.py
@@ -1 +0,0 @@
-# Video editing blocks
Author	SHA1	Message	Date
Nicholas Tindle	6cd62c4d50	Merge branch 'dev' into feature/video-editing-blocks	2026-01-23 12:39:34 -06:00
Nicholas Tindle	9f4c33a695	feat(video): refactor video storage methods for improved testability across blocks	2026-01-23 12:36:28 -06:00
Nicholas Tindle	b0debe9488	Merge branch 'feature/video-editing-blocks' of https://github.com/Significant-Gravitas/AutoGPT into feature/video-editing-blocks	2026-01-23 12:16:34 -06:00
Nicholas Tindle	b20767bde9	feat(blocks): add ElevenLabs integration and enhance video processing blocks with media file handling	2026-01-23 12:15:59 -06:00
claude[bot]	b9a9481381	chore(backend): regenerate poetry.lock file Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>	2026-01-23 01:43:25 +00:00
Nicholas Tindle	d2d2a0c0c9	feat(backend): integrate ElevenLabs for video narration and add cost configuration - Implemented ElevenLabs API integration for generating AI narration in videos. - Updated VideoNarrationBlock to handle audio generation and mixing with video. - Added ElevenLabs credentials to the credentials store. - Configured block costs for using ElevenLabs TTS. - Enhanced video processing blocks (concat, download, text overlay) for improved functionality. - Updated dependencies in poetry.lock for ElevenLabs SDK and yt-dlp. - Added provider icon for ElevenLabs in frontend credentials input.	2026-01-22 19:26:39 -06:00
Nicholas Tindle	521f69220d	feat(blocks): export all 8 video blocks from module Includes migrated blocks from media.py: - MediaDurationBlock - LoopVideoBlock - AddAudioToVideoBlock	2026-01-22 13:55:22 -06:00
Nicholas Tindle	368adc985d	feat(blocks): migrate AddAudioToVideoBlock from media.py Per review feedback from @majdyz - consolidating video blocks	2026-01-22 13:55:03 -06:00
Nicholas Tindle	8c3216f0a2	feat(blocks): migrate LoopVideoBlock from media.py Per review feedback from @majdyz - consolidating video blocks	2026-01-22 13:55:02 -06:00
Nicholas Tindle	94063616e5	feat(blocks): migrate MediaDurationBlock from media.py Per review feedback from @majdyz - consolidating video blocks	2026-01-22 13:55:00 -06:00
Nicholas Tindle	2433a86cb1	fix(blocks): correct import paths in video __init__.py	2026-01-22 13:52:26 -06:00
Nicholas Tindle	0ede203f8e	feat(blocks): add VideoNarrationBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Close AudioFileClip in finally block - Document that ducking = reduced volume mix - Extract helper method for test mocking - Proper resource cleanup	2026-01-22 13:52:10 -06:00
Nicholas Tindle	dc751316c5	feat(blocks): add VideoTextOverlayBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Add start_time/end_time validation - Extract helper method for test mocking - Proper resource cleanup in finally	2026-01-22 13:51:37 -06:00
Nicholas Tindle	e7fb54e6af	feat(blocks): add VideoDownloadBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Extract helper method for test mocking	2026-01-22 13:51:04 -06:00
Nicholas Tindle	7b76f4d1e4	feat(blocks): add VideoConcatBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Constrain output_format to enum - Add ge=0.0 to transition_duration - Extract helper method for test mocking - Proper resource cleanup in finally	2026-01-22 13:50:35 -06:00
Nicholas Tindle	3cc56de0fa	feat(blocks): add VideoClipBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Constrain output_format to enum - Extract helper method for test mocking - Proper resource cleanup in finally	2026-01-22 13:50:12 -06:00
Nicholas Tindle	d2bead0f7a	feat(blocks): create video module with all blocks Consolidate video editing blocks into dedicated module. Migrate blocks from media.py per review feedback. Addresses: @majdyz review comment	2026-01-22 13:49:48 -06:00
claude[bot]	f8d3893c16	fix(blocks): Address review feedback for video editing blocks - Add start_time < end_time validation in VideoClipBlock and VideoTextOverlayBlock - Fix resource leaks: close AudioFileClip in narration.py, TextClip in text_overlay.py - Fix concat.py: proper resource cleanup in finally block, load clips individually - Implement proper crossfade using crossfadein/crossfadeout - Implement ducking mode with stronger attenuation (0.3x original_volume) - Remove unused start_time/end_time params from VideoDownloadBlock - Fix None handling for duration/title in download.py (use 'or' instead of 'get' default) - Add exception chaining with 'from e' in all blocks - Add minimum clips validation in VideoConcatBlock - Sort __all__ in __init__.py - Increase ElevenLabs API timeout to 120s for longer scripts Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>	2026-01-18 23:27:04 +00:00
Nicholas Tindle	1cfbc0dd08	feat(video): Update __init__.py with full exports	2026-01-18 15:34:04 -06:00
Nicholas Tindle	ff84643b48	feat(video): Add VideoNarrationBlock	2026-01-18 15:33:48 -06:00
Nicholas Tindle	c19c3c834a	feat(video): Add VideoTextOverlayBlock	2026-01-18 15:33:47 -06:00
Nicholas Tindle	d0f7ba8cfd	feat(video): Add VideoConcatBlock	2026-01-18 15:33:46 -06:00
Nicholas Tindle	2a855f4bd0	feat(video): Add VideoClipBlock	2026-01-18 15:32:59 -06:00
Nicholas Tindle	b93bb3b9f8	feat(video): Add VideoDownloadBlock	2026-01-18 15:32:58 -06:00