feat(video): add model_id to VideoNarrationBlock for enhanced TTS model selection

feat(video): add video codec utility and update video processing blocks for codec handling
feat(video): update video processing blocks and documentation for enhanced functionality
2026-01-24 06:28:04 -05:00 · 2026-01-23 14:20:04 -06:00 · 2026-01-23 13:52:11 -06:00 · 2026-01-23 13:27:34 -06:00 · 2026-01-23 12:39:34 -06:00 · 2026-01-23 12:36:28 -06:00
39 changed files with 2139 additions and 589 deletions
--- a/autogpt_platform/backend/.env.default
+++ b/autogpt_platform/backend/.env.default
@@ -152,6 +152,7 @@ REPLICATE_API_KEY=
 REVID_API_KEY=
 SCREENSHOTONE_API_KEY=
 UNREAL_SPEECH_API_KEY=
+ELEVENLABS_API_KEY=

 # Data & Search Services
 E2B_API_KEY=
--- a/autogpt_platform/backend/Dockerfile
+++ b/autogpt_platform/backend/Dockerfile
@@ -62,10 +62,11 @@ ENV POETRY_HOME=/opt/poetry \
    DEBIAN_FRONTEND=noninteractive
 ENV PATH=/opt/poetry/bin:$PATH

-# Install Python without upgrading system-managed packages
+# Install Python and FFmpeg (required for video processing blocks)
 RUN apt-get update && apt-get install -y \
    python3.13 \
    python3-pip \
+    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

 # Copy only necessary files from builder
--- a/autogpt_platform/backend/backend/api/features/store/db.py
+++ b/autogpt_platform/backend/backend/api/features/store/db.py
@@ -1552,7 +1552,7 @@ async def review_store_submission(

                # Generate embedding for approved listing (blocking - admin operation)
                # Inside transaction: if embedding fails, entire transaction rolls back
-                await ensure_embedding(
+                embedding_success = await ensure_embedding(
                    version_id=store_listing_version_id,
                    name=store_listing_version.name,
                    description=store_listing_version.description,
@@ -1560,6 +1560,12 @@ async def review_store_submission(
                    categories=store_listing_version.categories or [],
                    tx=tx,
                )
+                if not embedding_success:
+                    raise ValueError(
+                        f"Failed to generate embedding for listing {store_listing_version_id}. "
+                        "This is likely due to OpenAI API being unavailable. "
+                        "Please try again later or contact support if the issue persists."
+                    )

                await prisma.models.StoreListing.prisma(tx).update(
                    where={"id": store_listing_version.StoreListing.id},
--- a/autogpt_platform/backend/backend/api/features/store/embeddings.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings.py
@@ -63,42 +63,49 @@ def build_searchable_text(
    return " ".join(parts)


-async def generate_embedding(text: str) -> list[float]:
+async def generate_embedding(text: str) -> list[float] | None:
    """
    Generate embedding for text using OpenAI API.

-    Raises exceptions on failure - caller should handle.
+    Returns None if embedding generation fails.
+    Fail-fast: no retries to maintain consistency with approval flow.
    """
-    client = get_openai_client()
-    if not client:
-        raise RuntimeError("openai_internal_api_key not set, cannot generate embedding")
+    try:
+        client = get_openai_client()
+        if not client:
+            logger.error("openai_internal_api_key not set, cannot generate embedding")
+            return None

-    # Truncate text to token limit using tiktoken
-    # Character-based truncation is insufficient because token ratios vary by content type
-    enc = encoding_for_model(EMBEDDING_MODEL)
-    tokens = enc.encode(text)
-    if len(tokens) > EMBEDDING_MAX_TOKENS:
-        tokens = tokens[:EMBEDDING_MAX_TOKENS]
-        truncated_text = enc.decode(tokens)
-        logger.info(
-            f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
+        # Truncate text to token limit using tiktoken
+        # Character-based truncation is insufficient because token ratios vary by content type
+        enc = encoding_for_model(EMBEDDING_MODEL)
+        tokens = enc.encode(text)
+        if len(tokens) > EMBEDDING_MAX_TOKENS:
+            tokens = tokens[:EMBEDDING_MAX_TOKENS]
+            truncated_text = enc.decode(tokens)
+            logger.info(
+                f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
+            )
+        else:
+            truncated_text = text
+
+        start_time = time.time()
+        response = await client.embeddings.create(
+            model=EMBEDDING_MODEL,
+            input=truncated_text,
        )
-    else:
-        truncated_text = text
+        latency_ms = (time.time() - start_time) * 1000

-    start_time = time.time()
-    response = await client.embeddings.create(
-        model=EMBEDDING_MODEL,
-        input=truncated_text,
-    )
-    latency_ms = (time.time() - start_time) * 1000
+        embedding = response.data[0].embedding
+        logger.info(
+            f"Generated embedding: {len(embedding)} dims, "
+            f"{len(tokens)} tokens, {latency_ms:.0f}ms"
+        )
+        return embedding

-    embedding = response.data[0].embedding
-    logger.info(
-        f"Generated embedding: {len(embedding)} dims, "
-        f"{len(tokens)} tokens, {latency_ms:.0f}ms"
-    )
-    return embedding
+    except Exception as e:
+        logger.error(f"Failed to generate embedding: {e}")
+        return None


 async def store_embedding(
@@ -137,45 +144,48 @@ async def store_content_embedding(

    New function for unified content embedding storage.
    Uses raw SQL since Prisma doesn't natively support pgvector.
-
-    Raises exceptions on failure - caller should handle.
    """
-    client = tx if tx else prisma.get_client()
+    try:
+        client = tx if tx else prisma.get_client()

-    # Convert embedding to PostgreSQL vector format
-    embedding_str = embedding_to_vector_string(embedding)
-    metadata_json = dumps(metadata or {})
+        # Convert embedding to PostgreSQL vector format
+        embedding_str = embedding_to_vector_string(embedding)
+        metadata_json = dumps(metadata or {})

-    # Upsert the embedding
-    # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
-    # Use unqualified ::vector - pgvector is in search_path on all environments
-    await execute_raw_with_schema(
-        """
-        INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
-            "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
+        # Upsert the embedding
+        # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
+        # Use unqualified ::vector - pgvector is in search_path on all environments
+        await execute_raw_with_schema(
+            """
+            INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
+                "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
+            )
+            VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
+            ON CONFLICT ("contentType", "contentId", "userId")
+            DO UPDATE SET
+                "embedding" = $4::vector,
+                "searchableText" = $5,
+                "metadata" = $6::jsonb,
+                "updatedAt" = NOW()
+            WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
+                AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
+                AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
+            """,
+            content_type,
+            content_id,
+            user_id,
+            embedding_str,
+            searchable_text,
+            metadata_json,
+            client=client,
        )
-        VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
-        ON CONFLICT ("contentType", "contentId", "userId")
-        DO UPDATE SET
-            "embedding" = $4::vector,
-            "searchableText" = $5,
-            "metadata" = $6::jsonb,
-            "updatedAt" = NOW()
-        WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
-            AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
-            AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
-        """,
-        content_type,
-        content_id,
-        user_id,
-        embedding_str,
-        searchable_text,
-        metadata_json,
-        client=client,
-    )

-    logger.info(f"Stored embedding for {content_type}:{content_id}")
-    return True
+        logger.info(f"Stored embedding for {content_type}:{content_id}")
+        return True
+
+    except Exception as e:
+        logger.error(f"Failed to store embedding for {content_type}:{content_id}: {e}")
+        return False


 async def get_embedding(version_id: str) -> dict[str, Any] | None:
@@ -207,31 +217,34 @@ async def get_content_embedding(

    New function for unified content embedding retrieval.
    Returns dict with contentType, contentId, embedding, timestamps or None if not found.
-
-    Raises exceptions on failure - caller should handle.
    """
-    result = await query_raw_with_schema(
-        """
-        SELECT
-            "contentType",
-            "contentId",
-            "userId",
-            "embedding"::text as "embedding",
-            "searchableText",
-            "metadata",
-            "createdAt",
-            "updatedAt"
-        FROM {schema_prefix}"UnifiedContentEmbedding"
-        WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
-        """,
-        content_type,
-        content_id,
-        user_id,
-    )
+    try:
+        result = await query_raw_with_schema(
+            """
+            SELECT
+                "contentType",
+                "contentId",
+                "userId",
+                "embedding"::text as "embedding",
+                "searchableText",
+                "metadata",
+                "createdAt",
+                "updatedAt"
+            FROM {schema_prefix}"UnifiedContentEmbedding"
+            WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
+            """,
+            content_type,
+            content_id,
+            user_id,
+        )

-    if result and len(result) > 0:
-        return result[0]
-    return None
+        if result and len(result) > 0:
+            return result[0]
+        return None
+
+    except Exception as e:
+        logger.error(f"Failed to get embedding for {content_type}:{content_id}: {e}")
+        return None


 async def ensure_embedding(
@@ -259,38 +272,46 @@ async def ensure_embedding(
        tx: Optional transaction client

    Returns:
-        True if embedding exists/was created
-
-    Raises exceptions on failure - caller should handle.
+        True if embedding exists/was created, False on failure
    """
-    # Check if embedding already exists
-    if not force:
-        existing = await get_embedding(version_id)
-        if existing and existing.get("embedding"):
-            logger.debug(f"Embedding for version {version_id} already exists")
-            return True
+    try:
+        # Check if embedding already exists
+        if not force:
+            existing = await get_embedding(version_id)
+            if existing and existing.get("embedding"):
+                logger.debug(f"Embedding for version {version_id} already exists")
+                return True

-    # Build searchable text for embedding
-    searchable_text = build_searchable_text(name, description, sub_heading, categories)
+        # Build searchable text for embedding
+        searchable_text = build_searchable_text(
+            name, description, sub_heading, categories
+        )

-    # Generate new embedding
-    embedding = await generate_embedding(searchable_text)
+        # Generate new embedding
+        embedding = await generate_embedding(searchable_text)
+        if embedding is None:
+            logger.warning(f"Could not generate embedding for version {version_id}")
+            return False

-    # Store the embedding with metadata using new function
-    metadata = {
-        "name": name,
-        "subHeading": sub_heading,
-        "categories": categories,
-    }
-    return await store_content_embedding(
-        content_type=ContentType.STORE_AGENT,
-        content_id=version_id,
-        embedding=embedding,
-        searchable_text=searchable_text,
-        metadata=metadata,
-        user_id=None,  # Store agents are public
-        tx=tx,
-    )
+        # Store the embedding with metadata using new function
+        metadata = {
+            "name": name,
+            "subHeading": sub_heading,
+            "categories": categories,
+        }
+        return await store_content_embedding(
+            content_type=ContentType.STORE_AGENT,
+            content_id=version_id,
+            embedding=embedding,
+            searchable_text=searchable_text,
+            metadata=metadata,
+            user_id=None,  # Store agents are public
+            tx=tx,
+        )
+
+    except Exception as e:
+        logger.error(f"Failed to ensure embedding for version {version_id}: {e}")
+        return False


 async def delete_embedding(version_id: str) -> bool:
@@ -500,24 +521,6 @@ async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
            success = sum(1 for result in results if result is True)
            failed = len(results) - success

-            # Aggregate unique errors to avoid Sentry spam
-            if failed > 0:
-                # Group errors by type and message
-                error_summary: dict[str, int] = {}
-                for result in results:
-                    if isinstance(result, Exception):
-                        error_key = f"{type(result).__name__}: {str(result)}"
-                        error_summary[error_key] = error_summary.get(error_key, 0) + 1
-
-                # Log aggregated error summary
-                error_details = ", ".join(
-                    f"{error} ({count}x)" for error, count in error_summary.items()
-                )
-                logger.error(
-                    f"{content_type.value}: {failed}/{len(results)} embeddings failed. "
-                    f"Errors: {error_details}"
-                )
-
            results_by_type[content_type.value] = {
                "processed": len(missing_items),
                "success": success,
@@ -554,12 +557,11 @@ async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
    }


-async def embed_query(query: str) -> list[float]:
+async def embed_query(query: str) -> list[float] | None:
    """
    Generate embedding for a search query.

    Same as generate_embedding but with clearer intent.
-    Raises exceptions on failure - caller should handle.
    """
    return await generate_embedding(query)

@@ -592,30 +594,40 @@ async def ensure_content_embedding(
        tx: Optional transaction client

    Returns:
-        True if embedding exists/was created
-
-    Raises exceptions on failure - caller should handle.
+        True if embedding exists/was created, False on failure
    """
-    # Check if embedding already exists
-    if not force:
-        existing = await get_content_embedding(content_type, content_id, user_id)
-        if existing and existing.get("embedding"):
-            logger.debug(f"Embedding for {content_type}:{content_id} already exists")
-            return True
+    try:
+        # Check if embedding already exists
+        if not force:
+            existing = await get_content_embedding(content_type, content_id, user_id)
+            if existing and existing.get("embedding"):
+                logger.debug(
+                    f"Embedding for {content_type}:{content_id} already exists"
+                )
+                return True

-    # Generate new embedding
-    embedding = await generate_embedding(searchable_text)
+        # Generate new embedding
+        embedding = await generate_embedding(searchable_text)
+        if embedding is None:
+            logger.warning(
+                f"Could not generate embedding for {content_type}:{content_id}"
+            )
+            return False

-    # Store the embedding
-    return await store_content_embedding(
-        content_type=content_type,
-        content_id=content_id,
-        embedding=embedding,
-        searchable_text=searchable_text,
-        metadata=metadata or {},
-        user_id=user_id,
-        tx=tx,
-    )
+        # Store the embedding
+        return await store_content_embedding(
+            content_type=content_type,
+            content_id=content_id,
+            embedding=embedding,
+            searchable_text=searchable_text,
+            metadata=metadata or {},
+            user_id=user_id,
+            tx=tx,
+        )
+
+    except Exception as e:
+        logger.error(f"Failed to ensure embedding for {content_type}:{content_id}: {e}")
+        return False


 async def cleanup_orphaned_embeddings() -> dict[str, Any]:
@@ -842,8 +854,9 @@ async def semantic_search(
        limit = 100

    # Generate query embedding
-    try:
-        query_embedding = await embed_query(query)
+    query_embedding = await embed_query(query)
+
+    if query_embedding is not None:
        # Semantic search with embeddings
        embedding_str = embedding_to_vector_string(query_embedding)

@@ -894,21 +907,24 @@ async def semantic_search(
        """
        )

-        results = await query_raw_with_schema(sql, *params)
-        return [
-            {
-                "content_id": row["content_id"],
-                "content_type": row["content_type"],
-                "searchable_text": row["searchable_text"],
-                "metadata": row["metadata"],
-                "similarity": float(row["similarity"]),
-            }
-            for row in results
-        ]
-    except Exception as e:
-        logger.warning(f"Semantic search failed, falling back to lexical search: {e}")
+        try:
+            results = await query_raw_with_schema(sql, *params)
+            return [
+                {
+                    "content_id": row["content_id"],
+                    "content_type": row["content_type"],
+                    "searchable_text": row["searchable_text"],
+                    "metadata": row["metadata"],
+                    "similarity": float(row["similarity"]),
+                }
+                for row in results
+            ]
+        except Exception as e:
+            logger.error(f"Semantic search failed: {e}")
+            # Fall through to lexical search below

    # Fallback to lexical search if embeddings unavailable
+    logger.warning("Falling back to lexical search (embeddings unavailable)")

    params_lexical: list[Any] = [limit]
    user_filter = ""
--- a/autogpt_platform/backend/backend/api/features/store/embeddings_schema_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings_schema_test.py
@@ -298,16 +298,17 @@ async def test_schema_handling_error_cases():
            mock_client.execute_raw.side_effect = Exception("Database error")
            mock_get_client.return_value = mock_client

-            # Should raise exception on error
-            with pytest.raises(Exception, match="Database error"):
-                await embeddings.store_content_embedding(
-                    content_type=ContentType.STORE_AGENT,
-                    content_id="test-id",
-                    embedding=[0.1] * EMBEDDING_DIM,
-                    searchable_text="test",
-                    metadata=None,
-                    user_id=None,
-                )
+            result = await embeddings.store_content_embedding(
+                content_type=ContentType.STORE_AGENT,
+                content_id="test-id",
+                embedding=[0.1] * EMBEDDING_DIM,
+                searchable_text="test",
+                metadata=None,
+                user_id=None,
+            )
+
+            # Should return False on error, not raise
+            assert result is False


 if __name__ == "__main__":
--- a/autogpt_platform/backend/backend/api/features/store/embeddings_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings_test.py
@@ -80,8 +80,9 @@ async def test_generate_embedding_no_api_key():
    ) as mock_get_client:
        mock_get_client.return_value = None

-        with pytest.raises(RuntimeError, match="openai_internal_api_key not set"):
-            await embeddings.generate_embedding("test text")
+        result = await embeddings.generate_embedding("test text")
+
+        assert result is None


@pytest.mark.asyncio(loop_scope="session")
@@ -96,8 +97,9 @@ async def test_generate_embedding_api_error():
    ) as mock_get_client:
        mock_get_client.return_value = mock_client

-        with pytest.raises(Exception, match="API Error"):
-            await embeddings.generate_embedding("test text")
+        result = await embeddings.generate_embedding("test text")
+
+        assert result is None


@pytest.mark.asyncio(loop_scope="session")
@@ -171,10 +173,11 @@ async def test_store_embedding_database_error(mocker):

    embedding = [0.1, 0.2, 0.3]

-    with pytest.raises(Exception, match="Database error"):
-        await embeddings.store_embedding(
-            version_id="test-version-id", embedding=embedding, tx=mock_client
-        )
+    result = await embeddings.store_embedding(
+        version_id="test-version-id", embedding=embedding, tx=mock_client
+    )
+
+    assert result is False


@pytest.mark.asyncio(loop_scope="session")
@@ -274,16 +277,17 @@ async def test_ensure_embedding_create_new(mock_get, mock_store, mock_generate):
 async def test_ensure_embedding_generation_fails(mock_get, mock_generate):
    """Test ensure_embedding when generation fails."""
    mock_get.return_value = None
-    mock_generate.side_effect = Exception("Generation failed")
+    mock_generate.return_value = None

-    with pytest.raises(Exception, match="Generation failed"):
-        await embeddings.ensure_embedding(
-            version_id="test-id",
-            name="Test",
-            description="Test description",
-            sub_heading="Test heading",
-            categories=["test"],
-        )
+    result = await embeddings.ensure_embedding(
+        version_id="test-id",
+        name="Test",
+        description="Test description",
+        sub_heading="Test heading",
+        categories=["test"],
+    )
+
+    assert result is False


@pytest.mark.asyncio(loop_scope="session")
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -186,12 +186,13 @@ async def unified_hybrid_search(

    offset = (page - 1) * page_size

-    # Generate query embedding with graceful degradation
-    try:
-        query_embedding = await embed_query(query)
-    except Exception as e:
+    # Generate query embedding
+    query_embedding = await embed_query(query)
+
+    # Graceful degradation if embedding unavailable
+    if query_embedding is None or not query_embedding:
        logger.warning(
-            f"Failed to generate query embedding - falling back to lexical-only search: {e}. "
+            "Failed to generate query embedding - falling back to lexical-only search. "
            "Check that openai_internal_api_key is configured and OpenAI API is accessible."
        )
        query_embedding = [0.0] * EMBEDDING_DIM
@@ -463,12 +464,13 @@ async def hybrid_search(

    offset = (page - 1) * page_size

-    # Generate query embedding with graceful degradation
-    try:
-        query_embedding = await embed_query(query)
-    except Exception as e:
+    # Generate query embedding
+    query_embedding = await embed_query(query)
+
+    # Graceful degradation
+    if query_embedding is None or not query_embedding:
        logger.warning(
-            f"Failed to generate query embedding - falling back to lexical-only search: {e}"
+            "Failed to generate query embedding - falling back to lexical-only search."
        )
        query_embedding = [0.0] * EMBEDDING_DIM
        total_non_semantic = (
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search_test.py
@@ -172,8 +172,8 @@ async def test_hybrid_search_without_embeddings():
        with patch(
            "backend.api.features.store.hybrid_search.query_raw_with_schema"
        ) as mock_query:
-            # Simulate embedding failure by raising exception
-            mock_embed.side_effect = Exception("Embedding generation failed")
+            # Simulate embedding failure
+            mock_embed.return_value = None
            mock_query.return_value = mock_results

            # Should NOT raise - graceful degradation
@@ -613,9 +613,7 @@ async def test_unified_hybrid_search_graceful_degradation():
            "backend.api.features.store.hybrid_search.embed_query"
        ) as mock_embed:
            mock_query.return_value = mock_results
-            mock_embed.side_effect = Exception(
-                "Embedding generation failed"
-            )  # Embedding failure
+            mock_embed.return_value = None  # Embedding failure

            # Should NOT raise - graceful degradation
            results, total = await unified_hybrid_search(
--- a/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
+++ b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
@@ -0,0 +1,28 @@
+"""ElevenLabs integration blocks - test credentials and shared utilities."""
+
+from typing import Literal
+
+from pydantic import SecretStr
+
+from backend.data.model import APIKeyCredentials, CredentialsMetaInput
+from backend.integrations.providers import ProviderName
+
+TEST_CREDENTIALS = APIKeyCredentials(
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    provider="elevenlabs",
+    api_key=SecretStr("mock-elevenlabs-api-key"),
+    title="Mock ElevenLabs API key",
+    expires_at=None,
+)
+
+TEST_CREDENTIALS_INPUT = {
+    "provider": TEST_CREDENTIALS.provider,
+    "id": TEST_CREDENTIALS.id,
+    "type": TEST_CREDENTIALS.type,
+    "title": TEST_CREDENTIALS.title,
+}
+
+ElevenLabsCredentials = APIKeyCredentials
+ElevenLabsCredentialsInput = CredentialsMetaInput[
+    Literal[ProviderName.ELEVENLABS], Literal["api_key"]
+]
--- a/autogpt_platform/backend/backend/blocks/media.py
+++ b/autogpt_platform/backend/backend/blocks/media.py
@@ -1,251 +0,0 @@
-import os
-import tempfile
-from typing import Literal, Optional
-
-from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.video.fx.Loop import Loop
-from moviepy.video.io.VideoFileClip import VideoFileClip
-
-from backend.data.block import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
-from backend.data.model import SchemaField
-from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
-
-
-class MediaDurationBlock(Block):
-
-    class Input(BlockSchemaInput):
-        media_in: MediaFileType = SchemaField(
-            description="Media input (URL, data URI, or local path)."
-        )
-        is_video: bool = SchemaField(
-            description="Whether the media is a video (True) or audio (False).",
-            default=True,
-        )
-
-    class Output(BlockSchemaOutput):
-        duration: float = SchemaField(
-            description="Duration of the media file (in seconds)."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
-            description="Block to get the duration of a media file.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=MediaDurationBlock.Input,
-            output_schema=MediaDurationBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        graph_exec_id: str,
-        user_id: str,
-        **kwargs,
-    ) -> BlockOutput:
-        # 1) Store the input media locally
-        local_media_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.media_in,
-            user_id=user_id,
-            return_content=False,
-        )
-        media_abspath = get_exec_file_path(graph_exec_id, local_media_path)
-
-        # 2) Load the clip
-        if input_data.is_video:
-            clip = VideoFileClip(media_abspath)
-        else:
-            clip = AudioFileClip(media_abspath)
-
-        yield "duration", clip.duration
-
-
-class LoopVideoBlock(Block):
-    """
-    Block for looping (repeating) a video clip until a given duration or number of loops.
-    """
-
-    class Input(BlockSchemaInput):
-        video_in: MediaFileType = SchemaField(
-            description="The input video (can be a URL, data URI, or local path)."
-        )
-        # Provide EITHER a `duration` or `n_loops` or both. We'll demonstrate `duration`.
-        duration: Optional[float] = SchemaField(
-            description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
-            default=None,
-            ge=0.0,
-        )
-        n_loops: Optional[int] = SchemaField(
-            description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
-            default=None,
-            ge=1,
-        )
-        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
-            description="How to return the output video. Either a relative path or base64 data URI.",
-            default="file_path",
-        )
-
-    class Output(BlockSchemaOutput):
-        video_out: str = SchemaField(
-            description="Looped video returned either as a relative path or a data URI."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="8bf9eef6-5451-4213-b265-25306446e94b",
-            description="Block to loop a video to a given duration or number of repeats.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=LoopVideoBlock.Input,
-            output_schema=LoopVideoBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        node_exec_id: str,
-        graph_exec_id: str,
-        user_id: str,
-        **kwargs,
-    ) -> BlockOutput:
-        # 1) Store the input video locally
-        local_video_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.video_in,
-            user_id=user_id,
-            return_content=False,
-        )
-        input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
-
-        # 2) Load the clip
-        clip = VideoFileClip(input_abspath)
-
-        # 3) Apply the loop effect
-        looped_clip = clip
-        if input_data.duration:
-            # Loop until we reach the specified duration
-            looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)])
-        elif input_data.n_loops:
-            looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)])
-        else:
-            raise ValueError("Either 'duration' or 'n_loops' must be provided.")
-
-        assert isinstance(looped_clip, VideoFileClip)
-
-        # 4) Save the looped output
-        output_filename = MediaFileType(
-            f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
-        )
-        output_abspath = get_exec_file_path(graph_exec_id, output_filename)
-
-        looped_clip = looped_clip.with_audio(clip.audio)
-        looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
-        # Return as data URI
-        video_out = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=output_filename,
-            user_id=user_id,
-            return_content=input_data.output_return_type == "data_uri",
-        )
-
-        yield "video_out", video_out
-
-
-class AddAudioToVideoBlock(Block):
-    """
-    Block that adds (attaches) an audio track to an existing video.
-    Optionally scale the volume of the new track.
-    """
-
-    class Input(BlockSchemaInput):
-        video_in: MediaFileType = SchemaField(
-            description="Video input (URL, data URI, or local path)."
-        )
-        audio_in: MediaFileType = SchemaField(
-            description="Audio input (URL, data URI, or local path)."
-        )
-        volume: float = SchemaField(
-            description="Volume scale for the newly attached audio track (1.0 = original).",
-            default=1.0,
-        )
-        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
-            description="Return the final output as a relative path or base64 data URI.",
-            default="file_path",
-        )
-
-    class Output(BlockSchemaOutput):
-        video_out: MediaFileType = SchemaField(
-            description="Final video (with attached audio), as a path or data URI."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="3503748d-62b6-4425-91d6-725b064af509",
-            description="Block to attach an audio file to a video file using moviepy.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=AddAudioToVideoBlock.Input,
-            output_schema=AddAudioToVideoBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        node_exec_id: str,
-        graph_exec_id: str,
-        user_id: str,
-        **kwargs,
-    ) -> BlockOutput:
-        # 1) Store the inputs locally
-        local_video_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.video_in,
-            user_id=user_id,
-            return_content=False,
-        )
-        local_audio_path = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=input_data.audio_in,
-            user_id=user_id,
-            return_content=False,
-        )
-
-        abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id)
-        video_abspath = os.path.join(abs_temp_dir, local_video_path)
-        audio_abspath = os.path.join(abs_temp_dir, local_audio_path)
-
-        # 2) Load video + audio with moviepy
-        video_clip = VideoFileClip(video_abspath)
-        audio_clip = AudioFileClip(audio_abspath)
-        # Optionally scale volume
-        if input_data.volume != 1.0:
-            audio_clip = audio_clip.with_volume_scaled(input_data.volume)
-
-        # 3) Attach the new audio track
-        final_clip = video_clip.with_audio(audio_clip)
-
-        # 4) Write to output file
-        output_filename = MediaFileType(
-            f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
-        )
-        output_abspath = os.path.join(abs_temp_dir, output_filename)
-        final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
-        # 5) Return either path or data URI
-        video_out = await store_media_file(
-            graph_exec_id=graph_exec_id,
-            file=output_filename,
-            user_id=user_id,
-            return_content=input_data.output_return_type == "data_uri",
-        )
-
-        yield "video_out", video_out
--- a/autogpt_platform/backend/backend/blocks/video/__init__.py
+++ b/autogpt_platform/backend/backend/blocks/video/__init__.py
@@ -0,0 +1,37 @@
+"""Video editing blocks for AutoGPT Platform.
+
+This module provides blocks for:
+- Downloading videos from URLs (YouTube, Vimeo, news sites, direct links)
+- Clipping/trimming video segments
+- Concatenating multiple videos
+- Adding text overlays
+- Adding AI-generated narration
+- Getting media duration
+- Looping videos
+- Adding audio to videos
+
+Dependencies:
+- yt-dlp: For video downloading
+- moviepy: For video editing operations
+- requests: For API calls (narration block)
+"""
+
+from backend.blocks.video.add_audio import AddAudioToVideoBlock
+from backend.blocks.video.clip import VideoClipBlock
+from backend.blocks.video.concat import VideoConcatBlock
+from backend.blocks.video.download import VideoDownloadBlock
+from backend.blocks.video.duration import MediaDurationBlock
+from backend.blocks.video.loop import LoopVideoBlock
+from backend.blocks.video.narration import VideoNarrationBlock
+from backend.blocks.video.text_overlay import VideoTextOverlayBlock
+
+__all__ = [
+    "AddAudioToVideoBlock",
+    "LoopVideoBlock",
+    "MediaDurationBlock",
+    "VideoClipBlock",
+    "VideoConcatBlock",
+    "VideoDownloadBlock",
+    "VideoNarrationBlock",
+    "VideoTextOverlayBlock",
+]
--- a/autogpt_platform/backend/backend/blocks/video/_utils.py
+++ b/autogpt_platform/backend/backend/blocks/video/_utils.py
@@ -0,0 +1,34 @@
+"""Shared utilities for video blocks."""
+
+import os
+
+
+def get_video_codecs(output_path: str) -> tuple[str, str]:
+    """Choose the encoder pair matching an output file's container format.
+
+    Only the file extension of ``output_path`` is inspected, and the
+    comparison is case-insensitive. Known containers:
+
+        - .mp4, .mkv, .mov, .m4v: H.264 video + AAC audio
+        - .webm: VP8 video + Vorbis audio
+        - .avi: MPEG-4 video + MP3 audio
+
+    Any other extension (including none at all) falls back to H.264 + AAC.
+
+    Args:
+        output_path: Path of the file that is about to be written.
+
+    Returns:
+        Tuple of (video_codec, audio_codec).
+    """
+    h264_with_aac = ("libx264", "aac")
+    codecs_by_suffix: dict[str, tuple[str, str]] = {
+        ".mp4": h264_with_aac,
+        ".webm": ("libvpx", "libvorbis"),
+        ".mkv": h264_with_aac,
+        ".mov": h264_with_aac,
+        ".m4v": h264_with_aac,
+        ".avi": ("mpeg4", "libmp3lame"),
+    }
+
+    _, suffix = os.path.splitext(output_path)
+    suffix = suffix.lower()
+    if suffix in codecs_by_suffix:
+        return codecs_by_suffix[suffix]
+    return h264_with_aac
--- a/autogpt_platform/backend/backend/blocks/video/add_audio.py
+++ b/autogpt_platform/backend/backend/blocks/video/add_audio.py
@@ -0,0 +1,127 @@
+"""AddAudioToVideoBlock - Attach an audio track to a video."""
+
+import os
+from typing import Literal
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import get_video_codecs
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class AddAudioToVideoBlock(Block):
+    """Block that renders a copy of a video with a supplied audio track attached.
+
+    Both inputs are stored locally through ``store_media_file``, combined with
+    moviepy, and the rendered file is emitted on the ``video_out`` output.
+    """
+
+    class Input(BlockSchemaInput):
+        # Source video the audio track is attached to.
+        video_in: MediaFileType = SchemaField(
+            description="Video input (URL, data URI, or local path)."
+        )
+        # Audio track to attach.
+        audio_in: MediaFileType = SchemaField(
+            description="Audio input (URL, data URI, or local path)."
+        )
+        # Multiplier applied to the audio track; 1.0 skips the scaling step.
+        volume: float = SchemaField(
+            description="Volume scale for the newly attached audio track (1.0 = original).",
+            default=1.0,
+        )
+        # Selects whether ``video_out`` carries a path or the file content.
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the final output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Final video (with attached audio), as a path or data URI."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="3503748d-62b6-4425-91d6-725b064af509",
+            description="Block to attach an audio file to a video file using moviepy.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=AddAudioToVideoBlock.Input,
+            output_schema=AddAudioToVideoBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Attach ``audio_in`` to ``video_in`` and yield the rendered video.
+
+        Args:
+            input_data: Video, audio, volume scale and return-type selection.
+            node_exec_id: Used as the prefix of the output file name.
+            graph_exec_id: Identifies the execution whose file area is used.
+            user_id: Forwarded to ``store_media_file``.
+
+        Yields:
+            ``("video_out", value)`` where ``value`` is what ``store_media_file``
+            returns for the rendered file.
+        """
+        # Bring both inputs into the execution's file area.
+        stored_video = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.video_in,
+            user_id=user_id,
+            return_content=False,
+        )
+        stored_audio = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.audio_in,
+            user_id=user_id,
+            return_content=False,
+        )
+
+        source_video_path = get_exec_file_path(graph_exec_id, stored_video)
+        source_audio_path = get_exec_file_path(graph_exec_id, stored_audio)
+
+        # Every clip is tracked so the ``finally`` clause can close whatever
+        # was actually opened, even when a later step fails.
+        video = None
+        base_audio = None
+        scaled_audio = None
+        combined = None
+        try:
+            video = VideoFileClip(source_video_path)
+            base_audio = AudioFileClip(source_audio_path)
+
+            # Scale the volume only when a non-neutral factor was requested.
+            if input_data.volume != 1.0:
+                scaled_audio = base_audio.with_volume_scaled(input_data.volume)
+                track = scaled_audio
+            else:
+                track = base_audio
+
+            combined = video.with_audio(track)
+
+            # The output keeps the stored video's file name (and therefore its
+            # extension, which drives codec selection).
+            source_name = os.path.basename(stored_video)
+            output_filename = MediaFileType(
+                f"{node_exec_id}_audio_attached_{source_name}"
+            )
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+            vcodec, acodec = get_video_codecs(output_abspath)
+            combined.write_videofile(output_abspath, codec=vcodec, audio_codec=acodec)
+
+            wants_data_uri = input_data.output_return_type == "data_uri"
+            result = await store_media_file(
+                graph_exec_id=graph_exec_id,
+                file=output_filename,
+                user_id=user_id,
+                return_content=wants_data_uri,
+            )
+
+            yield "video_out", result
+        finally:
+            # Close derived clips before the clips they were built from.
+            for resource in (combined, scaled_audio, base_audio, video):
+                if resource:
+                    resource.close()
--- a/autogpt_platform/backend/backend/blocks/video/clip.py
+++ b/autogpt_platform/backend/backend/blocks/video/clip.py
@@ -0,0 +1,172 @@
+"""VideoClipBlock - Extract a segment from a video file."""
+
+import os
+from typing import Literal
+
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import get_video_codecs
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoClipBlock(Block):
+    """Block that cuts the ``[start_time, end_time]`` segment out of a video."""
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="Input video (URL, data URI, or local path)"
+        )
+        # Segment boundaries in seconds; ``run`` rejects end_time <= start_time.
+        start_time: float = SchemaField(description="Start time in seconds", ge=0.0)
+        end_time: float = SchemaField(description="End time in seconds", ge=0.0)
+        # Becomes the output file's extension, which in turn selects the codecs.
+        output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+            description="Output format", default="mp4", advanced=True
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Clipped video file (path or data URI)"
+        )
+        duration: float = SchemaField(description="Clip duration in seconds")
+
+    def __init__(self):
+        super().__init__(
+            id="8f539119-e580-4d86-ad41-86fbcb22abb1",
+            description="Extract a time segment from a video",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={
+                "video_in": "/tmp/test.mp4",
+                "start_time": 0.0,
+                "end_time": 10.0,
+            },
+            test_output=[("video_out", str), ("duration", float)],
+            test_mock={
+                "_clip_video": lambda *args: 10.0,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "clip_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store the input video without returning its content.
+
+        Kept as a separate method so it can be replaced via ``test_mock``.
+        """
+        stored = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+        return stored
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store the rendered clip, passing ``return_content`` through.
+
+        Kept as a separate method so it can be replaced via ``test_mock``.
+        """
+        stored = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+        return stored
+
+    def _clip_video(
+        self,
+        video_abspath: str,
+        output_abspath: str,
+        start_time: float,
+        end_time: float,
+    ) -> float:
+        """Write the requested segment of a video to ``output_abspath``.
+
+        Kept as a separate method so it can be replaced via ``test_mock``.
+
+        Args:
+            video_abspath: Path of the video to read.
+            output_abspath: Path to write; its extension selects the codecs.
+            start_time: Segment start in seconds.
+            end_time: Segment end in seconds.
+
+        Returns:
+            The ``duration`` attribute of the extracted segment.
+        """
+        source = None
+        segment = None
+        try:
+            source = VideoFileClip(video_abspath)
+            segment = source.subclipped(start_time, end_time)
+            vcodec, acodec = get_video_codecs(output_abspath)
+            segment.write_videofile(output_abspath, codec=vcodec, audio_codec=acodec)
+            return segment.duration
+        finally:
+            # Close the derived segment first, then the clip it came from.
+            for opened in (segment, source):
+                if opened:
+                    opened.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Clip the input video and yield the result plus its duration.
+
+        Yields:
+            ``("video_out", ...)`` followed by ``("duration", ...)``.
+
+        Raises:
+            BlockExecutionError: If ``end_time`` is not greater than
+                ``start_time``, or if any step of the clipping fails.
+        """
+        # Reject empty or inverted ranges before touching any file.
+        if input_data.start_time >= input_data.end_time:
+            raise BlockExecutionError(
+                message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+                block_name=self.name,
+                block_id=str(self.id),
+            )
+
+        try:
+            stored_input = await self._store_input_video(
+                graph_exec_id, input_data.video_in, user_id
+            )
+            video_abspath = get_exec_file_path(graph_exec_id, stored_input)
+
+            # Name the output after the input, then swap in the requested
+            # extension.
+            provisional_name = MediaFileType(
+                f"{node_exec_id}_clip_{os.path.basename(stored_input)}"
+            )
+            stem = os.path.splitext(provisional_name)[0]
+            output_filename = MediaFileType(f"{stem}.{input_data.output_format}")
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            clip_duration = self._clip_video(
+                video_abspath,
+                output_abspath,
+                input_data.start_time,
+                input_data.end_time,
+            )
+
+            wants_data_uri = input_data.output_return_type == "data_uri"
+            stored_output = await self._store_output_video(
+                graph_exec_id, output_filename, user_id, wants_data_uri
+            )
+
+            yield "video_out", stored_output
+            yield "duration", clip_duration
+
+        except Exception as e:
+            # Errors that are already block errors pass through untouched;
+            # everything else is wrapped with this block's identity.
+            if isinstance(e, BlockExecutionError):
+                raise
+            raise BlockExecutionError(
+                message=f"Failed to clip video: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/blocks/video/concat.py
+++ b/autogpt_platform/backend/backend/blocks/video/concat.py
@@ -0,0 +1,206 @@
+"""VideoConcatBlock - Concatenate multiple video clips into one."""
+
+from typing import Literal
+
+from moviepy import concatenate_videoclips
+from moviepy.video.fx import CrossFadeIn, CrossFadeOut, FadeIn, FadeOut
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import get_video_codecs
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoConcatBlock(Block):
+    """Block that joins several videos, in order, into a single output video."""
+
+    class Input(BlockSchemaInput):
+        # ``run`` requires at least two entries.
+        videos: list[MediaFileType] = SchemaField(
+            description="List of video files to concatenate (in order)"
+        )
+        transition: Literal["none", "crossfade", "fade_black"] = SchemaField(
+            description="Transition between clips", default="none"
+        )
+        transition_duration: int = SchemaField(
+            description="Transition duration in seconds",
+            default=1,
+            ge=0,
+            advanced=True,
+        )
+        # Becomes the output file's extension, which in turn selects the codecs.
+        output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
+            description="Output format", default="mp4", advanced=True
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Concatenated video file (path or data URI)"
+        )
+        total_duration: float = SchemaField(description="Total duration in seconds")
+
+    def __init__(self):
+        super().__init__(
+            id="9b0f531a-1118-487f-aeec-3fa63ea8900a",
+            description="Merge multiple video clips into one continuous video",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={"videos": ["/tmp/a.mp4", "/tmp/b.mp4"]},
+            test_output=[("video_out", str), ("total_duration", float)],
+            test_mock={
+                "_concat_videos": lambda *args: 20.0,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "concat_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store one input video without returning its content.
+
+        Kept as a separate method so it can be replaced via ``test_mock``.
+        """
+        stored = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+        return stored
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store the merged video, passing ``return_content`` through.
+
+        Kept as a separate method so it can be replaced via ``test_mock``.
+        """
+        stored = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+        return stored
+
+    def _concat_videos(
+        self,
+        video_abspaths: list[str],
+        output_abspath: str,
+        transition: str,
+        transition_duration: int,
+    ) -> float:
+        """Join the given videos and write the result to ``output_abspath``.
+
+        Kept as a separate method so it can be replaced via ``test_mock``.
+
+        Args:
+            video_abspaths: Paths of the videos to join, in playback order.
+            output_abspath: Path to write; its extension selects the codecs.
+            transition: ``"crossfade"``, ``"fade_black"``, or anything else for
+                a plain concatenation.
+            transition_duration: Length in seconds handed to the fade effects
+                (and used as negative padding for crossfades).
+
+        Returns:
+            The ``duration`` attribute of the concatenated clip.
+        """
+        sources = []
+        styled = []
+        merged = None
+        try:
+            # Append one at a time so that clips opened before a failure are
+            # still tracked and get closed below.
+            for path in video_abspaths:
+                sources.append(VideoFileClip(path))
+
+            if transition == "crossfade":
+                last_index = len(sources) - 1
+                for index, source in enumerate(sources):
+                    # Fade in on every clip but the first, out on every clip
+                    # but the last.
+                    fx = []
+                    if index > 0:
+                        fx.append(CrossFadeIn(transition_duration))
+                    if index < last_index:
+                        fx.append(CrossFadeOut(transition_duration))
+                    styled.append(source.with_effects(fx) if fx else source)
+                # Negative padding makes neighbouring clips overlap for the
+                # length of the transition.
+                merged = concatenate_videoclips(
+                    styled,
+                    method="compose",
+                    padding=-transition_duration,
+                )
+            elif transition == "fade_black":
+                for source in sources:
+                    fade_pair = [
+                        FadeIn(transition_duration),
+                        FadeOut(transition_duration),
+                    ]
+                    styled.append(source.with_effects(fade_pair))
+                merged = concatenate_videoclips(styled)
+            else:
+                merged = concatenate_videoclips(sources)
+
+            vcodec, acodec = get_video_codecs(output_abspath)
+            merged.write_videofile(output_abspath, codec=vcodec, audio_codec=acodec)
+
+            return merged.duration
+        finally:
+            # Close the merged clip, then the effect-wrapped clips, then the
+            # clips that were opened from disk.
+            if merged:
+                merged.close()
+            for opened in [*styled, *sources]:
+                opened.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Concatenate the input videos and yield the result plus its duration.
+
+        Yields:
+            ``("video_out", ...)`` followed by ``("total_duration", ...)``.
+
+        Raises:
+            BlockExecutionError: If fewer than two videos are supplied, or if
+                any step of the concatenation fails.
+        """
+        supplied = len(input_data.videos)
+        if supplied < 2:
+            raise BlockExecutionError(
+                message="At least 2 videos are required for concatenation",
+                block_name=self.name,
+                block_id=str(self.id),
+            )
+
+        try:
+            # Inputs are stored one after another, in the order given.
+            video_abspaths = [
+                get_exec_file_path(
+                    graph_exec_id,
+                    await self._store_input_video(graph_exec_id, media, user_id),
+                )
+                for media in input_data.videos
+            ]
+
+            output_filename = MediaFileType(
+                f"{node_exec_id}_concat.{input_data.output_format}"
+            )
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            merged_duration = self._concat_videos(
+                video_abspaths,
+                output_abspath,
+                input_data.transition,
+                input_data.transition_duration,
+            )
+
+            wants_data_uri = input_data.output_return_type == "data_uri"
+            stored_output = await self._store_output_video(
+                graph_exec_id, output_filename, user_id, wants_data_uri
+            )
+
+            yield "video_out", stored_output
+            yield "total_duration", merged_duration
+
+        except Exception as e:
+            # Errors that are already block errors pass through untouched;
+            # everything else is wrapped with this block's identity.
+            if isinstance(e, BlockExecutionError):
+                raise
+            raise BlockExecutionError(
+                message=f"Failed to concatenate videos: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/blocks/video/download.py
+++ b/autogpt_platform/backend/backend/blocks/video/download.py
@@ -0,0 +1,177 @@
+"""VideoDownloadBlock - Download video from URL (YouTube, Vimeo, news sites, direct links)."""
+
+import os
+import typing
+from typing import Literal
+
+import yt_dlp
+
+if typing.TYPE_CHECKING:
+    from yt_dlp import _Params
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoDownloadBlock(Block):
+    """Download a single video from a URL using yt-dlp."""
+
+    class Input(BlockSchemaInput):
+        url: str = SchemaField(
+            description="URL of the video to download (YouTube, Vimeo, direct link, etc.)",
+            placeholder="https://www.youtube.com/watch?v=...",
+        )
+        # Mapped to a yt-dlp format selector by ``_get_format_string``.
+        quality: Literal["best", "1080p", "720p", "480p", "audio_only"] = SchemaField(
+            description="Video quality preference", default="720p"
+        )
+        # Passed to yt-dlp as ``merge_output_format``.
+        output_format: Literal["mp4", "webm", "mkv"] = SchemaField(
+            description="Output video format", default="mp4", advanced=True
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_file: MediaFileType = SchemaField(
+            description="Downloaded video (path or data URI)"
+        )
+        duration: float = SchemaField(description="Video duration in seconds")
+        title: str = SchemaField(description="Video title from source")
+        source_url: str = SchemaField(description="Original source URL")
+
+    def __init__(self):
+        super().__init__(
+            id="c35daabb-cd60-493b-b9ad-51f1fe4b50c4",
+            description="Download video from URL (YouTube, Vimeo, news sites, direct links)",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={
+                "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+                "quality": "480p",
+            },
+            test_output=[
+                ("video_file", str),
+                ("duration", float),
+                ("title", str),
+                ("source_url", str),
+            ],
+            test_mock={
+                "_download_video": lambda *args: ("video.mp4", 212.0, "Test Video"),
+                "_store_output_video": lambda *args, **kwargs: "video.mp4",
+            },
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store output video. Extracted for testability."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _get_format_string(self, quality: str) -> str:
+        """Translate a quality preference into a yt-dlp format selector.
+
+        Args:
+            quality: One of the ``Input.quality`` choices.
+
+        Returns:
+            The matching selector; unknown values use the "720p" selector.
+        """
+        formats = {
+            "best": "bestvideo+bestaudio/best",
+            "1080p": "bestvideo[height<=1080]+bestaudio/best[height<=1080]",
+            "720p": "bestvideo[height<=720]+bestaudio/best[height<=720]",
+            "480p": "bestvideo[height<=480]+bestaudio/best[height<=480]",
+            "audio_only": "bestaudio/best",
+        }
+        return formats.get(quality, formats["720p"])
+
+    def _download_video(
+        self,
+        url: str,
+        quality: str,
+        output_format: str,
+        output_dir: str,
+        node_exec_id: str,
+    ) -> tuple[str, float, str]:
+        """Download ``url`` into ``output_dir``. Extracted for testability.
+
+        Args:
+            url: Address of the video to download.
+            quality: Quality preference, see ``_get_format_string``.
+            output_format: Container requested when yt-dlp merges streams.
+            output_dir: Directory the file is written to.
+            node_exec_id: Prefix of the output file name.
+
+        Returns:
+            ``(filename, duration, title)``. ``filename`` is the base name of
+            the downloaded file; ``duration`` is 0.0 and ``title`` is
+            "Unknown" when the extracted info does not provide them.
+        """
+        output_template = os.path.join(
+            output_dir, f"{node_exec_id}_%(title).50s.%(ext)s"
+        )
+
+        ydl_opts: "_Params" = {
+            "format": self._get_format_string(quality),
+            "outtmpl": output_template,
+            "merge_output_format": output_format,
+            # The code below handles exactly one info dict and one file, so a
+            # video URL that also names a playlist must resolve to the video.
+            "noplaylist": True,
+            "quiet": True,
+            "no_warnings": True,
+        }
+
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(url, download=True)
+            video_path = ydl.prepare_filename(info)
+
+        # ``merge_output_format`` only takes effect when separate streams are
+        # merged. A single-stream download (e.g. "audio_only" or the "/best"
+        # fallback) keeps its source extension, so only switch to the
+        # ``output_format`` name when that file is really there. If neither
+        # candidate exists, keep the previous behaviour and report the
+        # ``output_format`` name.
+        if not video_path.endswith(f".{output_format}"):
+            merged_path = os.path.splitext(video_path)[0] + f".{output_format}"
+            if os.path.exists(merged_path) or not os.path.exists(video_path):
+                video_path = merged_path
+
+        # Return just the filename, not the full path
+        filename = os.path.basename(video_path)
+
+        return (
+            filename,
+            # Coerce so the value matches the annotated ``float``.
+            float(info.get("duration") or 0.0),
+            info.get("title") or "Unknown",
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Download the requested video and yield the file plus its metadata.
+
+        Yields:
+            ``video_file``, ``duration``, ``title`` and ``source_url``, in that
+            order.
+
+        Raises:
+            BlockExecutionError: If downloading or storing the video fails.
+        """
+        try:
+            # Get the exec file directory
+            output_dir = get_exec_file_path(graph_exec_id, "")
+            os.makedirs(output_dir, exist_ok=True)
+
+            # NOTE(review): ``_download_video`` is a synchronous call made
+            # directly from this coroutine - confirm that blocking here is
+            # acceptable for the executor.
+            filename, duration, title = self._download_video(
+                input_data.url,
+                input_data.quality,
+                input_data.output_format,
+                output_dir,
+                node_exec_id,
+            )
+
+            # Return as data URI or path
+            video_out = await self._store_output_video(
+                graph_exec_id,
+                MediaFileType(filename),
+                user_id,
+                input_data.output_return_type == "data_uri",
+            )
+
+            yield "video_file", video_out
+            yield "duration", duration
+            yield "title", title
+            yield "source_url", input_data.url
+
+        except BlockExecutionError:
+            # Already a block error: do not wrap it a second time.
+            raise
+        except Exception as e:
+            raise BlockExecutionError(
+                message=f"Failed to download video: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/blocks/video/duration.py
+++ b/autogpt_platform/backend/backend/blocks/video/duration.py
@@ -0,0 +1,71 @@
+"""MediaDurationBlock - Get the duration of a media file."""
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class MediaDurationBlock(Block):
+    """Report how long a video or audio file lasts, in seconds."""
+
+    class Input(BlockSchemaInput):
+        media_in: MediaFileType = SchemaField(
+            description="Media input (URL, data URI, or local path)."
+        )
+        is_video: bool = SchemaField(
+            description="Whether the media is a video (True) or audio (False).",
+            default=True,
+        )
+
+    class Output(BlockSchemaOutput):
+        duration: float = SchemaField(
+            description="Duration of the media file (in seconds)."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
+            description="Block to get the duration of a media file.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=MediaDurationBlock.Input,
+            output_schema=MediaDurationBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Store the media for this execution, open it and yield ``duration``."""
+        # The media is stored first so it can be opened from a local path
+        stored_path = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.media_in,
+            user_id=user_id,
+            return_content=False,
+        )
+        source_path = get_exec_file_path(graph_exec_id, stored_path)
+
+        # ``is_video`` selects which moviepy reader opens the file
+        reader = VideoFileClip if input_data.is_video else AudioFileClip
+        clip = None
+        try:
+            clip = reader(source_path)
+            seconds = clip.duration
+            yield "duration", seconds
+        finally:
+            # Always close an opened clip, even if the consumer raised
+            if clip:
+                clip.close()
--- a/autogpt_platform/backend/backend/blocks/video/loop.py
+++ b/autogpt_platform/backend/backend/blocks/video/loop.py
@@ -0,0 +1,116 @@
+"""LoopVideoBlock - Loop a video to a given duration or number of repeats."""
+
+import os
+from typing import Literal, Optional
+
+from moviepy.video.fx.Loop import Loop
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import get_video_codecs
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class LoopVideoBlock(Block):
+    """Repeat a video clip until it reaches a target duration or loop count."""
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="The input video (can be a URL, data URI, or local path)."
+        )
+        duration: Optional[float] = SchemaField(
+            description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
+            default=None,
+            ge=0.0,
+        )
+        n_loops: Optional[int] = SchemaField(
+            description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
+            default=None,
+            ge=1,
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="How to return the output video. Either a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: str = SchemaField(
+            description="Looped video returned either as a relative path or a data URI."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="8bf9eef6-5451-4213-b265-25306446e94b",
+            description="Block to loop a video to a given duration or number of repeats.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=LoopVideoBlock.Input,
+            output_schema=LoopVideoBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Loop the input video and yield the stored result as ``video_out``."""
+        # Store the input so it can be opened from this execution's files
+        stored_input = await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=input_data.video_in,
+            user_id=user_id,
+            return_content=False,
+        )
+        source_path = get_exec_file_path(graph_exec_id, stored_input)
+
+        clip: VideoFileClip | None = None
+        looped_clip: VideoFileClip | None = None
+        try:
+            clip = VideoFileClip(source_path)
+
+            # A truthy duration takes precedence over n_loops; neither is an error
+            # Note: Loop effect handles both video and audio looping automatically
+            if input_data.duration:
+                loop_effect = Loop(duration=input_data.duration)
+            elif input_data.n_loops:
+                loop_effect = Loop(n=input_data.n_loops)
+            else:
+                raise ValueError("Either 'duration' or 'n_loops' must be provided.")
+            looped_clip = clip.with_effects([loop_effect])  # type: ignore[arg-type] Clip implements shallow copy that loses type info
+            assert looped_clip is not None
+
+            # Name the output after the node execution and the input file
+            source_name = os.path.basename(stored_input)
+            output_filename = MediaFileType(f"{node_exec_id}_looped_{source_name}")
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            video_codec, audio_codec = get_video_codecs(output_abspath)
+            looped_clip.write_videofile(
+                output_abspath, codec=video_codec, audio_codec=audio_codec
+            )
+
+            # Store the rendered file to obtain a path or a data URI
+            wants_data_uri = input_data.output_return_type == "data_uri"
+            video_out = await store_media_file(
+                graph_exec_id=graph_exec_id,
+                file=output_filename,
+                user_id=user_id,
+                return_content=wants_data_uri,
+            )
+            yield "video_out", video_out
+        finally:
+            # Close the derived clip before the source clip it was built from
+            if looped_clip is not None:
+                looped_clip.close()
+            if clip is not None:
+                clip.close()
--- a/autogpt_platform/backend/backend/blocks/video/narration.py
+++ b/autogpt_platform/backend/backend/blocks/video/narration.py
@@ -0,0 +1,268 @@
+"""VideoNarrationBlock - Generate AI voice narration and add to video."""
+
+import os
+from typing import Literal
+
+from elevenlabs import ElevenLabs
+from moviepy import CompositeAudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.elevenlabs._auth import (
+    TEST_CREDENTIALS,
+    TEST_CREDENTIALS_INPUT,
+    ElevenLabsCredentials,
+    ElevenLabsCredentialsInput,
+)
+from backend.blocks.video._utils import get_video_codecs
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import CredentialsField, SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoNarrationBlock(Block):
+    """Synthesize a narration with ElevenLabs and lay it over a video."""
+
+    class Input(BlockSchemaInput):
+        credentials: ElevenLabsCredentialsInput = CredentialsField(
+            description="ElevenLabs API key for voice synthesis"
+        )
+        video_in: MediaFileType = SchemaField(
+            description="Input video (URL, data URI, or local path)"
+        )
+        script: str = SchemaField(description="Narration script text")
+        voice_id: str = SchemaField(
+            description="ElevenLabs voice ID", default="21m00Tcm4TlvDq8ikWAM"  # Rachel
+        )
+        model_id: Literal[
+            "eleven_multilingual_v2",
+            "eleven_flash_v2_5",
+            "eleven_turbo_v2_5",
+            "eleven_turbo_v2",
+        ] = SchemaField(
+            description="ElevenLabs TTS model",
+            default="eleven_multilingual_v2",
+        )
+        mix_mode: Literal["replace", "mix", "ducking"] = SchemaField(
+            description="How to combine with original audio. 'ducking' applies stronger attenuation than 'mix'.",
+            default="ducking",
+        )
+        narration_volume: float = SchemaField(
+            description="Narration volume (0.0 to 2.0)",
+            default=1.0,
+            ge=0.0,
+            le=2.0,
+            advanced=True,
+        )
+        original_volume: float = SchemaField(
+            description="Original audio volume when mixing (0.0 to 1.0)",
+            default=0.3,
+            ge=0.0,
+            le=1.0,
+            advanced=True,
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Video with narration (path or data URI)"
+        )
+        audio_file: MediaFileType = SchemaField(
+            description="Generated audio file (path or data URI)"
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="3d036b53-859c-4b17-9826-ca340f736e0e",
+            description="Generate AI narration and add to video",
+            categories={BlockCategory.MULTIMEDIA, BlockCategory.AI},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={
+                "video_in": "/tmp/test.mp4",
+                "script": "Hello world",
+                "credentials": TEST_CREDENTIALS_INPUT,
+            },
+            test_credentials=TEST_CREDENTIALS,
+            test_output=[("video_out", str), ("audio_file", str)],
+            test_mock={
+                "_generate_narration_audio": lambda *args: b"mock audio content",
+                "_add_narration_to_video": lambda *args: None,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "narrated_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store the input video; split out so tests can mock it."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store an output file; split out so tests can mock it."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _generate_narration_audio(
+        self, api_key: str, script: str, voice_id: str, model_id: str
+    ) -> bytes:
+        """Synthesize ``script`` with ElevenLabs and return the audio as bytes."""
+        tts = ElevenLabs(api_key=api_key).text_to_speech
+        # convert() hands the audio back in chunks; stitch them into one payload
+        audio_chunks = tts.convert(
+            voice_id=voice_id,
+            text=script,
+            model_id=model_id,
+        )
+        return b"".join(audio_chunks)
+
+    def _add_narration_to_video(
+        self,
+        video_abspath: str,
+        audio_abspath: str,
+        output_abspath: str,
+        mix_mode: str,
+        narration_volume: float,
+        original_volume: float,
+    ) -> None:
+        """Write a copy of the video whose audio carries the narration.
+
+        Args:
+            video_abspath: Source video to read.
+            audio_abspath: Narration audio to read.
+            output_abspath: Destination file; also passed to get_video_codecs.
+            mix_mode: "replace" keeps only the narration; "mix" layers it over
+                the original audio; any other value (i.e. "ducking") does the
+                same with the original attenuated by a further factor of 0.3.
+            narration_volume: Scale factor applied to the narration.
+            original_volume: Scale factor applied to the original audio.
+
+        A video without an audio track always gets the narration alone.
+        """
+        # Pre-bind every clip so the finally block can tell what was opened
+        video = None
+        final = None
+        original = narration_original = narration_scaled = None
+
+        try:
+            video = VideoFileClip(video_abspath)
+            narration_original = AudioFileClip(audio_abspath)
+            narration_scaled = narration_original.with_volume_scaled(narration_volume)
+
+            if mix_mode == "replace" or not video.audio:
+                # Nothing to blend: the narration becomes the whole soundtrack
+                final_audio = narration_scaled
+            else:
+                # "mix" uses original_volume as given; ducking cuts it further
+                background_volume = (
+                    original_volume if mix_mode == "mix" else original_volume * 0.3
+                )
+                original = video.audio.with_volume_scaled(background_volume)
+                final_audio = CompositeAudioClip([original, narration_scaled])
+
+            final = video.with_audio(final_audio)
+            video_codec, audio_codec = get_video_codecs(output_abspath)
+            final.write_videofile(
+                output_abspath, codec=video_codec, audio_codec=audio_codec
+            )
+
+        finally:
+            # Release order: mixed-in audio, narration clips, composite, source
+            opened = (original, narration_scaled, narration_original, final, video)
+            for resource in opened:
+                if resource:
+                    resource.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: ElevenLabsCredentials,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Narrate the input video and yield ``video_out`` then ``audio_file``.
+
+        The narration is also saved as ``{node_exec_id}_narration.mp3``. Any
+        ``Exception`` raised on the way is re-raised as ``BlockExecutionError``
+        with the original error chained.
+        """
+        try:
+            # Input video -> absolute path inside this execution's files
+            local_video_path = await self._store_input_video(
+                graph_exec_id, input_data.video_in, user_id
+            )
+            video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+            # Narration bytes from ElevenLabs, using the supplied credentials
+            api_key = credentials.api_key.get_secret_value()
+            audio_content = self._generate_narration_audio(
+                api_key, input_data.script, input_data.voice_id, input_data.model_id
+            )
+
+            # Write the narration to disk so it can be opened as an audio clip
+            audio_filename = MediaFileType(f"{node_exec_id}_narration.mp3")
+            audio_abspath = get_exec_file_path(graph_exec_id, audio_filename)
+            os.makedirs(os.path.dirname(audio_abspath), exist_ok=True)
+            with open(audio_abspath, "wb") as narration_file:
+                narration_file.write(audio_content)
+
+            # Render the narrated video under a node-specific file name
+            source_name = os.path.basename(local_video_path)
+            output_filename = MediaFileType(f"{node_exec_id}_narrated_{source_name}")
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+            self._add_narration_to_video(
+                video_abspath,
+                audio_abspath,
+                output_abspath,
+                input_data.mix_mode,
+                input_data.narration_volume,
+                input_data.original_volume,
+            )
+
+            # Both outputs honour the same return type (path or data URI)
+            return_as_data_uri = input_data.output_return_type == "data_uri"
+            video_out = await self._store_output_video(
+                graph_exec_id, output_filename, user_id, return_as_data_uri
+            )
+            audio_out = await self._store_output_video(
+                graph_exec_id, audio_filename, user_id, return_as_data_uri
+            )
+
+            yield "video_out", video_out
+            yield "audio_file", audio_out
+
+        except Exception as err:
+            raise BlockExecutionError(
+                message=f"Failed to add narration: {err}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from err
--- a/autogpt_platform/backend/backend/blocks/video/text_overlay.py
+++ b/autogpt_platform/backend/backend/blocks/video/text_overlay.py
@@ -0,0 +1,234 @@
+"""VideoTextOverlayBlock - Add text overlay to video."""
+
+import os
+from typing import Literal
+
+from moviepy import CompositeVideoClip, TextClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.blocks.video._utils import get_video_codecs
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import SchemaField
+from backend.util.exceptions import BlockExecutionError
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class VideoTextOverlayBlock(Block):
+    """Render a text caption on top of a video for a chosen time window."""
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="Input video (URL, data URI, or local path)"
+        )
+        text: str = SchemaField(description="Text to overlay on video")
+        position: Literal[
+            "top",
+            "center",
+            "bottom",
+            "top-left",
+            "top-right",
+            "bottom-left",
+            "bottom-right",
+        ] = SchemaField(description="Position of text on screen", default="bottom")
+        start_time: float | None = SchemaField(
+            description="When to show text (seconds). None = entire video",
+            default=None,
+            advanced=True,
+        )
+        end_time: float | None = SchemaField(
+            description="When to hide text (seconds). None = until end",
+            default=None,
+            advanced=True,
+        )
+        font_size: int = SchemaField(
+            description="Font size", default=48, ge=12, le=200, advanced=True
+        )
+        font_color: str = SchemaField(
+            description="Font color (hex or name)", default="white", advanced=True
+        )
+        bg_color: str | None = SchemaField(
+            description="Background color behind text (None for transparent)",
+            default=None,
+            advanced=True,
+        )
+        output_return_type: Literal["file_path", "data_uri"] = SchemaField(
+            description="Return the output as a relative path or base64 data URI.",
+            default="file_path",
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Video with text overlay (path or data URI)"
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="8ef14de6-cc90-430a-8cfa-3a003be92454",
+            description="Add text overlay/caption to video",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=self.Input,
+            output_schema=self.Output,
+            test_input={"video_in": "/tmp/test.mp4", "text": "Hello World"},
+            test_output=[("video_out", str)],
+            test_mock={
+                "_add_text_overlay": lambda *args: None,
+                "_store_input_video": lambda *args, **kwargs: "test.mp4",
+                "_store_output_video": lambda *args, **kwargs: "overlay_test.mp4",
+            },
+        )
+
+    async def _store_input_video(
+        self, graph_exec_id: str, file: MediaFileType, user_id: str
+    ) -> MediaFileType:
+        """Store the input video; split out so tests can mock it."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=False,
+        )
+
+    async def _store_output_video(
+        self,
+        graph_exec_id: str,
+        file: MediaFileType,
+        user_id: str,
+        return_content: bool,
+    ) -> MediaFileType:
+        """Store the output video; split out so tests can mock it."""
+        return await store_media_file(
+            graph_exec_id=graph_exec_id,
+            file=file,
+            user_id=user_id,
+            return_content=return_content,
+        )
+
+    def _add_text_overlay(
+        self,
+        video_abspath: str,
+        output_abspath: str,
+        text: str,
+        position: str,
+        start_time: float | None,
+        end_time: float | None,
+        font_size: int,
+        font_color: str,
+        bg_color: str | None,
+    ) -> None:
+        """Overlay ``text`` on the video and write it to ``output_abspath``.
+
+        The display window is clamped to the video so it never extends the
+        output. Raises ``ValueError`` when the clamped window is empty.
+        """
+        video = None
+        final = None
+        txt_clip = None
+        try:
+            video = VideoFileClip(video_abspath)
+
+            # Compare against None: 0.0 is a real timestamp, not "unset"
+            start = 0.0 if start_time is None else max(0.0, start_time)
+            end = video.duration if end_time is None else min(end_time, video.duration)
+            if end <= start:
+                raise ValueError(f"Text window {start}s-{end}s is empty for this video")
+            duration = end - start
+
+            txt_clip = TextClip(
+                text=text, font_size=font_size, color=font_color, bg_color=bg_color
+            )
+            # Maps the ``position`` input to (horizontal, vertical) anchors
+            pos_map = {
+                "top": ("center", "top"),
+                "center": ("center", "center"),
+                "bottom": ("center", "bottom"),
+                "top-left": ("left", "top"),
+                "top-right": ("right", "top"),
+                "bottom-left": ("left", "bottom"),
+                "bottom-right": ("right", "bottom"),
+            }
+            txt_clip = txt_clip.with_position(pos_map[position])
+            txt_clip = txt_clip.with_start(start).with_end(end).with_duration(duration)
+
+            final = CompositeVideoClip([video, txt_clip])
+            video_codec, audio_codec = get_video_codecs(output_abspath)
+            final.write_videofile(
+                output_abspath, codec=video_codec, audio_codec=audio_codec
+            )
+        finally:
+            if txt_clip:
+                txt_clip.close()
+            if final:
+                final.close()
+            if video:
+                video.close()
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        node_exec_id: str,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Validate the time range, render the overlay and yield ``video_out``."""
+        # Validate time range if both are provided
+        if (
+            input_data.start_time is not None
+            and input_data.end_time is not None
+            and input_data.end_time <= input_data.start_time
+        ):
+            raise BlockExecutionError(
+                message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
+                block_name=self.name,
+                block_id=str(self.id),
+            )
+
+        try:
+            # Store the input video locally
+            local_video_path = await self._store_input_video(
+                graph_exec_id, input_data.video_in, user_id
+            )
+            video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+            # Build output path
+            output_filename = MediaFileType(
+                f"{node_exec_id}_overlay_{os.path.basename(local_video_path)}"
+            )
+            output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+            self._add_text_overlay(
+                video_abspath,
+                output_abspath,
+                input_data.text,
+                input_data.position,
+                input_data.start_time,
+                input_data.end_time,
+                input_data.font_size,
+                input_data.font_color,
+                input_data.bg_color,
+            )
+
+            # Return as data URI or path
+            video_out = await self._store_output_video(
+                graph_exec_id,
+                output_filename,
+                user_id,
+                input_data.output_return_type == "data_uri",
+            )
+
+            yield "video_out", video_out
+        except BlockExecutionError:
+            raise
+        except Exception as e:
+            raise BlockExecutionError(
+                message=f"Failed to add text overlay: {e}",
+                block_name=self.name,
+                block_id=str(self.id),
+            ) from e
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -36,12 +36,14 @@ from backend.blocks.replicate.replicate_block import ReplicateModelBlock
 from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
 from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
 from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock
+from backend.blocks.video.narration import VideoNarrationBlock
 from backend.data.block import Block, BlockCost, BlockCostType
 from backend.integrations.credentials_store import (
    aiml_api_credentials,
    anthropic_credentials,
    apollo_credentials,
    did_credentials,
+    elevenlabs_credentials,
    enrichlayer_credentials,
    groq_credentials,
    ideogram_credentials,
@@ -640,4 +642,16 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
            },
        ),
    ],
+    VideoNarrationBlock: [
+        BlockCost(
+            cost_amount=5,  # ElevenLabs TTS cost
+            cost_filter={
+                "credentials": {
+                    "id": elevenlabs_credentials.id,
+                    "provider": elevenlabs_credentials.provider,
+                    "type": elevenlabs_credentials.type,
+                }
+            },
+        )
+    ],
 }
--- a/autogpt_platform/backend/backend/integrations/credentials_store.py
+++ b/autogpt_platform/backend/backend/integrations/credentials_store.py
@@ -224,6 +224,14 @@ openweathermap_credentials = APIKeyCredentials(
    expires_at=None,
 )

+elevenlabs_credentials = APIKeyCredentials(
+    id="f4a8b6c2-3d1e-4f5a-9b8c-7d6e5f4a3b2c",
+    provider="elevenlabs",
+    api_key=SecretStr(settings.secrets.elevenlabs_api_key),
+    title="Use Credits for ElevenLabs",
+    expires_at=None,
+)
+
 DEFAULT_CREDENTIALS = [
    ollama_credentials,
    revid_credentials,
@@ -252,6 +260,7 @@ DEFAULT_CREDENTIALS = [
    v0_credentials,
    webshare_proxy_credentials,
    openweathermap_credentials,
+    elevenlabs_credentials,
 ]

 SYSTEM_CREDENTIAL_IDS = {cred.id for cred in DEFAULT_CREDENTIALS}
@@ -366,6 +375,8 @@ class IntegrationCredentialsStore:
            all_credentials.append(webshare_proxy_credentials)
        if settings.secrets.openweathermap_api_key:
            all_credentials.append(openweathermap_credentials)
+        if settings.secrets.elevenlabs_api_key:
+            all_credentials.append(elevenlabs_credentials)
        return all_credentials

    async def get_creds_by_id(
--- a/autogpt_platform/backend/backend/integrations/providers.py
+++ b/autogpt_platform/backend/backend/integrations/providers.py
@@ -18,6 +18,7 @@ class ProviderName(str, Enum):
    DISCORD = "discord"
    D_ID = "d_id"
    E2B = "e2b"
+    ELEVENLABS = "elevenlabs"
    FAL = "fal"
    GITHUB = "github"
    GOOGLE = "google"
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -630,6 +630,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
    e2b_api_key: str = Field(default="", description="E2B API key")
    nvidia_api_key: str = Field(default="", description="Nvidia API key")
    mem0_api_key: str = Field(default="", description="Mem0 API key")
+    elevenlabs_api_key: str = Field(default="", description="ElevenLabs API key")

    linear_client_id: str = Field(default="", description="Linear client ID")
    linear_client_secret: str = Field(default="", description="Linear client secret")
--- a/autogpt_platform/backend/poetry.lock
+++ b/autogpt_platform/backend/poetry.lock
@@ -1169,6 +1169,29 @@ attrs = ">=21.3.0"
 e2b = ">=1.5.4,<2.0.0"
 httpx = ">=0.20.0,<1.0.0"

+[[package]]
+name = "elevenlabs"
+version = "1.59.0"
+description = ""
+optional = false
+python-versions = "<4.0,>=3.8"
+groups = ["main"]
+files = [
+    {file = "elevenlabs-1.59.0-py3-none-any.whl", hash = "sha256:468145db81a0bc867708b4a8619699f75583e9481b395ec1339d0b443da771ed"},
+    {file = "elevenlabs-1.59.0.tar.gz", hash = "sha256:16e735bd594e86d415dd445d249c8cc28b09996cfd627fbc10102c0a84698859"},
+]
+
+[package.dependencies]
+httpx = ">=0.21.2"
+pydantic = ">=1.9.2"
+pydantic-core = ">=2.18.2,<3.0.0"
+requests = ">=2.20"
+typing_extensions = ">=4.0.0"
+websockets = ">=11.0"
+
+[package.extras]
+pyaudio = ["pyaudio (>=0.2.14)"]
+
 [[package]]
 name = "email-validator"
 version = "2.2.0"
@@ -7361,6 +7384,28 @@ files = [
 defusedxml = ">=0.7.1,<0.8.0"
 requests = "*"

+[[package]]
+name = "yt-dlp"
+version = "2025.12.8"
+description = "A feature-rich command-line audio/video downloader"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+files = [
+    {file = "yt_dlp-2025.12.8-py3-none-any.whl", hash = "sha256:36e2584342e409cfbfa0b5e61448a1c5189e345cf4564294456ee509e7d3e065"},
+    {file = "yt_dlp-2025.12.8.tar.gz", hash = "sha256:b773c81bb6b71cb2c111cfb859f453c7a71cf2ef44eff234ff155877184c3e4f"},
+]
+
+[package.extras]
+build = ["build", "hatchling (>=1.27.0)", "pip", "setuptools (>=71.0.2)", "wheel"]
+curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || >=0.10.dev0,<0.14) ; implementation_name == \"cpython\""]
+default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=2.0.2,<3)", "websockets (>=13.0)", "yt-dlp-ejs (==0.3.2)"]
+dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.14.0,<0.15.0)"]
+pyinstaller = ["pyinstaller (>=6.17.0)"]
+secretstorage = ["cffi", "secretstorage"]
+static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.14.0,<0.15.0)"]
+test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
+
 [[package]]
 name = "zerobouncesdk"
 version = "1.1.2"
@@ -7512,4 +7557,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.14"
-content-hash = "18b92e09596298c82432e4d0a85cb6d80a40b4229bee0a0c15f0529fd6cb21a4"
+content-hash = "a82dc5db159eb332ef6ae27d392dc1dfdeb2b70ef3595482829e51fdb9e3ffe2"
--- a/autogpt_platform/backend/pyproject.toml
+++ b/autogpt_platform/backend/pyproject.toml
@@ -20,6 +20,7 @@ click = "^8.2.0"
 cryptography = "^45.0"
 discord-py = "^2.5.2"
 e2b-code-interpreter = "^1.5.2"
+elevenlabs = "^1.50.0"
 fastapi = "^0.116.1"
 feedparser = "^6.0.11"
 flake8 = "^7.3.0"
@@ -71,6 +72,7 @@ tweepy = "^4.16.0"
 uvicorn = { extras = ["standard"], version = "^0.35.0" }
 websockets = "^15.0"
 youtube-transcript-api = "^1.2.1"
+yt-dlp = "2025.12.08"
 zerobouncesdk = "^1.1.2"
 # NOTE: please insert new dependencies in their alphabetical location
 pytest-snapshot = "^0.9.0"
--- a/autogpt_platform/frontend/src/components/contextual/CredentialsInput/helpers.ts
+++ b/autogpt_platform/frontend/src/components/contextual/CredentialsInput/helpers.ts
@@ -26,6 +26,7 @@ export const providerIcons: Partial<
  nvidia: fallbackIcon,
  discord: FaDiscord,
  d_id: fallbackIcon,
+  elevenlabs: fallbackIcon,
  google_maps: FaGoogle,
  jina: fallbackIcon,
  ideogram: fallbackIcon,
--- a/backend/blocks/video/init.py
+++ b/backend/blocks/video/init.py
@@ -1 +0,0 @@
-# Video editing blocks
--- a/docs/integrations/README.md
+++ b/docs/integrations/README.md
@@ -233,6 +233,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim
 | [Stagehand Extract](block-integrations/stagehand/blocks.md#stagehand-extract) | Extract structured data from a webpage |
 | [Stagehand Observe](block-integrations/stagehand/blocks.md#stagehand-observe) | Find suggested actions for your workflows |
 | [Unreal Text To Speech](block-integrations/llm.md#unreal-text-to-speech) | Converts text to speech using the Unreal Speech API |
+| [Video Narration](block-integrations/video/narration.md#video-narration) | Generate AI narration and add to video |

 ## Search and Information Retrieval

@@ -472,9 +473,13 @@ Below is a comprehensive list of all available blocks, categorized by their prim

 | Block Name | Description |
 |------------|-------------|
-| [Add Audio To Video](block-integrations/multimedia.md#add-audio-to-video) | Block to attach an audio file to a video file using moviepy |
-| [Loop Video](block-integrations/multimedia.md#loop-video) | Block to loop a video to a given duration or number of repeats |
-| [Media Duration](block-integrations/multimedia.md#media-duration) | Block to get the duration of a media file |
+| [Add Audio To Video](block-integrations/video/add_audio.md#add-audio-to-video) | Block to attach an audio file to a video file using moviepy |
+| [Loop Video](block-integrations/video/loop.md#loop-video) | Block to loop a video to a given duration or number of repeats |
+| [Media Duration](block-integrations/video/duration.md#media-duration) | Block to get the duration of a media file |
+| [Video Clip](block-integrations/video/clip.md#video-clip) | Extract a time segment from a video |
+| [Video Concat](block-integrations/video/concat.md#video-concat) | Merge multiple video clips into one continuous video |
+| [Video Download](block-integrations/video/download.md#video-download) | Download video from URL (YouTube, Vimeo, news sites, direct links) |
+| [Video Text Overlay](block-integrations/video/text_overlay.md#video-text-overlay) | Add text overlay/caption to video |

 ## Productivity

--- a/docs/integrations/SUMMARY.md
+++ b/docs/integrations/SUMMARY.md
@@ -85,7 +85,6 @@
 * [LLM](block-integrations/llm.md)
 * [Logic](block-integrations/logic.md)
 * [Misc](block-integrations/misc.md)
-* [Multimedia](block-integrations/multimedia.md)
 * [Notion Create Page](block-integrations/notion/create_page.md)
 * [Notion Read Database](block-integrations/notion/read_database.md)
 * [Notion Read Page](block-integrations/notion/read_page.md)
@@ -129,5 +128,13 @@
 * [Twitter Timeline](block-integrations/twitter/timeline.md)
 * [Twitter Tweet Lookup](block-integrations/twitter/tweet_lookup.md)
 * [Twitter User Lookup](block-integrations/twitter/user_lookup.md)
+* [Video Add Audio](block-integrations/video/add_audio.md)
+* [Video Clip](block-integrations/video/clip.md)
+* [Video Concat](block-integrations/video/concat.md)
+* [Video Download](block-integrations/video/download.md)
+* [Video Duration](block-integrations/video/duration.md)
+* [Video Loop](block-integrations/video/loop.md)
+* [Video Narration](block-integrations/video/narration.md)
+* [Video Text Overlay](block-integrations/video/text_overlay.md)
 * [Wolfram LLM API](block-integrations/wolfram/llm_api.md)
 * [Zerobounce Validate Emails](block-integrations/zerobounce/validate_emails.md)
--- a/docs/integrations/block-integrations/multimedia.md
+++ b/docs/integrations/block-integrations/multimedia.md
@@ -1,119 +0,0 @@
-# Multimedia
-<!-- MANUAL: file_description -->
-Blocks for processing and manipulating video and audio files.
-<!-- END MANUAL -->
-
-## Add Audio To Video
-
-### What it is
-Block to attach an audio file to a video file using moviepy.
-
-### How it works
-<!-- MANUAL: how_it_works -->
-This block combines a video file with an audio file using the moviepy library. The audio track is attached to the video, optionally with volume adjustment via the volume parameter (1.0 = original volume).
-
-Input files can be URLs, data URIs, or local paths. The output can be returned as either a file path or base64 data URI.
-<!-- END MANUAL -->
-
-### Inputs
-
-| Input | Description | Type | Required |
-|-------|-------------|------|----------|
-| video_in | Video input (URL, data URI, or local path). | str (file) | Yes |
-| audio_in | Audio input (URL, data URI, or local path). | str (file) | Yes |
-| volume | Volume scale for the newly attached audio track (1.0 = original). | float | No |
-| output_return_type | Return the final output as a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
-
-### Outputs
-
-| Output | Description | Type |
-|--------|-------------|------|
-| error | Error message if the operation failed | str |
-| video_out | Final video (with attached audio), as a path or data URI. | str (file) |
-
-### Possible use case
-<!-- MANUAL: use_case -->
-**Add Voiceover**: Combine generated voiceover audio with video content for narrated videos.
-
-**Background Music**: Add music tracks to silent videos or replace existing audio.
-
-**Audio Replacement**: Swap the audio track of a video for localization or accessibility.
-<!-- END MANUAL -->
-
---
-
-## Loop Video
-
-### What it is
-Block to loop a video to a given duration or number of repeats.
-
-### How it works
-<!-- MANUAL: how_it_works -->
-This block extends a video by repeating it to reach a target duration or number of loops. Set duration to specify the total length in seconds, or use n_loops to repeat the video a specific number of times.
-
-The looped video is seamlessly concatenated and can be output as a file path or base64 data URI.
-<!-- END MANUAL -->
-
-### Inputs
-
-| Input | Description | Type | Required |
-|-------|-------------|------|----------|
-| video_in | The input video (can be a URL, data URI, or local path). | str (file) | Yes |
-| duration | Target duration (in seconds) to loop the video to. If omitted, defaults to no looping. | float | No |
-| n_loops | Number of times to repeat the video. If omitted, defaults to 1 (no repeat). | int | No |
-| output_return_type | How to return the output video. Either a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
-
-### Outputs
-
-| Output | Description | Type |
-|--------|-------------|------|
-| error | Error message if the operation failed | str |
-| video_out | Looped video returned either as a relative path or a data URI. | str |
-
-### Possible use case
-<!-- MANUAL: use_case -->
-**Background Videos**: Loop short clips to match the duration of longer audio or content.
-
-**GIF-Like Content**: Create seamlessly looping video content for social media.
-
-**Filler Content**: Extend short video clips to meet minimum duration requirements.
-<!-- END MANUAL -->
-
---
-
-## Media Duration
-
-### What it is
-Block to get the duration of a media file.
-
-### How it works
-<!-- MANUAL: how_it_works -->
-This block analyzes a media file and returns its duration in seconds. Set is_video to true for video files or false for audio files to ensure proper parsing.
-
-The input can be a URL, data URI, or local file path. The duration is returned as a float for precise timing calculations.
-<!-- END MANUAL -->
-
-### Inputs
-
-| Input | Description | Type | Required |
-|-------|-------------|------|----------|
-| media_in | Media input (URL, data URI, or local path). | str (file) | Yes |
-| is_video | Whether the media is a video (True) or audio (False). | bool | No |
-
-### Outputs
-
-| Output | Description | Type |
-|--------|-------------|------|
-| error | Error message if the operation failed | str |
-| duration | Duration of the media file (in seconds). | float |
-
-### Possible use case
-<!-- MANUAL: use_case -->
-**Video Processing Prep**: Get video duration before deciding how to loop, trim, or synchronize it.
-
-**Audio Matching**: Determine audio length to generate matching-length video content.
-
-**Content Validation**: Verify that uploaded media meets duration requirements.
-<!-- END MANUAL -->
-
---
--- a/docs/integrations/block-integrations/video/add_audio.md
+++ b/docs/integrations/block-integrations/video/add_audio.md
@@ -0,0 +1,41 @@
+
+# Video Add Audio
+<!-- MANUAL: file_description -->
+This block attaches a separate audio track to a video file, either replacing the original audio or combining with it.
+<!-- END MANUAL -->
+
+## Add Audio To Video
+
+### What it is
+Block to attach an audio file to a video file using moviepy.
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses MoviePy to combine video and audio files. It loads the video and audio inputs (which can be URLs, data URIs, or local paths), optionally scales the audio volume, then writes the combined result to a new video file using H.264 video codec and AAC audio codec.
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| video_in | Video input (URL, data URI, or local path). | str (file) | Yes |
+| audio_in | Audio input (URL, data URI, or local path). | str (file) | Yes |
+| volume | Volume scale for the newly attached audio track (1.0 = original). | float | No |
+| output_return_type | Return the final output as a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| video_out | Final video (with attached audio), as a path or data URI. | str (file) |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Adding background music to a silent screen recording
+- Replacing original audio with a voiceover or translated audio track
+- Combining AI-generated speech with stock footage
+- Adding sound effects to video content
+<!-- END MANUAL -->
+
+---
--- a/docs/integrations/block-integrations/video/clip.md
+++ b/docs/integrations/block-integrations/video/clip.md
@@ -0,0 +1,42 @@
+# Video Clip
+<!-- MANUAL: file_description -->
+This block extracts a specific time segment from a video file, allowing you to trim videos to precise start and end times.
+<!-- END MANUAL -->
+
+## Video Clip
+
+### What it is
+Extract a time segment from a video
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses MoviePy's `subclipped` method to extract the portion of the video between the specified start and end times. It validates that the end time is greater than the start time, then creates a new video file containing only the selected segment. The output is encoded with H.264 video codec and AAC audio codec, preserving both video and audio from the original clip.
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| video_in | Input video (URL, data URI, or local path) | str (file) | Yes |
+| start_time | Start time in seconds | float | Yes |
+| end_time | End time in seconds | float | Yes |
+| output_format | Output format | "mp4" \| "webm" \| "mkv" \| "mov" | No |
+| output_return_type | Return the output as a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| video_out | Clipped video file (path or data URI) | str (file) |
+| duration | Clip duration in seconds | float |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Extracting highlights from a longer video
+- Trimming intro/outro from recorded content
+- Creating short clips for social media from longer videos
+- Isolating specific segments for further processing in a workflow
+<!-- END MANUAL -->
+
+---
--- a/docs/integrations/block-integrations/video/concat.md
+++ b/docs/integrations/block-integrations/video/concat.md
@@ -0,0 +1,42 @@
+# Video Concat
+<!-- MANUAL: file_description -->
+This block merges multiple video clips into a single continuous video, with optional transitions between clips.
+<!-- END MANUAL -->
+
+## Video Concat
+
+### What it is
+Merge multiple video clips into one continuous video
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses MoviePy's `concatenate_videoclips` function to join multiple videos in sequence. It supports three transition modes: **none** (direct concatenation), **crossfade** (smooth blending where clips overlap), and **fade_black** (each clip fades out to black and the next fades in). At least 2 videos are required. The output is encoded with H.264 video codec and AAC audio codec.
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| videos | List of video files to concatenate (in order) | List[str (file)] | Yes |
+| transition | Transition between clips | "none" \| "crossfade" \| "fade_black" | No |
+| transition_duration | Transition duration in seconds | int | No |
+| output_format | Output format | "mp4" \| "webm" \| "mkv" \| "mov" | No |
+| output_return_type | Return the output as a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| video_out | Concatenated video file (path or data URI) | str (file) |
+| total_duration | Total duration in seconds | float |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Combining multiple clips into a compilation video
+- Assembling intro, main content, and outro segments
+- Creating montages from multiple source videos
+- Building video playlists or slideshows with transitions
+<!-- END MANUAL -->
+
+---
--- a/docs/integrations/block-integrations/video/download.md
+++ b/docs/integrations/block-integrations/video/download.md
@@ -0,0 +1,43 @@
+# Video Download
+<!-- MANUAL: file_description -->
+This block downloads videos from URLs, supporting a wide range of video platforms and direct links.
+<!-- END MANUAL -->
+
+## Video Download
+
+### What it is
+Download video from URL (YouTube, Vimeo, news sites, direct links)
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses yt-dlp, a powerful video downloading library that supports over 1000 websites. It accepts a URL, quality preference, and output format, then downloads the video while merging the best available video and audio streams for the selected quality. Quality options: **best** (highest available), **1080p/720p/480p** (maximum resolution at that height), **audio_only** (extracts just the audio track).
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| url | URL of the video to download (YouTube, Vimeo, direct link, etc.) | str | Yes |
+| quality | Video quality preference | "best" \| "1080p" \| "720p" \| "480p" \| "audio_only" | No |
+| output_format | Output video format | "mp4" \| "webm" \| "mkv" | No |
+| output_return_type | Return the output as a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| video_file | Downloaded video (path or data URI) | str (file) |
+| duration | Video duration in seconds | float |
+| title | Video title from source | str |
+| source_url | Original source URL | str |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Downloading source videos for editing or remixing
+- Archiving video content for offline processing
+- Extracting audio from videos for transcription or podcast creation
+- Gathering video content for automated content pipelines
+<!-- END MANUAL -->
+
+---
--- a/docs/integrations/block-integrations/video/duration.md
+++ b/docs/integrations/block-integrations/video/duration.md
@@ -0,0 +1,38 @@
+# Video Duration
+<!-- MANUAL: file_description -->
+This block retrieves the duration of video or audio files, useful for planning and conditional logic in media workflows.
+<!-- END MANUAL -->
+
+## Media Duration
+
+### What it is
+Block to get the duration of a media file.
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses MoviePy to load the media file and extract its duration property. It supports both video files (using VideoFileClip) and audio files (using AudioFileClip), determined by the `is_video` flag. The media can be provided as a URL, data URI, or local file path. The duration is returned in seconds as a floating-point number.
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| media_in | Media input (URL, data URI, or local path). | str (file) | Yes |
+| is_video | Whether the media is a video (True) or audio (False). | bool | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| duration | Duration of the media file (in seconds). | float |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Checking video length before processing to avoid timeout issues
+- Calculating how many times to loop a video to reach a target duration
+- Validating that uploaded content meets length requirements
+- Building conditional workflows based on media duration
+<!-- END MANUAL -->
+
+---
--- a/docs/integrations/block-integrations/video/loop.md
+++ b/docs/integrations/block-integrations/video/loop.md
@@ -0,0 +1,40 @@
+# Video Loop
+<!-- MANUAL: file_description -->
+This block repeats a video to extend its duration, either to a specific length or a set number of repetitions.
+<!-- END MANUAL -->
+
+## Loop Video
+
+### What it is
+Block to loop a video to a given duration or number of repeats.
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses MoviePy's Loop effect to repeat a video clip. You can specify either a target duration (the video will repeat until reaching that length) or a number of loops (the video will repeat that many times). The Loop effect handles both video and audio looping automatically, maintaining sync. Either `duration` or `n_loops` must be provided. The output is encoded with H.264 video codec and AAC audio codec.
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| video_in | The input video (can be a URL, data URI, or local path). | str (file) | Yes |
+| duration | Target duration (in seconds) to loop the video to. If omitted, defaults to no looping. | float | No |
+| n_loops | Number of times to repeat the video. If omitted, defaults to 1 (no repeat). | int | No |
+| output_return_type | How to return the output video. Either a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| video_out | Looped video returned either as a relative path or a data URI. | str |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Extending a short background video to match the length of narration audio
+- Creating seamless looping content for digital signage
+- Repeating a product demo video multiple times for emphasis
+- Extending short clips to meet minimum duration requirements for platforms
+<!-- END MANUAL -->
+
+---
--- a/docs/integrations/block-integrations/video/narration.md
+++ b/docs/integrations/block-integrations/video/narration.md
@@ -0,0 +1,44 @@
+# Video Narration
+<!-- MANUAL: file_description -->
+This block generates AI voiceover narration using ElevenLabs and adds it to a video, with flexible audio mixing options.
+<!-- END MANUAL -->
+
+## Video Narration
+
+### What it is
+Generate AI narration and add to video
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses ElevenLabs text-to-speech API to generate natural-sounding narration from your script. It then combines the narration with the video using MoviePy. Three audio mixing modes are available: **replace** (completely replaces original audio), **mix** (blends narration with original audio at configurable volumes), and **ducking** (similar to mix but applies stronger attenuation to original audio, making narration more prominent). The block outputs both the final video and the generated audio file separately.
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| video_in | Input video (URL, data URI, or local path) | str (file) | Yes |
+| script | Narration script text | str | Yes |
+| voice_id | ElevenLabs voice ID | str | No |
+| mix_mode | How to combine with original audio. 'ducking' applies stronger attenuation than 'mix'. | "replace" \| "mix" \| "ducking" | No |
+| narration_volume | Narration volume (0.0 to 2.0) | float | No |
+| original_volume | Original audio volume when mixing (0.0 to 1.0) | float | No |
+| output_return_type | Return the output as a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| video_out | Video with narration (path or data URI) | str (file) |
+| audio_file | Generated audio file (path or data URI) | str (file) |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Adding professional voiceover to product demos or tutorials
+- Creating narrated explainer videos from screen recordings
+- Generating multi-language versions of video content
+- Adding commentary to gameplay or walkthrough videos
+<!-- END MANUAL -->
+
+---
--- a/docs/integrations/block-integrations/video/text_overlay.md
+++ b/docs/integrations/block-integrations/video/text_overlay.md
@@ -0,0 +1,45 @@
+# Video Text Overlay
+<!-- MANUAL: file_description -->
+This block adds customizable text captions or titles to videos, with control over positioning, timing, and styling.
+<!-- END MANUAL -->
+
+## Video Text Overlay
+
+### What it is
+Add text overlay/caption to video
+
+### How it works
+<!-- MANUAL: how_it_works -->
+The block uses MoviePy's TextClip and CompositeVideoClip to render text onto video frames. The text is created as a separate clip with configurable font size, color, and optional background color, then composited over the video at the specified position. Timing can be controlled to show text only during specific portions of the video. Position options include horizontally centered placements (top, center, bottom) and corner placements (top-left, top-right, bottom-left, bottom-right). The output is encoded with H.264 video codec and AAC audio codec.
+<!-- END MANUAL -->
+
+### Inputs
+
+| Input | Description | Type | Required |
+|-------|-------------|------|----------|
+| video_in | Input video (URL, data URI, or local path) | str (file) | Yes |
+| text | Text to overlay on video | str | Yes |
+| position | Position of text on screen | "top" \| "center" \| "bottom" \| "top-left" \| "top-right" \| "bottom-left" \| "bottom-right" | No |
+| start_time | When to show text (seconds). None = entire video | float | No |
+| end_time | When to hide text (seconds). None = until end | float | No |
+| font_size | Font size | int | No |
+| font_color | Font color (hex or name) | str | No |
+| bg_color | Background color behind text (None for transparent) | str | No |
+| output_return_type | Return the output as a relative path or base64 data URI. | "file_path" \| "data_uri" | No |
+
+### Outputs
+
+| Output | Description | Type |
+|--------|-------------|------|
+| error | Error message if the operation failed | str |
+| video_out | Video with text overlay (path or data URI) | str (file) |
+
+### Possible use case
+<!-- MANUAL: use_case -->
+- Adding titles or chapter headings to video content
+- Creating lower-thirds with speaker names or captions
+- Watermarking videos with branding text
+- Adding call-to-action text at specific moments in a video
+<!-- END MANUAL -->
+
+---
Author	SHA1	Message	Date
Nicholas Tindle	6d6d3b820e	feat(video): add model_id to VideoNarrationBlock for enhanced TTS model selection	2026-01-23 14:20:04 -06:00
Nicholas Tindle	8b5c018032	feat(video): add video codec utility and update video processing blocks for codec handling	2026-01-23 13:52:11 -06:00
Nicholas Tindle	b5611b00b3	feat(video): update video processing blocks and documentation for enhanced functionality	2026-01-23 13:27:34 -06:00
Nicholas Tindle	6cd62c4d50	Merge branch 'dev' into feature/video-editing-blocks	2026-01-23 12:39:34 -06:00
Nicholas Tindle	9f4c33a695	feat(video): refactor video storage methods for improved testability across blocks	2026-01-23 12:36:28 -06:00
Nicholas Tindle	b0debe9488	Merge branch 'feature/video-editing-blocks' of https://github.com/Significant-Gravitas/AutoGPT into feature/video-editing-blocks	2026-01-23 12:16:34 -06:00
Nicholas Tindle	b20767bde9	feat(blocks): add ElevenLabs integration and enhance video processing blocks with media file handling	2026-01-23 12:15:59 -06:00
claude[bot]	b9a9481381	chore(backend): regenerate poetry.lock file Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>	2026-01-23 01:43:25 +00:00
Nicholas Tindle	d2d2a0c0c9	feat(backend): integrate ElevenLabs for video narration and add cost configuration - Implemented ElevenLabs API integration for generating AI narration in videos. - Updated VideoNarrationBlock to handle audio generation and mixing with video. - Added ElevenLabs credentials to the credentials store. - Configured block costs for using ElevenLabs TTS. - Enhanced video processing blocks (concat, download, text overlay) for improved functionality. - Updated dependencies in poetry.lock for ElevenLabs SDK and yt-dlp. - Added provider icon for ElevenLabs in frontend credentials input.	2026-01-22 19:26:39 -06:00
Nicholas Tindle	521f69220d	feat(blocks): export all 8 video blocks from module Includes migrated blocks from media.py: - MediaDurationBlock - LoopVideoBlock - AddAudioToVideoBlock	2026-01-22 13:55:22 -06:00
Nicholas Tindle	368adc985d	feat(blocks): migrate AddAudioToVideoBlock from media.py Per review feedback from @majdyz - consolidating video blocks	2026-01-22 13:55:03 -06:00
Nicholas Tindle	8c3216f0a2	feat(blocks): migrate LoopVideoBlock from media.py Per review feedback from @majdyz - consolidating video blocks	2026-01-22 13:55:02 -06:00
Nicholas Tindle	94063616e5	feat(blocks): migrate MediaDurationBlock from media.py Per review feedback from @majdyz - consolidating video blocks	2026-01-22 13:55:00 -06:00
Nicholas Tindle	2433a86cb1	fix(blocks): correct import paths in video __init__.py	2026-01-22 13:52:26 -06:00
Nicholas Tindle	0ede203f8e	feat(blocks): add VideoNarrationBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Close AudioFileClip in finally block - Document that ducking = reduced volume mix - Extract helper method for test mocking - Proper resource cleanup	2026-01-22 13:52:10 -06:00
Nicholas Tindle	dc751316c5	feat(blocks): add VideoTextOverlayBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Add start_time/end_time validation - Extract helper method for test mocking - Proper resource cleanup in finally	2026-01-22 13:51:37 -06:00
Nicholas Tindle	e7fb54e6af	feat(blocks): add VideoDownloadBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Extract helper method for test mocking	2026-01-22 13:51:04 -06:00
Nicholas Tindle	7b76f4d1e4	feat(blocks): add VideoConcatBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Constrain output_format to enum - Add ge=0.0 to transition_duration - Extract helper method for test mocking - Proper resource cleanup in finally	2026-01-22 13:50:35 -06:00
Nicholas Tindle	3cc56de0fa	feat(blocks): add VideoClipBlock - Move imports to top level - Use tempfile for secure temp paths - Add exception chaining (from e) - Constrain output_format to enum - Extract helper method for test mocking - Proper resource cleanup in finally	2026-01-22 13:50:12 -06:00
Nicholas Tindle	d2bead0f7a	feat(blocks): create video module with all blocks Consolidate video editing blocks into dedicated module. Migrate blocks from media.py per review feedback. Addresses: @majdyz review comment	2026-01-22 13:49:48 -06:00
claude[bot]	f8d3893c16	fix(blocks): Address review feedback for video editing blocks - Add start_time < end_time validation in VideoClipBlock and VideoTextOverlayBlock - Fix resource leaks: close AudioFileClip in narration.py, TextClip in text_overlay.py - Fix concat.py: proper resource cleanup in finally block, load clips individually - Implement proper crossfade using crossfadein/crossfadeout - Implement ducking mode with stronger attenuation (0.3x original_volume) - Remove unused start_time/end_time params from VideoDownloadBlock - Fix None handling for duration/title in download.py (use 'or' instead of 'get' default) - Add exception chaining with 'from e' in all blocks - Add minimum clips validation in VideoConcatBlock - Sort __all__ in __init__.py - Increase ElevenLabs API timeout to 120s for longer scripts Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>	2026-01-18 23:27:04 +00:00
Nicholas Tindle	1cfbc0dd08	feat(video): Update __init__.py with full exports	2026-01-18 15:34:04 -06:00
Nicholas Tindle	ff84643b48	feat(video): Add VideoNarrationBlock	2026-01-18 15:33:48 -06:00
Nicholas Tindle	c19c3c834a	feat(video): Add VideoTextOverlayBlock	2026-01-18 15:33:47 -06:00
Nicholas Tindle	d0f7ba8cfd	feat(video): Add VideoConcatBlock	2026-01-18 15:33:46 -06:00
Nicholas Tindle	2a855f4bd0	feat(video): Add VideoClipBlock	2026-01-18 15:32:59 -06:00
Nicholas Tindle	b93bb3b9f8	feat(video): Add VideoDownloadBlock	2026-01-18 15:32:58 -06:00