Merge branch 'dev' into codex/add-edit-video-and-transcribe-video-blocks

fix(blocks): add missing user_id parameter to video blocks
Add required user_id parameter to TranscribeVideoBlock and EditVideoByTextBlock run methods, and pass it to store_media_file() calls to fix block test failures. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Nicholas Tindle <ntindle@users.noreply.github.com>
2026-01-20 20:48:11 -05:00 · 2026-01-16 15:05:33 -06:00 · 2026-01-16 17:57:14 +00:00 · 2026-01-16 17:57:11 +00:00 · 2026-01-16 17:57:07 +00:00 · 2026-01-16 17:57:03 +00:00
5 changed files with 350 additions and 0 deletions
--- a/autogpt_platform/backend/backend/blocks/edit_video_by_text.py
+++ b/autogpt_platform/backend/backend/blocks/edit_video_by_text.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+import base64
+from typing import Literal
+
+from pydantic import SecretStr
+from replicate.client import Client as ReplicateClient
+from replicate.helpers import FileOutput
+
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import (
+    APIKeyCredentials,
+    CredentialsField,
+    CredentialsMetaInput,
+    SchemaField,
+)
+from backend.integrations.providers import ProviderName
+from backend.util.file import get_exec_file_path, store_media_file
+from backend.util.type import MediaFileType
+
+# Placeholder Replicate API-key credentials passed to the block as `test_credentials`.
+TEST_CREDENTIALS = APIKeyCredentials(
+    title="Mock Replicate API key",
+    provider="replicate",
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    api_key=SecretStr("mock-replicate-api-key"),
+    expires_at=None,
+)
+# Metadata-only view of the credentials above (the API key itself is left out);
+# used as the `credentials` entry of the block's `test_input`.
+TEST_CREDENTIALS_INPUT = {
+    attr: getattr(TEST_CREDENTIALS, attr)
+    for attr in ("provider", "id", "type", "title")
+}
+
+
+class EditVideoByTextBlock(Block):
+    """Edit a video through Replicate by supplying the transcript it should have.
+
+    ``run`` stores the incoming video for the current graph execution, passes
+    the stored file to ``edit_video`` and yields the resulting video URL along
+    with the transcript that was requested. Failures are reported on the
+    ``error`` output instead of being raised.
+    """
+
+    # Inputs accepted by the block.
+    class Input(BlockSchema):
+        credentials: CredentialsMetaInput[
+            Literal[ProviderName.REPLICATE], Literal["api_key"]
+        ] = CredentialsField(
+            description="The Replicate integration can be used with "
+            "any API key with sufficient permissions for the blocks it is used on.",
+        )
+        video_in: MediaFileType = SchemaField(
+            description="Video file to edit",
+        )
+        transcription: str = SchemaField(
+            description="Desired transcript for the output video",
+        )
+        split_at: str = SchemaField(
+            description="Granularity for transcript matching",
+            default="word",
+        )
+
+    # Outputs produced by the block.
+    class Output(BlockSchema):
+        video_url: str = SchemaField(
+            description="URL of the edited video",
+        )
+        transcription: str = SchemaField(
+            description="Transcription used for editing",
+        )
+        error: str = SchemaField(
+            description="Error message if something fails",
+            default="",
+        )
+
+    def __init__(self) -> None:
+        """Register the block's id, schemas and self-test fixtures."""
+        super().__init__(
+            id="98d40049-a1de-465f-bba1-47411298ad1a",
+            description="Edits a video by modifying its transcript.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=EditVideoByTextBlock.Input,
+            output_schema=EditVideoByTextBlock.Output,
+            test_input={
+                "credentials": TEST_CREDENTIALS_INPUT,
+                "video_in": "data:video/mp4;base64,AAAA",
+                "transcription": "edited transcript",
+            },
+            test_output=[
+                ("video_url", "https://replicate.com/output/video.mp4"),
+                ("transcription", "edited transcript"),
+            ],
+            # `edit_video` is replaced during the block test so no Replicate
+            # call is made.
+            test_mock={
+                "edit_video": lambda file_path, transcription, split_at, api_key: "https://replicate.com/output/video.mp4"
+            },
+            test_credentials=TEST_CREDENTIALS,
+        )
+
+    async def edit_video(
+        self, file_path: str, transcription: str, split_at: str, api_key: SecretStr
+    ) -> str:
+        """Edit the video at ``file_path`` with Replicate and return the result.
+
+        Args:
+            file_path: Path of the video file to read from local disk.
+            transcription: Transcript the edited video should match.
+            split_at: Matching granularity, forwarded to the model unchanged.
+            api_key: Replicate API token.
+
+        Returns:
+            The URL of the edited video, or the string form of whatever the
+            model returned in the "video" / first-list-item position.
+
+        Raises:
+            ValueError: If the model output has none of the handled shapes.
+            Exception: Anything raised while reading the file or calling
+                Replicate propagates unchanged.
+        """
+        client = ReplicateClient(api_token=api_key.get_secret_value())
+
+        # The file contents are sent inline as a base64 data URI.
+        # NOTE(review): the MIME type is always declared as video/mp4,
+        # whatever the actual container of `file_path` is.
+        with open(file_path, "rb") as f:
+            file_b64 = base64.b64encode(f.read()).decode()
+        file_url = f"data:video/mp4;base64,{file_b64}"
+
+        output = await client.async_run(
+            "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90",
+            input={
+                "mode": "edit",
+                "video_in": file_url,
+                "transcription": transcription,
+                "split_at": split_at,
+            },
+            wait=False,
+        )
+
+        # Accepted output shapes: a dict with a "video" entry, a non-empty
+        # list (first item is used), a bare FileOutput, or a plain string.
+        if isinstance(output, dict) and "video" in output:
+            video = output["video"]
+            return video.url if isinstance(video, FileOutput) else str(video)
+        if isinstance(output, list) and output:
+            first = output[0]
+            return first.url if isinstance(first, FileOutput) else str(first)
+        if isinstance(output, FileOutput):
+            return output.url
+        if isinstance(output, str):
+            return output
+
+        raise ValueError(f"Unexpected output format from Replicate API: {output}")
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: APIKeyCredentials,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Store the input video, edit it, and yield the block outputs.
+
+        Yields ``("video_url", ...)`` then ``("transcription", ...)`` on
+        success. If any step raises, yields a single
+        ``("error", "Failed to edit video: ...")`` pair instead of
+        propagating the exception. Extra keyword arguments are accepted and
+        ignored.
+        """
+        try:
+            local_path = await store_media_file(
+                graph_exec_id=graph_exec_id,
+                file=input_data.video_in,
+                user_id=user_id,
+                return_content=False,
+            )
+            # Resolve the stored file's location for this graph execution so
+            # it can be opened from disk.
+            abs_path = get_exec_file_path(graph_exec_id, local_path)
+
+            video_url = await self.edit_video(
+                abs_path,
+                input_data.transcription,
+                input_data.split_at,
+                credentials.api_key,
+            )
+
+            yield "video_url", video_url
+            yield "transcription", input_data.transcription
+        except Exception as e:
+            yield "error", f"Failed to edit video: {e}"
--- a/autogpt_platform/backend/backend/blocks/transcribe_video.py
+++ b/autogpt_platform/backend/backend/blocks/transcribe_video.py
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+import base64
+from typing import Literal
+
+from pydantic import SecretStr
+from replicate.client import Client as ReplicateClient
+from replicate.helpers import FileOutput
+
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import (
+    APIKeyCredentials,
+    CredentialsField,
+    CredentialsMetaInput,
+    SchemaField,
+)
+from backend.integrations.providers import ProviderName
+from backend.util.file import get_exec_file_path, store_media_file
+from backend.util.type import MediaFileType
+
+# Placeholder Replicate API-key credentials passed to the block as `test_credentials`.
+TEST_CREDENTIALS = APIKeyCredentials(
+    title="Mock Replicate API key",
+    provider="replicate",
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    api_key=SecretStr("mock-replicate-api-key"),
+    expires_at=None,
+)
+# Metadata-only view of the credentials above (the API key itself is left out);
+# used as the `credentials` entry of the block's `test_input`.
+TEST_CREDENTIALS_INPUT = {
+    attr: getattr(TEST_CREDENTIALS, attr)
+    for attr in ("provider", "id", "type", "title")
+}
+
+
+class TranscribeVideoBlock(Block):
+    """Transcribe the speech in a video file through Replicate.
+
+    ``run`` stores the incoming video for the current graph execution, passes
+    the stored file to ``transcribe`` and yields the returned text. Failures
+    are reported on the ``error`` output instead of being raised.
+    """
+
+    # Inputs accepted by the block.
+    class Input(BlockSchema):
+        credentials: CredentialsMetaInput[
+            Literal[ProviderName.REPLICATE], Literal["api_key"]
+        ] = CredentialsField(
+            description="The Replicate integration can be used with "
+            "any API key with sufficient permissions for the blocks it is used on.",
+        )
+        video_in: MediaFileType = SchemaField(
+            description="Video file to transcribe",
+        )
+
+    # Outputs produced by the block.
+    class Output(BlockSchema):
+        transcription: str = SchemaField(
+            description="Text transcription of the video",
+        )
+        error: str = SchemaField(
+            description="Error message if something fails",
+            default="",
+        )
+
+    def __init__(self) -> None:
+        """Register the block's id, schemas and self-test fixtures."""
+        super().__init__(
+            id="fa49dad0-a5fc-441c-ba04-2ac206e392d8",
+            description="Transcribes speech from a video file.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=TranscribeVideoBlock.Input,
+            output_schema=TranscribeVideoBlock.Output,
+            test_input={
+                "credentials": TEST_CREDENTIALS_INPUT,
+                "video_in": "data:video/mp4;base64,AAAA",
+            },
+            test_output=("transcription", "example transcript"),
+            # `transcribe` is replaced during the block test so no Replicate
+            # call is made.
+            test_mock={"transcribe": lambda file_path, api_key: "example transcript"},
+            test_credentials=TEST_CREDENTIALS,
+        )
+
+    async def transcribe(self, file_path: str, api_key: SecretStr) -> str:
+        """Transcribe the video at ``file_path`` with Replicate.
+
+        Args:
+            file_path: Path of the video file to read from local disk.
+            api_key: Replicate API token.
+
+        Returns:
+            The value under "transcription" when the model returns a dict;
+            otherwise the URL / string form of the first list item, the URL of
+            a bare ``FileOutput``, or the plain string returned by the model.
+
+        Raises:
+            ValueError: If the model reports an error in its dict output, or
+                the output has none of the handled shapes.
+            Exception: Anything raised while reading the file or calling
+                Replicate propagates unchanged.
+        """
+        client = ReplicateClient(api_token=api_key.get_secret_value())
+
+        # The file contents are sent inline as a base64 data URI.
+        # NOTE(review): the MIME type is always declared as video/mp4,
+        # whatever the actual container of `file_path` is.
+        with open(file_path, "rb") as f:
+            file_b64 = base64.b64encode(f.read()).decode()
+        file_url = f"data:video/mp4;base64,{file_b64}"
+
+        output = await client.async_run(
+            "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90",
+            input={
+                "mode": "transcribe",
+                "video_in": file_url,
+            },
+            wait=False,
+        )
+
+        if isinstance(output, dict):
+            if "transcription" in output:
+                return output["transcription"]
+            if "error" in output:
+                raise ValueError(f"API returned error: {output['error']}")
+            # A dict with neither key falls through to the generic error below.
+        elif isinstance(output, list) and output:
+            first = output[0]
+            # Coerce to str so the declared return type holds for any item type.
+            return first.url if isinstance(first, FileOutput) else str(first)
+        elif isinstance(output, FileOutput):
+            return output.url
+        elif isinstance(output, str):
+            return output
+
+        raise ValueError(f"Unexpected output format from Replicate API: {output}")
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: APIKeyCredentials,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Store the input video, transcribe it, and yield the block outputs.
+
+        Yields ``("transcription", ...)`` on success. If any step raises,
+        yields a single ``("error", "Failed to transcribe video: ...")`` pair
+        instead of propagating the exception. Extra keyword arguments are
+        accepted and ignored.
+        """
+        try:
+            local_path = await store_media_file(
+                graph_exec_id=graph_exec_id,
+                file=input_data.video_in,
+                user_id=user_id,
+                return_content=False,
+            )
+            # Resolve the stored file's location for this graph execution so
+            # it can be opened from disk.
+            abs_path = get_exec_file_path(graph_exec_id, local_path)
+
+            transcript = await self.transcribe(abs_path, credentials.api_key)
+            yield "transcription", transcript
+        except Exception as e:
+            yield "error", f"Failed to transcribe video: {e}"
--- a/docs/platform/blocks/blocks.md
+++ b/docs/platform/blocks/blocks.md
@@ -93,11 +93,13 @@ Below is a comprehensive list of all available blocks, categorized by their prim
 | [AI Shortform Video Creator](ai_shortform_video_block.md#ai-shortform-video-creator) | Generates short-form videos using AI |
 | [Replicate Flux Advanced Model](replicate_flux_advanced.md#replicate-flux-advanced-model) | Creates images using Replicate's Flux models |
 | [Flux Kontext](flux_kontext.md#flux-kontext) | Text-based image editing using Flux Kontext |
+| [Edit Video by Text](edit_video_by_text.md#edit-video-by-text) | Edit videos by modifying their transcript |

 ## Miscellaneous
 | Block Name | Description |
 |------------|-------------|
 | [Transcribe YouTube Video](youtube.md#transcribe-youtube-video) | Transcribes audio from YouTube videos |
+| [Transcribe Video](transcribe_video.md#transcribe-video) | Converts speech in a video file to text |
 | [Send Email](email_block.md#send-email) | Sends emails using SMTP |
 | [Condition Block](branching.md#condition-block) | Evaluates conditions for workflow branching |
 | [Step Through Items](iteration.md#step-through-items) | Iterates through lists or dictionaries |
--- a/docs/platform/blocks/edit_video_by_text.md
+++ b/docs/platform/blocks/edit_video_by_text.md
@@ -0,0 +1,27 @@
+## Edit Video by Text
+
+### What it is
+A block that edits a video by cutting segments based on an edited transcript.
+
+### What it does
+After providing a target transcript, the block removes portions of the video that no longer appear in the text, returning a new edited video file.
+
+### How it works
+The block compares the supplied transcript with the video's original transcript. Segments that are missing from the target transcript are removed. Word-level matching is used by default.
+
+### Inputs
+| Input | Description |
+|-------|-------------|
+| Video | The original video file to edit. |
+| Transcription | The desired transcript of the output video. |
+| Split At | Level of precision for transcript matching ("word" or "character"). |
+
+### Outputs
+| Output | Description |
+|--------|-------------|
+| Video URL | URL of the edited video. |
+| Transcription | The transcript used to generate the edited video. |
+| Error | Error message if editing fails. |
+
+### Possible use case
+Create a shorter version of a training video by removing sentences from the transcript instead of using a timeline-based video editor.
--- a/docs/platform/blocks/transcribe_video.md
+++ b/docs/platform/blocks/transcribe_video.md
@@ -0,0 +1,24 @@
+## Transcribe Video
+
+### What it is
+A block that converts the speech in a video file into text.
+
+### What it does
+This block accepts a video and returns a text transcription of the spoken content.
+
+### How it works
+The block sends the provided video to a Replicate-hosted model running in transcribe mode. The resulting text is returned for use in other blocks, such as text-based video editing.
+
+### Inputs
+| Input | Description |
+|-------|-------------|
+| Video | The video file to transcribe. |
+
+### Outputs
+| Output | Description |
+|--------|-------------|
+| Transcription | The text transcription extracted from the video. |
+| Error | Error message if the transcription fails. |
+
+### Possible use case
+Use this block to generate a transcript of a recorded meeting so that you can review or edit the content in text form.
Author	SHA1	Message	Date
Nicholas Tindle	c38ff0187b	Merge branch 'dev' into codex/add-edit-video-and-transcribe-video-blocks	2026-01-16 15:05:33 -06:00
claude[bot]	94f3852f2d	fix(blocks): add missing user_id parameter to video blocks Add required user_id parameter to TranscribeVideoBlock and EditVideoByTextBlock run methods, and pass it to store_media_file() calls to fix block test failures. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Nicholas Tindle <ntindle@users.noreply.github.com>	2026-01-16 17:57:14 +00:00
Bentlybro	cc3daef414	fix tests	2026-01-16 17:57:11 +00:00
Bentlybro	fd042f8259	format	2026-01-16 17:57:07 +00:00
Bentlybro	419baf3b47	get both blocks working	2026-01-16 17:57:03 +00:00
Toran Bruce Richards	0207fab199	Update autogpt_platform/backend/backend/blocks/transcribe_video.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-01-16 17:57:00 +00:00
Toran Bruce Richards	e7b4f3ff7a	fix(blocks): handle relative video path	2026-01-16 17:56:56 +00:00
Toran Bruce Richards	f6c2d519e1	fix(blocks): use data uris for video test input	2026-01-16 17:56:52 +00:00
claude[bot]	02746102b4	feat(blocks): add video transcription and editing blocks	2026-01-16 17:56:48 +00:00