Compare commits

...

9 Commits

Author SHA1 Message Date
Nicholas Tindle
c38ff0187b Merge branch 'dev' into codex/add-edit-video-and-transcribe-video-blocks 2026-01-16 15:05:33 -06:00
claude[bot]
94f3852f2d fix(blocks): add missing user_id parameter to video blocks
Add required user_id parameter to TranscribeVideoBlock and
EditVideoByTextBlock run methods, and pass it to store_media_file()
calls to fix block test failures.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Nicholas Tindle <ntindle@users.noreply.github.com>
2026-01-16 17:57:14 +00:00
Bentlybro
cc3daef414 fix tests 2026-01-16 17:57:11 +00:00
Bentlybro
fd042f8259 format 2026-01-16 17:57:07 +00:00
Bentlybro
419baf3b47 get both blocks working 2026-01-16 17:57:03 +00:00
Toran Bruce Richards
0207fab199 Update autogpt_platform/backend/backend/blocks/transcribe_video.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-01-16 17:57:00 +00:00
Toran Bruce Richards
e7b4f3ff7a fix(blocks): handle relative video path 2026-01-16 17:56:56 +00:00
Toran Bruce Richards
f6c2d519e1 fix(blocks): use data uris for video test input 2026-01-16 17:56:52 +00:00
claude[bot]
02746102b4 feat(blocks): add video transcription and editing blocks 2026-01-16 17:56:48 +00:00
5 changed files with 350 additions and 0 deletions

View File

@@ -0,0 +1,162 @@
from __future__ import annotations
import base64
from typing import Literal
from pydantic import SecretStr
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import (
APIKeyCredentials,
CredentialsField,
CredentialsMetaInput,
SchemaField,
)
from backend.integrations.providers import ProviderName
from backend.util.file import get_exec_file_path, store_media_file
from backend.util.type import MediaFileType
# Mock Replicate API-key credentials; passed to the block as `test_credentials`.
TEST_CREDENTIALS = APIKeyCredentials(
    title="Mock Replicate API key",
    provider="replicate",
    id="01234567-89ab-cdef-0123-456789abcdef",
    api_key=SecretStr("mock-replicate-api-key"),
    expires_at=None,
)

# Credentials reference used in `test_input`, copied field by field from
# TEST_CREDENTIALS so the two can never drift apart.
TEST_CREDENTIALS_INPUT = {
    field_name: getattr(TEST_CREDENTIALS, field_name)
    for field_name in ("provider", "id", "type", "title")
}
class EditVideoByTextBlock(Block):
    """Block that edits a video via Replicate from a target transcript.

    The input video is stored for the current graph execution, sent to the
    ``jd7h/edit-video-by-editing-text`` model in ``"edit"`` mode, and the
    URL of the resulting video is yielded together with the transcript
    that was supplied.
    """

    class Input(BlockSchema):
        credentials: CredentialsMetaInput[
            Literal[ProviderName.REPLICATE], Literal["api_key"]
        ] = CredentialsField(
            description="The Replicate integration can be used with "
            "any API key with sufficient permissions for the blocks it is used on.",
        )
        video_in: MediaFileType = SchemaField(
            description="Video file to edit",
        )
        transcription: str = SchemaField(
            description="Desired transcript for the output video",
        )
        split_at: str = SchemaField(
            description="Granularity for transcript matching",
            default="word",
        )

    class Output(BlockSchema):
        video_url: str = SchemaField(
            description="URL of the edited video",
        )
        transcription: str = SchemaField(
            description="Transcription used for editing",
        )
        error: str = SchemaField(
            description="Error message if something fails",
            default="",
        )

    def __init__(self) -> None:
        super().__init__(
            id="98d40049-a1de-465f-bba1-47411298ad1a",
            description="Edits a video by modifying its transcript.",
            categories={BlockCategory.MULTIMEDIA},
            input_schema=EditVideoByTextBlock.Input,
            output_schema=EditVideoByTextBlock.Output,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": "data:video/mp4;base64,AAAA",
                "transcription": "edited transcript",
            },
            test_output=[
                ("video_url", "https://replicate.com/output/video.mp4"),
                ("transcription", "edited transcript"),
            ],
            test_mock={
                "edit_video": lambda file_path, transcription, split_at, api_key: "https://replicate.com/output/video.mp4"
            },
            test_credentials=TEST_CREDENTIALS,
        )

    @staticmethod
    def _file_to_data_uri(file_path: str) -> str:
        """Return the file at *file_path* encoded as a base64 ``data:`` URI.

        The MIME type is guessed from the file extension; anything that is
        not recognised as video or audio falls back to ``video/mp4``.
        """
        import mimetypes

        guessed_type, _ = mimetypes.guess_type(file_path)
        if not guessed_type or not guessed_type.startswith(("video/", "audio/")):
            guessed_type = "video/mp4"

        # NOTE: the whole file is read into memory and base64-encoded.
        with open(file_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode()
        return f"data:{guessed_type};base64,{encoded}"

    @staticmethod
    def _extract_video_url(output: object) -> str:
        """Pull the video URL out of the value returned by the Replicate call.

        Accepted shapes: a dict with a ``"video"`` key, a non-empty list
        (first element is used), a ``FileOutput`` or a plain string.

        Raises:
            ValueError: if *output* has none of the accepted shapes.
        """
        if isinstance(output, dict) and "video" in output:
            candidate = output["video"]
        elif isinstance(output, list) and output:
            candidate = output[0]
        elif isinstance(output, (FileOutput, str)):
            candidate = output
        else:
            raise ValueError(f"Unexpected output format from Replicate API: {output}")

        if isinstance(candidate, FileOutput):
            return candidate.url
        return str(candidate)

    async def edit_video(
        self, file_path: str, transcription: str, split_at: str, api_key: SecretStr
    ) -> str:
        """Use Replicate's API to edit the video.

        Args:
            file_path: Path of the video file to upload.
            transcription: Desired transcript for the output video.
            split_at: Granularity for transcript matching.
            api_key: Replicate API token.

        Returns:
            URL of the edited video.

        Raises:
            ValueError: if the API response has an unexpected shape.
            Exception: any error raised while reading the file or calling
                Replicate propagates unchanged to the caller.
        """
        client = ReplicateClient(api_token=api_key.get_secret_value())
        output = await client.async_run(
            "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90",
            input={
                "mode": "edit",
                "video_in": self._file_to_data_uri(file_path),
                "transcription": transcription,
                "split_at": split_at,
            },
            wait=False,
        )
        return self._extract_video_url(output)

    async def run(
        self,
        input_data: Input,
        *,
        credentials: APIKeyCredentials,
        graph_exec_id: str,
        user_id: str,
        **kwargs,
    ) -> BlockOutput:
        """Store the input video, edit it, and yield the block outputs.

        Yields ``video_url`` and ``transcription`` on success. Any failure
        is reported as a single ``error`` output instead of being raised.
        """
        try:
            local_path = await store_media_file(
                graph_exec_id=graph_exec_id,
                file=input_data.video_in,
                user_id=user_id,
                return_content=False,
            )
            abs_path = get_exec_file_path(graph_exec_id, local_path)
            video_url = await self.edit_video(
                abs_path,
                input_data.transcription,
                input_data.split_at,
                credentials.api_key,
            )
            yield "video_url", video_url
            yield "transcription", input_data.transcription
        except Exception as e:
            yield "error", f"Failed to edit video: {e}"

View File

@@ -0,0 +1,135 @@
from __future__ import annotations
import base64
from typing import Literal
from pydantic import SecretStr
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import (
APIKeyCredentials,
CredentialsField,
CredentialsMetaInput,
SchemaField,
)
from backend.integrations.providers import ProviderName
from backend.util.file import get_exec_file_path, store_media_file
from backend.util.type import MediaFileType
# Mock Replicate API-key credentials; passed to the block as `test_credentials`.
TEST_CREDENTIALS = APIKeyCredentials(
    title="Mock Replicate API key",
    provider="replicate",
    id="01234567-89ab-cdef-0123-456789abcdef",
    api_key=SecretStr("mock-replicate-api-key"),
    expires_at=None,
)

# Credentials reference used in `test_input`, copied field by field from
# TEST_CREDENTIALS so the two can never drift apart.
TEST_CREDENTIALS_INPUT = {
    field_name: getattr(TEST_CREDENTIALS, field_name)
    for field_name in ("provider", "id", "type", "title")
}
class TranscribeVideoBlock(Block):
    """Block that transcribes the speech of a video via Replicate.

    The input video is stored for the current graph execution and sent to
    the ``jd7h/edit-video-by-editing-text`` model in ``"transcribe"`` mode;
    the resulting text is yielded as ``transcription``.
    """

    class Input(BlockSchema):
        credentials: CredentialsMetaInput[
            Literal[ProviderName.REPLICATE], Literal["api_key"]
        ] = CredentialsField(
            description="The Replicate integration can be used with "
            "any API key with sufficient permissions for the blocks it is used on.",
        )
        video_in: MediaFileType = SchemaField(
            description="Video file to transcribe",
        )

    class Output(BlockSchema):
        transcription: str = SchemaField(
            description="Text transcription of the video",
        )
        error: str = SchemaField(
            description="Error message if something fails",
            default="",
        )

    def __init__(self) -> None:
        super().__init__(
            id="fa49dad0-a5fc-441c-ba04-2ac206e392d8",
            description="Transcribes speech from a video file.",
            categories={BlockCategory.MULTIMEDIA},
            input_schema=TranscribeVideoBlock.Input,
            output_schema=TranscribeVideoBlock.Output,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": "data:video/mp4;base64,AAAA",
            },
            test_output=("transcription", "example transcript"),
            test_mock={"transcribe": lambda file_path, api_key: "example transcript"},
            test_credentials=TEST_CREDENTIALS,
        )

    @staticmethod
    def _file_to_data_uri(file_path: str) -> str:
        """Return the file at *file_path* encoded as a base64 ``data:`` URI.

        The MIME type is guessed from the file extension; anything that is
        not recognised as video or audio falls back to ``video/mp4``.
        """
        import mimetypes

        guessed_type, _ = mimetypes.guess_type(file_path)
        if not guessed_type or not guessed_type.startswith(("video/", "audio/")):
            guessed_type = "video/mp4"

        # NOTE: the whole file is read into memory and base64-encoded.
        with open(file_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode()
        return f"data:{guessed_type};base64,{encoded}"

    @staticmethod
    def _extract_transcription(output: object) -> str:
        """Pull the transcript out of the value returned by the Replicate call.

        Accepted shapes: a dict with a ``"transcription"`` key, a non-empty
        list (first element is used), a ``FileOutput`` (its URL is returned)
        or a plain string.

        Raises:
            ValueError: if a dict response carries an ``"error"`` key, or if
                *output* has none of the accepted shapes.
        """
        if isinstance(output, dict):
            if "transcription" in output:
                return output["transcription"]
            if "error" in output:
                raise ValueError(f"API returned error: {output['error']}")
        elif isinstance(output, list) and output:
            first = output[0]
            if isinstance(first, FileOutput):
                return first.url
            # Coerce so the declared ``str`` return type always holds.
            return str(first)
        elif isinstance(output, FileOutput):
            return output.url
        elif isinstance(output, str):
            return output
        raise ValueError(f"Unexpected output format from Replicate API: {output}")

    async def transcribe(self, file_path: str, api_key: SecretStr) -> str:
        """Use Replicate's API to transcribe the video.

        Args:
            file_path: Path of the video file to upload.
            api_key: Replicate API token.

        Returns:
            The transcript extracted from the API response.

        Raises:
            ValueError: if the API reports an error or returns an
                unexpected shape.
            Exception: any error raised while reading the file or calling
                Replicate propagates unchanged to the caller.
        """
        client = ReplicateClient(api_token=api_key.get_secret_value())
        output = await client.async_run(
            "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90",
            input={
                "mode": "transcribe",
                "video_in": self._file_to_data_uri(file_path),
            },
            wait=False,
        )
        return self._extract_transcription(output)

    async def run(
        self,
        input_data: Input,
        *,
        credentials: APIKeyCredentials,
        graph_exec_id: str,
        user_id: str,
        **kwargs,
    ) -> BlockOutput:
        """Store the input video, transcribe it, and yield the block outputs.

        Yields ``transcription`` on success. Any failure is reported as a
        single ``error`` output instead of being raised.
        """
        try:
            local_path = await store_media_file(
                graph_exec_id=graph_exec_id,
                file=input_data.video_in,
                user_id=user_id,
                return_content=False,
            )
            abs_path = get_exec_file_path(graph_exec_id, local_path)
            transcript = await self.transcribe(abs_path, credentials.api_key)
            yield "transcription", transcript
        except Exception as e:
            yield "error", f"Failed to transcribe video: {e}"

View File

@@ -93,11 +93,13 @@ Below is a comprehensive list of all available blocks, categorized by their prim
| [AI Shortform Video Creator](ai_shortform_video_block.md#ai-shortform-video-creator) | Generates short-form videos using AI |
| [Replicate Flux Advanced Model](replicate_flux_advanced.md#replicate-flux-advanced-model) | Creates images using Replicate's Flux models |
| [Flux Kontext](flux_kontext.md#flux-kontext) | Text-based image editing using Flux Kontext |
| [Edit Video by Text](edit_video_by_text.md#edit-video-by-text) | Edit videos by modifying their transcript |
## Miscellaneous
| Block Name | Description |
|------------|-------------|
| [Transcribe YouTube Video](youtube.md#transcribe-youtube-video) | Transcribes audio from YouTube videos |
| [Transcribe Video](transcribe_video.md#transcribe-video) | Converts speech in a video file to text |
| [Send Email](email_block.md#send-email) | Sends emails using SMTP |
| [Condition Block](branching.md#condition-block) | Evaluates conditions for workflow branching |
| [Step Through Items](iteration.md#step-through-items) | Iterates through lists or dictionaries |

View File

@@ -0,0 +1,27 @@
## Edit Video by Text
### What it is
A block that edits a video by cutting segments based on an edited transcript.
### What it does
After providing a target transcript, the block removes portions of the video that no longer appear in the text, returning a new edited video file.
### How it works
The block compares the supplied transcript with the video's original transcript. Segments that are missing from the target transcript are removed. Word-level matching is used by default.
### Inputs
| Input | Description |
|-------|-------------|
| Video | The original video file to edit. |
| Transcription | The desired transcript of the output video. |
| Split At | Level of precision for transcript matching ("word" or "character"). |
### Outputs
| Output | Description |
|--------|-------------|
| Video URL | URL of the edited video. |
| Transcription | The transcript used to generate the edited video. |
| Error | Error message if editing fails. |
### Possible use case
Create a shorter version of a training video by removing sentences from the transcript instead of using a timeline-based video editor.

View File

@@ -0,0 +1,24 @@
## Transcribe Video
### What it is
A block that converts the speech in a video file into text.
### What it does
This block accepts a video and returns a text transcription of the spoken content.
### How it works
The block processes the provided video using a speech-to-text engine. The resulting text is returned for use in other blocks, such as text-based video editing.
### Inputs
| Input | Description |
|-------|-------------|
| Video | The video file to transcribe. |
### Outputs
| Output | Description |
|--------|-------------|
| Transcription | The text transcription extracted from the video. |
| Error | Error message if the transcription fails. |
### Possible use case
Use this block to generate a transcript of a recorded meeting so that you can review or edit the content in text form.