feat(blocks): add video transcription and editing blocks on dev

- Add TranscribeVideoBlock and EditVideoByTextBlock to blocks/video/
- Update video/__init__.py with new block exports
- Generate block documentation via generate_block_docs.py
- Fix wait=False bug in Replicate API calls (was returning Prediction
  object instead of actual output)
- Format fixes

Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>
This commit is contained in:
claude[bot]
2026-02-09 07:51:52 +00:00
parent 1a16e203b8
commit fda10563e7
8 changed files with 380 additions and 2 deletions

View File

@@ -9,11 +9,14 @@ This module provides blocks for:
- Getting media duration
- Looping videos
- Adding audio to videos
- Transcribing video speech to text
- Editing videos by modifying their transcript
Dependencies:
- yt-dlp: For video downloading
- moviepy: For video editing operations
- elevenlabs: For AI narration (optional)
- replicate: For video transcription and text-based editing
"""
from backend.blocks.video.add_audio import AddAudioToVideoBlock
@@ -21,14 +24,18 @@ from backend.blocks.video.clip import VideoClipBlock
from backend.blocks.video.concat import VideoConcatBlock
from backend.blocks.video.download import VideoDownloadBlock
from backend.blocks.video.duration import MediaDurationBlock
from backend.blocks.video.edit_by_text import EditVideoByTextBlock
from backend.blocks.video.loop import LoopVideoBlock
from backend.blocks.video.narration import VideoNarrationBlock
from backend.blocks.video.text_overlay import VideoTextOverlayBlock
from backend.blocks.video.transcribe import TranscribeVideoBlock
__all__ = [
"AddAudioToVideoBlock",
"EditVideoByTextBlock",
"LoopVideoBlock",
"MediaDurationBlock",
"TranscribeVideoBlock",
"VideoClipBlock",
"VideoConcatBlock",
"VideoDownloadBlock",

View File

@@ -0,0 +1,159 @@
"""EditVideoByTextBlock - Edit a video by modifying its transcript via Replicate."""
from __future__ import annotations
import logging
from typing import Literal
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
from backend.blocks.replicate._auth import (
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
ReplicateCredentials,
ReplicateCredentialsInput,
)
from backend.data.block import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.data.execution import ExecutionContext
from backend.data.model import CredentialsField, SchemaField
from backend.util.exceptions import BlockExecutionError
from backend.util.file import MediaFileType, store_media_file
logger = logging.getLogger(__name__)
class EditVideoByTextBlock(Block):
    """Edit a video by sending it and a desired transcript to a Replicate model."""

    class Input(BlockSchemaInput):
        # NOTE: fields are documented with comments rather than docstrings so
        # the schema classes themselves stay untouched.

        # Replicate credentials; the API key is read from them in ``run``.
        credentials: ReplicateCredentialsInput = CredentialsField(
            description="Replicate API key for video editing.",
        )
        # Source video handed to ``_store_input_video`` before the API call.
        video_in: MediaFileType = SchemaField(
            description="Input video file to edit (URL, data URI, or local path)",
        )
        # Transcript forwarded to the model and echoed back on the output.
        transcription: str = SchemaField(
            description="Desired transcript for the output video",
        )
        # Forwarded verbatim as the model's ``split_at`` input.
        split_at: Literal["word", "character"] = SchemaField(
            description="Granularity for transcript matching",
            default="word",
        )

    class Output(BlockSchemaOutput):
        # URL extracted from the Replicate response by ``_edit_video``.
        video_url: str = SchemaField(
            description="URL of the edited video",
        )
        # The same transcript that was supplied as input.
        transcription: str = SchemaField(
            description="Transcription used for editing",
        )

    def __init__(self):
        # Shared fixtures for the block's built-in self test.
        sample_video = "data:video/mp4;base64,AAAA"
        sample_result_url = "https://replicate.com/output/video.mp4"
        sample_transcript = "edited transcript"

        super().__init__(
            id="98d40049-a1de-465f-bba1-47411298ad1a",
            description="Edit a video by modifying its transcript",
            categories={BlockCategory.MULTIMEDIA},
            input_schema=self.Input,
            output_schema=self.Output,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": sample_video,
                "transcription": sample_transcript,
            },
            test_output=[
                ("video_url", sample_result_url),
                ("transcription", sample_transcript),
            ],
            # Both helpers are replaced during tests so no storage or network
            # access takes place.
            test_mock={
                "_edit_video": lambda *args: sample_result_url,
                "_store_input_video": lambda *args, **kwargs: sample_video,
            },
            test_credentials=TEST_CREDENTIALS,
        )

    async def _store_input_video(
        self, execution_context: ExecutionContext, file: MediaFileType
    ) -> MediaFileType:
        """Run the input video through ``store_media_file``.

        The ``"for_external_api"`` return format is requested. This lives in
        its own method so the block's ``test_mock`` can replace it.
        """
        stored = await store_media_file(
            file=file,
            execution_context=execution_context,
            return_format="for_external_api",
        )
        return stored

    async def _edit_video(
        self, data_uri: str, transcription: str, split_at: str, api_key: str
    ) -> str:
        """Run the Replicate model in ``edit`` mode and return the video URL.

        Args:
            data_uri: Video reference produced by ``_store_input_video``.
            transcription: Desired transcript for the edited video.
            split_at: Matching granularity forwarded to the model.
            api_key: Replicate API token.

        Returns:
            The URL (or string form) of the video found in the model output.

        Raises:
            ValueError: If the model output has none of the handled shapes.
        """
        model_ref = "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90"
        payload = {
            "mode": "edit",
            "video_in": data_uri,
            "transcription": transcription,
            "split_at": split_at,
        }
        replicate_client = ReplicateClient(api_token=api_key)
        output = await replicate_client.async_run(model_ref, input=payload)

        # Pick the element that carries the video, depending on output shape.
        if isinstance(output, dict) and "video" in output:
            candidate = output["video"]
        elif isinstance(output, list) and output:
            candidate = output[0]
        elif isinstance(output, FileOutput):
            return output.url
        elif isinstance(output, str):
            return output
        else:
            raise ValueError(f"Unexpected output format from Replicate API: {output}")

        # Nested values are either file handles (use their URL) or are
        # stringified as-is.
        if isinstance(candidate, FileOutput):
            return candidate.url
        return str(candidate)

    async def run(
        self,
        input_data: Input,
        *,
        credentials: ReplicateCredentials,
        execution_context: ExecutionContext,
        **kwargs,
    ) -> BlockOutput:
        """Prepare the video, call the model, then yield URL and transcript.

        Raises:
            BlockExecutionError: For any failure; errors that are already
                ``BlockExecutionError`` propagate unchanged.
        """
        try:
            prepared_video = await self._store_input_video(
                execution_context, input_data.video_in
            )
            api_key = credentials.api_key.get_secret_value()
            # Positional call: the test mock for ``_edit_video`` only accepts
            # positional arguments.
            edited_url = await self._edit_video(
                prepared_video,
                input_data.transcription,
                input_data.split_at,
                api_key,
            )
            yield "video_url", edited_url
            yield "transcription", input_data.transcription
        except Exception as exc:
            if isinstance(exc, BlockExecutionError):
                raise
            raise BlockExecutionError(
                message=f"Failed to edit video: {exc}",
                block_name=self.name,
                block_id=str(self.id),
            ) from exc

View File

@@ -0,0 +1,139 @@
"""TranscribeVideoBlock - Transcribe speech from a video file using Replicate."""
from __future__ import annotations
import logging
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
from backend.blocks.replicate._auth import (
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
ReplicateCredentials,
ReplicateCredentialsInput,
)
from backend.data.block import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.data.execution import ExecutionContext
from backend.data.model import CredentialsField, SchemaField
from backend.util.exceptions import BlockExecutionError
from backend.util.file import MediaFileType, store_media_file
logger = logging.getLogger(__name__)
class TranscribeVideoBlock(Block):
    """Transcribe the speech in a video to text using a Replicate model."""

    class Input(BlockSchemaInput):
        # NOTE: fields are documented with comments rather than docstrings so
        # the schema classes themselves stay untouched.

        # Replicate credentials; the API key is read from them in ``run``.
        credentials: ReplicateCredentialsInput = CredentialsField(
            description="Replicate API key for video transcription.",
        )
        # Source video handed to ``_store_input_video`` before the API call.
        video_in: MediaFileType = SchemaField(
            description="Input video file to transcribe (URL, data URI, or local path)",
        )

    class Output(BlockSchemaOutput):
        # Text extracted from the Replicate response by ``_transcribe``.
        transcription: str = SchemaField(
            description="Text transcription extracted from the video",
        )

    def __init__(self):
        # Fixture for the block's built-in self test.
        sample_transcript = "example transcript"

        super().__init__(
            id="fa49dad0-a5fc-441c-ba04-2ac206e392d8",
            description="Transcribe speech from a video file to text",
            categories={BlockCategory.MULTIMEDIA},
            input_schema=self.Input,
            output_schema=self.Output,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": "data:video/mp4;base64,AAAA",
            },
            test_output=[("transcription", sample_transcript)],
            # Both helpers are replaced during tests so no storage or network
            # access takes place.
            test_mock={
                "_transcribe": lambda *args: sample_transcript,
                "_store_input_video": lambda *args, **kwargs: "test.mp4",
            },
            test_credentials=TEST_CREDENTIALS,
        )

    async def _store_input_video(
        self, execution_context: ExecutionContext, file: MediaFileType
    ) -> MediaFileType:
        """Run the input video through ``store_media_file``.

        The ``"for_external_api"`` return format is requested. This lives in
        its own method so the block's ``test_mock`` can replace it.
        """
        stored = await store_media_file(
            file=file,
            execution_context=execution_context,
            return_format="for_external_api",
        )
        return stored

    async def _transcribe(self, data_uri: str, api_key: str) -> str:
        """Run the Replicate model in ``transcribe`` mode and return the text.

        Args:
            data_uri: Video reference produced by ``_store_input_video``.
            api_key: Replicate API token.

        Returns:
            The transcription taken from the model output.

        Raises:
            ValueError: If the output dict reports an ``error`` (and has no
                ``transcription``), or the output has none of the handled
                shapes.
        """
        model_ref = "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90"
        payload = {
            "mode": "transcribe",
            "video_in": data_uri,
        }
        replicate_client = ReplicateClient(api_token=api_key)
        output = await replicate_client.async_run(model_ref, input=payload)

        if isinstance(output, dict):
            # A dict with neither key falls through to the final ValueError.
            if "transcription" in output:
                return str(output["transcription"])
            if "error" in output:
                raise ValueError(f"API returned error: {output['error']}")
        elif isinstance(output, list) and output:
            head = output[0]
            if isinstance(head, FileOutput):
                return head.url
            if isinstance(head, dict) and "text" in head:
                # Segment-style output: concatenate every segment's text.
                pieces = [
                    segment.get("text", "") for segment in output  # type: ignore
                ]
                return " ".join(pieces)
            return str(head)
        elif isinstance(output, FileOutput):
            return output.url
        elif isinstance(output, str):
            return output

        raise ValueError(f"Unexpected output format from Replicate API: {output}")

    async def run(
        self,
        input_data: Input,
        *,
        credentials: ReplicateCredentials,
        execution_context: ExecutionContext,
        **kwargs,
    ) -> BlockOutput:
        """Prepare the video, call the model, then yield the transcription.

        Raises:
            BlockExecutionError: For any failure; errors that are already
                ``BlockExecutionError`` propagate unchanged.
        """
        try:
            prepared_video = await self._store_input_video(
                execution_context, input_data.video_in
            )
            api_key = credentials.api_key.get_secret_value()
            # Positional call: the test mock for ``_transcribe`` only accepts
            # positional arguments.
            transcript = await self._transcribe(prepared_video, api_key)
            yield "transcription", transcript
        except Exception as exc:
            if isinstance(exc, BlockExecutionError):
                raise
            raise BlockExecutionError(
                message=f"Failed to transcribe video: {exc}",
                block_name=self.name,
                block_id=str(self.id),
            ) from exc

View File

@@ -3,8 +3,6 @@
import queue
import threading
import pytest
from backend.data.execution import ExecutionQueue

View File

@@ -474,8 +474,10 @@ Below is a comprehensive list of all available blocks, categorized by their prim
| Block Name | Description |
|------------|-------------|
| [Add Audio To Video](block-integrations/video/add_audio.md#add-audio-to-video) | Block to attach an audio file to a video file using moviepy |
| [Edit Video By Text](block-integrations/video/edit_by_text.md#edit-video-by-text) | Edit a video by modifying its transcript |
| [Loop Video](block-integrations/video/loop.md#loop-video) | Block to loop a video to a given duration or number of repeats |
| [Media Duration](block-integrations/video/duration.md#media-duration) | Block to get the duration of a media file |
| [Transcribe Video](block-integrations/video/transcribe.md#transcribe-video) | Transcribe speech from a video file to text |
| [Video Clip](block-integrations/video/clip.md#video-clip) | Extract a time segment from a video |
| [Video Concat](block-integrations/video/concat.md#video-concat) | Merge multiple video clips into one continuous video |
| [Video Download](block-integrations/video/download.md#video-download) | Download video from URL (YouTube, Vimeo, news sites, direct links) |

View File

@@ -133,8 +133,10 @@
* [Video Concat](block-integrations/video/concat.md)
* [Video Download](block-integrations/video/download.md)
* [Video Duration](block-integrations/video/duration.md)
* [Video Edit By Text](block-integrations/video/edit_by_text.md)
* [Video Loop](block-integrations/video/loop.md)
* [Video Narration](block-integrations/video/narration.md)
* [Video Text Overlay](block-integrations/video/text_overlay.md)
* [Video Transcribe](block-integrations/video/transcribe.md)
* [Wolfram LLM API](block-integrations/wolfram/llm_api.md)
* [Zerobounce Validate Emails](block-integrations/zerobounce/validate_emails.md)

View File

@@ -0,0 +1,37 @@
# Video Edit By Text
<!-- MANUAL: file_description -->
_Add a description of this category of blocks._
<!-- END MANUAL -->
## Edit Video By Text
### What it is
Edit a video by modifying its transcript
### How it works
<!-- MANUAL: how_it_works -->
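The block first passes the input video through `store_media_file` (requesting the `for_external_api` return format) and then calls the `jd7h/edit-video-by-editing-text` model on Replicate in `edit` mode, sending the video together with the desired transcript and the `split_at` granularity. The URL of the video returned by the model is emitted as `video_url`, and the transcript that was supplied is echoed back as `transcription`. Any failure is reported as a block execution error.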
<!-- END MANUAL -->
### Inputs
| Input | Description | Type | Required |
|-------|-------------|------|----------|
| video_in | Input video file to edit (URL, data URI, or local path) | str (file) | Yes |
| transcription | Desired transcript for the output video | str | Yes |
| split_at | Granularity for transcript matching | "word" \| "character" | No |
### Outputs
| Output | Description | Type |
|--------|-------------|------|
| error | Error message if the operation failed | str |
| video_url | URL of the edited video | str |
| transcription | Transcription used for editing | str |
### Possible use case
<!-- MANUAL: use_case -->
_Add practical use case examples here._
<!-- END MANUAL -->
---

View File

@@ -0,0 +1,34 @@
# Video Transcribe
<!-- MANUAL: file_description -->
_Add a description of this category of blocks._
<!-- END MANUAL -->
## Transcribe Video
### What it is
Transcribe speech from a video file to text
### How it works
<!-- MANUAL: how_it_works -->
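The block first passes the input video through `store_media_file` (requesting the `for_external_api` return format) and then calls the `jd7h/edit-video-by-editing-text` model on Replicate in `transcribe` mode. The text found in the model's response is emitted as `transcription`; if the response reports an error or has an unrecognised shape, the block fails with a block execution error.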
<!-- END MANUAL -->
### Inputs
| Input | Description | Type | Required |
|-------|-------------|------|----------|
| video_in | Input video file to transcribe (URL, data URI, or local path) | str (file) | Yes |
### Outputs
| Output | Description | Type |
|--------|-------------|------|
| error | Error message if the operation failed | str |
| transcription | Text transcription extracted from the video | str |
### Possible use case
<!-- MANUAL: use_case -->
_Add practical use case examples here._
<!-- END MANUAL -->
---