Compare commits

...

5 Commits

Author SHA1 Message Date
Nicholas Tindle
5797afd28b docs(blocks): improve video block descriptions and regenerate docs
Better "What it is" descriptions so the generated docs are
self-explanatory without a separate "What it does" section.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 16:45:20 -06:00
Nicholas Tindle
ac27f1b825 fix(blocks): use store_media_file for_block_output on edit video output
The edit block was returning a raw Replicate URL instead of piping the
output through store_media_file with for_block_output. This broke
workspace:// URI generation in CoPilot. Matches the pattern used by all
sibling video blocks (clip, narration, concat, etc).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 16:35:05 -06:00
Nicholas Tindle
063a379c64 fix(blocks): correct imports, improve descriptions, and read FileOutput content
- Fix imports to use backend.blocks._base instead of backend.data.block
- Improve field descriptions for transcription and split_at inputs
- Read FileOutput content with aread() instead of returning URL
- Fill in manual documentation sections for both video blocks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 16:20:08 -06:00
Nicholas Tindle
b98c02278a Merge branch 'dev' into codex/add-edit-video-and-transcribe-video-blocks 2026-03-04 03:15:39 -06:00
claude[bot]
fda10563e7 feat(blocks): add video transcription and editing blocks on dev
- Add TranscribeVideoBlock and EditVideoByTextBlock to blocks/video/
- Update video/__init__.py with new block exports
- Generate block documentation via generate_block_docs.py
- Fix wait=False bug in Replicate API calls (was returning Prediction
  object instead of actual output)
- Format fixes

Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>
2026-02-09 07:51:52 +00:00
7 changed files with 406 additions and 0 deletions

View File

@@ -9,11 +9,14 @@ This module provides blocks for:
- Getting media duration
- Looping videos
- Adding audio to videos
- Transcribing video speech to text
- Editing videos by modifying their transcript
Dependencies:
- yt-dlp: For video downloading
- moviepy: For video editing operations
- elevenlabs: For AI narration (optional)
- replicate: For video transcription and text-based editing
"""
from backend.blocks.video.add_audio import AddAudioToVideoBlock
@@ -21,14 +24,18 @@ from backend.blocks.video.clip import VideoClipBlock
from backend.blocks.video.concat import VideoConcatBlock
from backend.blocks.video.download import VideoDownloadBlock
from backend.blocks.video.duration import MediaDurationBlock
from backend.blocks.video.edit_by_text import EditVideoByTextBlock
from backend.blocks.video.loop import LoopVideoBlock
from backend.blocks.video.narration import VideoNarrationBlock
from backend.blocks.video.text_overlay import VideoTextOverlayBlock
from backend.blocks.video.transcribe import TranscribeVideoBlock
__all__ = [
"AddAudioToVideoBlock",
"EditVideoByTextBlock",
"LoopVideoBlock",
"MediaDurationBlock",
"TranscribeVideoBlock",
"VideoClipBlock",
"VideoConcatBlock",
"VideoDownloadBlock",

View File

@@ -0,0 +1,175 @@
"""EditVideoByTextBlock - Edit a video by modifying its transcript via Replicate."""
from __future__ import annotations
import logging
from typing import Literal
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
from backend.blocks._base import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.blocks.replicate._auth import (
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
ReplicateCredentials,
ReplicateCredentialsInput,
)
from backend.data.execution import ExecutionContext
from backend.data.model import CredentialsField, SchemaField
from backend.util.exceptions import BlockExecutionError
from backend.util.file import MediaFileType, store_media_file
logger = logging.getLogger(__name__)
class EditVideoByTextBlock(Block):
    """Block that trims a video so that it matches an edited transcript.

    The edit itself is performed by a Replicate-hosted model invoked in
    ``"edit"`` mode. This class prepares the input file, calls the model,
    and passes the resulting video through ``store_media_file`` before
    yielding it.
    """

    class Input(BlockSchemaInput):
        credentials: ReplicateCredentialsInput = CredentialsField(
            description="Replicate API key for video editing.",
        )
        video_in: MediaFileType = SchemaField(
            description="Input video file to edit (URL, data URI, or local path)",
        )
        transcription: str = SchemaField(
            description=(
                "Modified transcript of the input video — segments absent "
                "from this text will be cut from the output video"
            ),
        )
        split_at: Literal["word", "character"] = SchemaField(
            default="word",
            description=(
                "Alignment granularity for transcript matching: 'word' aligns "
                "cuts at word boundaries, 'character' allows finer sub-word "
                "alignment"
            ),
        )

    class Output(BlockSchemaOutput):
        video_out: MediaFileType = SchemaField(
            description="Edited video file (path or data URI)",
        )
        transcription: str = SchemaField(
            description="Transcription used for editing",
        )

    def __init__(self):
        # The three mocks below stand in for every helper that would touch
        # the file store or the Replicate API during the block self-test.
        mocks = {
            "_edit_video": lambda *args: "https://replicate.com/output/video.mp4",
            "_store_input_video": lambda *args, **kwargs: "data:video/mp4;base64,AAAA",
            "_store_output_video": lambda *args, **kwargs: "edited_video.mp4",
        }
        super().__init__(
            id="98d40049-a1de-465f-bba1-47411298ad1a",
            description=(
                "Edit a video by modifying its transcript — segments you "
                "remove from the transcript are cut from the output video"
            ),
            categories={BlockCategory.MULTIMEDIA},
            input_schema=self.Input,
            output_schema=self.Output,
            test_credentials=TEST_CREDENTIALS,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": "data:video/mp4;base64,AAAA",
                "transcription": "edited transcript",
            },
            test_output=[
                ("video_out", str),
                ("transcription", "edited transcript"),
            ],
            test_mock=mocks,
        )

    async def _store_input_video(
        self, execution_context: ExecutionContext, file: MediaFileType
    ) -> MediaFileType:
        """Pass the input file through ``store_media_file``.

        Uses the ``"for_external_api"`` return format. Kept as its own
        method so the block self-test can replace it (see ``test_mock``).
        """
        stored = await store_media_file(
            file=file,
            execution_context=execution_context,
            return_format="for_external_api",
        )
        return stored

    async def _store_output_video(
        self, execution_context: ExecutionContext, file: MediaFileType
    ) -> MediaFileType:
        """Pass the edited video through ``store_media_file``.

        Uses the ``"for_block_output"`` return format. Kept as its own
        method so the block self-test can replace it (see ``test_mock``).
        """
        stored = await store_media_file(
            file=file,
            execution_context=execution_context,
            return_format="for_block_output",
        )
        return stored

    async def _edit_video(
        self, data_uri: str, transcription: str, split_at: str, api_key: str
    ) -> str:
        """Run the Replicate model in ``"edit"`` mode and return the video URL.

        Accepted result shapes, checked in this order: a dict holding a
        ``"video"`` entry, a non-empty list (first element is used), a bare
        ``FileOutput``, or a bare string.

        Raises:
            ValueError: if the result matches none of the shapes above.
        """
        model_ref = (
            "jd7h/edit-video-by-editing-text:"
            "e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90"
        )
        payload = {
            "mode": "edit",
            "video_in": data_uri,
            "transcription": transcription,
            "split_at": split_at,
        }
        replicate_client = ReplicateClient(api_token=api_key)
        result = await replicate_client.async_run(model_ref, input=payload)

        def as_url(item: object) -> str:
            # Items pulled out of a container are either a FileOutput
            # (use its URL) or something we simply stringify.
            if isinstance(item, FileOutput):
                return item.url
            return str(item)

        if isinstance(result, dict) and "video" in result:
            return as_url(result["video"])
        if isinstance(result, list) and len(result) > 0:
            return as_url(result[0])
        if isinstance(result, FileOutput):
            return result.url
        if isinstance(result, str):
            return result
        raise ValueError(f"Unexpected output format from Replicate API: {result}")

    async def run(
        self,
        input_data: Input,
        *,
        credentials: ReplicateCredentials,
        execution_context: ExecutionContext,
        **kwargs,
    ) -> BlockOutput:
        """Edit the video and yield ``video_out`` followed by ``transcription``.

        Raises:
            BlockExecutionError: re-raised unchanged if already of that type;
                any other exception is wrapped in one, with the original
                attached as the cause.
        """
        try:
            # Step 1: hand the input to the media store in the form meant
            # for submission to an external API.
            prepared_input = await self._store_input_video(
                execution_context, input_data.video_in
            )

            # Step 2: let the Replicate model produce the edited video.
            api_key = credentials.api_key.get_secret_value()
            edited_url = await self._edit_video(
                prepared_input,
                input_data.transcription,
                input_data.split_at,
                api_key,
            )

            # Step 3: route the result through the workspace store so that
            # CoPilot receives workspace:// URIs.
            stored_video = await self._store_output_video(
                execution_context, MediaFileType(edited_url)
            )

            yield "video_out", stored_video
            yield "transcription", input_data.transcription
        except BlockExecutionError:
            raise
        except Exception as exc:
            raise BlockExecutionError(
                message=f"Failed to edit video: {exc}",
                block_name=self.name,
                block_id=str(self.id),
            ) from exc

View File

@@ -0,0 +1,141 @@
"""TranscribeVideoBlock - Transcribe speech from a video file using Replicate."""
from __future__ import annotations
import logging
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
from backend.blocks._base import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.blocks.replicate._auth import (
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
ReplicateCredentials,
ReplicateCredentialsInput,
)
from backend.data.execution import ExecutionContext
from backend.data.model import CredentialsField, SchemaField
from backend.util.exceptions import BlockExecutionError
from backend.util.file import MediaFileType, store_media_file
logger = logging.getLogger(__name__)
class TranscribeVideoBlock(Block):
    """Block that turns the speech in a video into text.

    Transcription is performed by a Replicate-hosted model invoked in
    ``"transcribe"`` mode. This class prepares the input file, calls the
    model, and normalises the several result shapes into a single string.
    """

    class Input(BlockSchemaInput):
        credentials: ReplicateCredentialsInput = CredentialsField(
            description="Replicate API key for video transcription.",
        )
        video_in: MediaFileType = SchemaField(
            description=(
                "Input video file to transcribe (URL, data URI, or local path)"
            ),
        )

    class Output(BlockSchemaOutput):
        transcription: str = SchemaField(
            description="Text transcription extracted from the video",
        )

    def __init__(self):
        # Both helpers that would reach the file store or the Replicate API
        # are mocked out for the block self-test.
        mocks = {
            "_transcribe": lambda *args: "example transcript",
            "_store_input_video": lambda *args, **kwargs: "test.mp4",
        }
        super().__init__(
            id="fa49dad0-a5fc-441c-ba04-2ac206e392d8",
            description=(
                "Extract spoken words from a video and return them as a "
                "text transcription"
            ),
            categories={BlockCategory.MULTIMEDIA},
            input_schema=self.Input,
            output_schema=self.Output,
            test_credentials=TEST_CREDENTIALS,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": "data:video/mp4;base64,AAAA",
            },
            test_output=[("transcription", "example transcript")],
            test_mock=mocks,
        )

    async def _store_input_video(
        self, execution_context: ExecutionContext, file: MediaFileType
    ) -> MediaFileType:
        """Pass the input file through ``store_media_file``.

        Uses the ``"for_external_api"`` return format. Kept as its own
        method so the block self-test can replace it (see ``test_mock``).
        """
        stored = await store_media_file(
            file=file,
            execution_context=execution_context,
            return_format="for_external_api",
        )
        return stored

    async def _transcribe(self, data_uri: str, api_key: str) -> str:
        """Run the Replicate model in ``"transcribe"`` mode and return the text.

        Result shapes handled:

        * dict with ``"transcription"`` -> that value, stringified
        * dict with ``"error"`` -> ``ValueError`` carrying the error
        * non-empty list whose first item is a ``FileOutput`` -> its
          content, read and decoded as UTF-8
        * non-empty list whose first item is a dict with ``"text"`` -> the
          ``"text"`` of every element joined with single spaces
        * any other non-empty list -> first item, stringified
        * bare ``FileOutput`` -> its content, read and decoded as UTF-8
        * bare string -> returned unchanged

        Raises:
            ValueError: on an ``"error"`` dict, or when the result matches
                none of the shapes above (including a dict with neither
                recognised key and an empty list).
        """
        model_ref = (
            "jd7h/edit-video-by-editing-text:"
            "e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90"
        )
        payload = {
            "mode": "transcribe",
            "video_in": data_uri,
        }
        replicate_client = ReplicateClient(api_token=api_key)
        result = await replicate_client.async_run(model_ref, input=payload)

        if isinstance(result, dict):
            if "transcription" in result:
                return str(result["transcription"])
            if "error" in result:
                raise ValueError(f"API returned error: {result['error']}")
            # A dict with neither key falls through to the final raise.
        elif isinstance(result, list) and len(result) > 0:
            first = result[0]
            if isinstance(first, FileOutput):
                return (await first.aread()).decode("utf-8")
            if isinstance(first, dict) and "text" in first:
                pieces = (
                    segment.get("text", "") for segment in result  # type: ignore
                )
                return " ".join(pieces)
            return str(first)
        elif isinstance(result, FileOutput):
            return (await result.aread()).decode("utf-8")
        elif isinstance(result, str):
            return result

        raise ValueError(f"Unexpected output format from Replicate API: {result}")

    async def run(
        self,
        input_data: Input,
        *,
        credentials: ReplicateCredentials,
        execution_context: ExecutionContext,
        **kwargs,
    ) -> BlockOutput:
        """Transcribe the video and yield a single ``transcription`` output.

        Raises:
            BlockExecutionError: re-raised unchanged if already of that type;
                any other exception is wrapped in one, with the original
                attached as the cause.
        """
        try:
            # Hand the input to the media store in the form meant for
            # submission to an external API, then call the model.
            prepared_input = await self._store_input_video(
                execution_context, input_data.video_in
            )
            api_key = credentials.api_key.get_secret_value()
            text = await self._transcribe(prepared_input, api_key)
            yield "transcription", text
        except BlockExecutionError:
            raise
        except Exception as exc:
            raise BlockExecutionError(
                message=f"Failed to transcribe video: {exc}",
                block_name=self.name,
                block_id=str(self.id),
            ) from exc

View File

@@ -492,8 +492,10 @@ Below is a comprehensive list of all available blocks, categorized by their prim
| Block Name | Description |
|------------|-------------|
| [Add Audio To Video](block-integrations/video/add_audio.md#add-audio-to-video) | Block to attach an audio file to a video file using moviepy |
| [Edit Video By Text](block-integrations/video/edit_by_text.md#edit-video-by-text) | Edit a video by modifying its transcript — segments you remove from the transcript are cut from the output video |
| [Loop Video](block-integrations/video/loop.md#loop-video) | Block to loop a video to a given duration or number of repeats |
| [Media Duration](block-integrations/video/duration.md#media-duration) | Block to get the duration of a media file |
| [Transcribe Video](block-integrations/video/transcribe.md#transcribe-video) | Extract spoken words from a video and return them as a text transcription |
| [Video Clip](block-integrations/video/clip.md#video-clip) | Extract a time segment from a video |
| [Video Concat](block-integrations/video/concat.md#video-concat) | Merge multiple video clips into one continuous video |
| [Video Download](block-integrations/video/download.md#video-download) | Download video from URL (YouTube, Vimeo, news sites, direct links) |

View File

@@ -136,8 +136,10 @@
* [Video Concat](block-integrations/video/concat.md)
* [Video Download](block-integrations/video/download.md)
* [Video Duration](block-integrations/video/duration.md)
* [Video Edit By Text](block-integrations/video/edit_by_text.md)
* [Video Loop](block-integrations/video/loop.md)
* [Video Narration](block-integrations/video/narration.md)
* [Video Text Overlay](block-integrations/video/text_overlay.md)
* [Video Transcribe](block-integrations/video/transcribe.md)
* [Wolfram LLM API](block-integrations/wolfram/llm_api.md)
* [Zerobounce Validate Emails](block-integrations/zerobounce/validate_emails.md)

View File

@@ -0,0 +1,41 @@
# Video Edit By Text
<!-- MANUAL: file_description -->
This block uses the Replicate API to edit a video according to a modified transcript: any segment whose speech is absent from the supplied transcript is cut from the output video.
<!-- END MANUAL -->
## Edit Video By Text
### What it is
Edit a video by modifying its transcript — segments you remove from the transcript are cut from the output video
### How it works
<!-- MANUAL: how_it_works -->
The block sends the input video and the desired transcript to the Replicate API using the `jd7h/edit-video-by-editing-text` model in "edit" mode. The model aligns the provided transcript against the original speech in the video and removes any video segments whose speech is not present in the supplied transcript. The `split_at` parameter controls alignment granularity: `word` (default) aligns cuts at word boundaries for natural-sounding edits, while `character` allows finer sub-word alignment for more precise control. The block returns the edited video (stored via the workspace file system) along with the transcript that was used.
<!-- END MANUAL -->
### Inputs
| Input | Description | Type | Required |
|-------|-------------|------|----------|
| video_in | Input video file to edit (URL, data URI, or local path) | str (file) | Yes |
| transcription | Modified transcript of the input video — segments absent from this text will be cut from the output video | str | Yes |
| split_at | Alignment granularity for transcript matching: 'word' aligns cuts at word boundaries, 'character' allows finer sub-word alignment | "word" \| "character" | No |
### Outputs
| Output | Description | Type |
|--------|-------------|------|
| error | Error message if the operation failed | str |
| video_out | Edited video file (path or data URI) | str (file) |
| transcription | Transcription used for editing | str |
### Possible use case
<!-- MANUAL: use_case -->
**Interview Cleanup**: Remove filler words, false starts, or off-topic tangents from recorded interviews by editing the transcript and regenerating the video.
**Content Highlights**: Extract key segments from long-form video content by keeping only the relevant portions of the transcript.
**Automated Moderation**: Remove flagged or inappropriate speech segments from user-generated video content by stripping those lines from the transcript.
<!-- END MANUAL -->
---

View File

@@ -0,0 +1,38 @@
# Video Transcribe
<!-- MANUAL: file_description -->
This block transcribes speech from a video file to text using the Replicate API.
<!-- END MANUAL -->
## Transcribe Video
### What it is
Extract spoken words from a video and return them as a text transcription
### How it works
<!-- MANUAL: how_it_works -->
The block sends the input video to the Replicate API using the `jd7h/edit-video-by-editing-text` model in "transcribe" mode. This model analyzes the audio track of the video, performs speech recognition, and returns the detected speech as text. The block handles multiple API response formats (dictionary, list, string, and file output) to reliably extract the transcript text.
<!-- END MANUAL -->
### Inputs
| Input | Description | Type | Required |
|-------|-------------|------|----------|
| video_in | Input video file to transcribe (URL, data URI, or local path) | str (file) | Yes |
### Outputs
| Output | Description | Type |
|--------|-------------|------|
| error | Error message if the operation failed | str |
| transcription | Text transcription extracted from the video | str |
### Possible use case
<!-- MANUAL: use_case -->
**Subtitle Generation**: Transcribe video dialogue to create subtitle or caption files for accessibility and localization.
**Searchable Video Archives**: Convert speech in recorded meetings, interviews, or lectures into searchable text for indexing and retrieval.
**LLM Content Pipeline**: Feed video transcripts into language models for summarization, analysis, or content repurposing workflows.
<!-- END MANUAL -->
---