mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-20 20:48:11 -05:00
Compare commits
9 Commits
make-old-w
...
codex/add-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c38ff0187b | ||
|
|
94f3852f2d | ||
|
|
cc3daef414 | ||
|
|
fd042f8259 | ||
|
|
419baf3b47 | ||
|
|
0207fab199 | ||
|
|
e7b4f3ff7a | ||
|
|
f6c2d519e1 | ||
|
|
02746102b4 |
162
autogpt_platform/backend/backend/blocks/edit_video_by_text.py
Normal file
162
autogpt_platform/backend/backend/blocks/edit_video_by_text.py
Normal file
@@ -0,0 +1,162 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import SecretStr
|
||||
from replicate.client import Client as ReplicateClient
|
||||
from replicate.helpers import FileOutput
|
||||
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import (
|
||||
APIKeyCredentials,
|
||||
CredentialsField,
|
||||
CredentialsMetaInput,
|
||||
SchemaField,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util.file import get_exec_file_path, store_media_file
|
||||
from backend.util.type import MediaFileType
|
||||
|
||||
# Placeholder Replicate API-key credentials used by the block's test fixtures.
TEST_CREDENTIALS = APIKeyCredentials(
    provider="replicate",
    id="01234567-89ab-cdef-0123-456789abcdef",
    title="Mock Replicate API key",
    api_key=SecretStr("mock-replicate-api-key"),
    expires_at=None,
)

# Credential metadata (without the secret) supplied as the `credentials` test input.
TEST_CREDENTIALS_INPUT = {
    field: getattr(TEST_CREDENTIALS, field)
    for field in ("provider", "id", "type", "title")
}
|
||||
|
||||
|
||||
class EditVideoByTextBlock(Block):
    """Edit a video on Replicate by supplying the transcript it should end up with.

    The input video is stored for the current graph execution, sent inline (as a
    base64 data URI) to the ``jd7h/edit-video-by-editing-text`` model in
    ``"edit"`` mode, and the location of the edited video is yielded as
    ``video_url`` together with the transcript that was used.
    """

    class Input(BlockSchema):
        credentials: CredentialsMetaInput[
            Literal[ProviderName.REPLICATE], Literal["api_key"]
        ] = CredentialsField(
            description="The Replicate integration can be used with "
            "any API key with sufficient permissions for the blocks it is used on.",
        )
        video_in: MediaFileType = SchemaField(
            description="Video file to edit",
        )
        transcription: str = SchemaField(
            description="Desired transcript for the output video",
        )
        # Forwarded unchanged to the model as its `split_at` input.
        split_at: str = SchemaField(
            description="Granularity for transcript matching",
            default="word",
        )

    class Output(BlockSchema):
        video_url: str = SchemaField(
            description="URL of the edited video",
        )
        transcription: str = SchemaField(
            description="Transcription used for editing",
        )
        error: str = SchemaField(
            description="Error message if something fails",
            default="",
        )

    def __init__(self) -> None:
        super().__init__(
            id="98d40049-a1de-465f-bba1-47411298ad1a",
            description="Edits a video by modifying its transcript.",
            categories={BlockCategory.MULTIMEDIA},
            input_schema=EditVideoByTextBlock.Input,
            output_schema=EditVideoByTextBlock.Output,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": "data:video/mp4;base64,AAAA",
                "transcription": "edited transcript",
            },
            test_output=[
                ("video_url", "https://replicate.com/output/video.mp4"),
                ("transcription", "edited transcript"),
            ],
            # Replaces `edit_video` during block tests so no Replicate call is made.
            test_mock={
                "edit_video": lambda file_path, transcription, split_at, api_key: "https://replicate.com/output/video.mp4"
            },
            test_credentials=TEST_CREDENTIALS,
        )

    @staticmethod
    def _output_to_url(value: object) -> str:
        """Return ``value.url`` for a Replicate ``FileOutput``, else ``str(value)``."""
        if isinstance(value, FileOutput):
            return value.url
        return str(value)

    async def edit_video(
        self, file_path: str, transcription: str, split_at: str, api_key: SecretStr
    ) -> str:
        """Run the Replicate model in "edit" mode and return the edited video's URL.

        Args:
            file_path: Path of the local video file to upload.
            transcription: Transcript the edited video should match.
            split_at: Matching granularity passed through to the model.
            api_key: Replicate API token.

        Returns:
            The URL (or string form) of the video reported by the model.

        Raises:
            ValueError: If the model output has none of the handled shapes
                (dict with a ``"video"`` key, non-empty list, ``FileOutput``, ``str``).
            Exception: Any error from reading the file or from the Replicate
                client propagates unchanged to the caller.
        """
        client = ReplicateClient(api_token=api_key.get_secret_value())

        # The video is sent inline as a data URI.
        # NOTE(review): the whole file is read into memory synchronously and the
        # MIME type is fixed to video/mp4 regardless of the actual container.
        with open(file_path, "rb") as video_file:
            encoded = base64.b64encode(video_file.read()).decode()
        file_url = f"data:video/mp4;base64,{encoded}"

        output = await client.async_run(
            "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90",
            input={
                "mode": "edit",
                "video_in": file_url,
                "transcription": transcription,
                "split_at": split_at,
            },
            wait=False,
        )

        # The result shape is not fixed, so accept every form handled so far.
        if isinstance(output, dict) and "video" in output:
            return self._output_to_url(output["video"])
        if isinstance(output, list) and len(output) > 0:
            return self._output_to_url(output[0])
        if isinstance(output, (FileOutput, str)):
            return self._output_to_url(output)

        raise ValueError(f"Unexpected output format from Replicate API: {output}")

    async def run(
        self,
        input_data: Input,
        *,
        credentials: APIKeyCredentials,
        graph_exec_id: str,
        user_id: str,
        **kwargs,
    ) -> BlockOutput:
        """Store the input video, edit it via Replicate and yield the results.

        Yields ``("video_url", ...)`` followed by ``("transcription", ...)`` on
        success. Any exception raised while storing the file or calling
        ``edit_video`` is reported as a single ``("error", message)`` output
        instead of being raised.
        """
        try:
            local_path = await store_media_file(
                graph_exec_id=graph_exec_id,
                file=input_data.video_in,
                user_id=user_id,
                return_content=False,
            )
            abs_path = get_exec_file_path(graph_exec_id, local_path)

            video_url = await self.edit_video(
                abs_path,
                input_data.transcription,
                input_data.split_at,
                credentials.api_key,
            )

            yield "video_url", video_url
            yield "transcription", input_data.transcription
        except Exception as e:
            yield "error", f"Failed to edit video: {e}"
|
||||
135
autogpt_platform/backend/backend/blocks/transcribe_video.py
Normal file
135
autogpt_platform/backend/backend/blocks/transcribe_video.py
Normal file
@@ -0,0 +1,135 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import SecretStr
|
||||
from replicate.client import Client as ReplicateClient
|
||||
from replicate.helpers import FileOutput
|
||||
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import (
|
||||
APIKeyCredentials,
|
||||
CredentialsField,
|
||||
CredentialsMetaInput,
|
||||
SchemaField,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util.file import get_exec_file_path, store_media_file
|
||||
from backend.util.type import MediaFileType
|
||||
|
||||
# Placeholder Replicate API-key credentials used by the block's test fixtures.
TEST_CREDENTIALS = APIKeyCredentials(
    provider="replicate",
    id="01234567-89ab-cdef-0123-456789abcdef",
    title="Mock Replicate API key",
    api_key=SecretStr("mock-replicate-api-key"),
    expires_at=None,
)

# Credential metadata (without the secret) supplied as the `credentials` test input.
TEST_CREDENTIALS_INPUT = {
    field: getattr(TEST_CREDENTIALS, field)
    for field in ("provider", "id", "type", "title")
}
|
||||
|
||||
|
||||
class TranscribeVideoBlock(Block):
    """Transcribe the speech in a video file using a Replicate-hosted model.

    The input video is stored for the current graph execution, sent inline (as a
    base64 data URI) to the ``jd7h/edit-video-by-editing-text`` model in
    ``"transcribe"`` mode, and the resulting text is yielded as ``transcription``.
    """

    class Input(BlockSchema):
        credentials: CredentialsMetaInput[
            Literal[ProviderName.REPLICATE], Literal["api_key"]
        ] = CredentialsField(
            description="The Replicate integration can be used with "
            "any API key with sufficient permissions for the blocks it is used on.",
        )
        video_in: MediaFileType = SchemaField(
            description="Video file to transcribe",
        )

    class Output(BlockSchema):
        transcription: str = SchemaField(
            description="Text transcription of the video",
        )
        error: str = SchemaField(
            description="Error message if something fails",
            default="",
        )

    def __init__(self) -> None:
        super().__init__(
            id="fa49dad0-a5fc-441c-ba04-2ac206e392d8",
            description="Transcribes speech from a video file.",
            categories={BlockCategory.MULTIMEDIA},
            input_schema=TranscribeVideoBlock.Input,
            output_schema=TranscribeVideoBlock.Output,
            test_input={
                "credentials": TEST_CREDENTIALS_INPUT,
                "video_in": "data:video/mp4;base64,AAAA",
            },
            test_output=("transcription", "example transcript"),
            # Replaces `transcribe` during block tests so no Replicate call is made.
            test_mock={"transcribe": lambda file_path, api_key: "example transcript"},
            test_credentials=TEST_CREDENTIALS,
        )

    async def transcribe(self, file_path: str, api_key: SecretStr) -> str:
        """Run the Replicate model in "transcribe" mode and return its result.

        Args:
            file_path: Path of the local video file to upload.
            api_key: Replicate API token.

        Returns:
            The ``"transcription"`` entry of a dict result, the first element of
            a non-empty list result, the URL of a ``FileOutput`` result, or a
            plain string result.

        Raises:
            ValueError: If a dict result carries an ``"error"`` key (and no
                ``"transcription"``), or if the output has none of the handled
                shapes.
            Exception: Any error from reading the file or from the Replicate
                client propagates unchanged to the caller.
        """
        client = ReplicateClient(api_token=api_key.get_secret_value())

        # The video is sent inline as a data URI.
        # NOTE(review): the whole file is read into memory synchronously and the
        # MIME type is fixed to video/mp4 regardless of the actual container.
        with open(file_path, "rb") as video_file:
            encoded = base64.b64encode(video_file.read()).decode()
        file_url = f"data:video/mp4;base64,{encoded}"

        output = await client.async_run(
            "jd7h/edit-video-by-editing-text:e010b880347314d07e3ce3b21cbd4c57add51fea3474677a6cb1316751c4cb90",
            input={
                "mode": "transcribe",
                "video_in": file_url,
            },
            wait=False,
        )

        if isinstance(output, dict):
            if "transcription" in output:
                return output["transcription"]
            if "error" in output:
                raise ValueError(f"API returned error: {output['error']}")
            # A dict with neither key falls through to the generic error below.
        elif isinstance(output, list) and len(output) > 0:
            first = output[0]
            if isinstance(first, FileOutput):
                return first.url
            # Coerce so the declared `str` return type holds for any element type.
            return str(first)
        elif isinstance(output, FileOutput):
            return output.url
        elif isinstance(output, str):
            return output

        raise ValueError(f"Unexpected output format from Replicate API: {output}")

    async def run(
        self,
        input_data: Input,
        *,
        credentials: APIKeyCredentials,
        graph_exec_id: str,
        user_id: str,
        **kwargs,
    ) -> BlockOutput:
        """Store the input video, transcribe it via Replicate and yield the text.

        Yields ``("transcription", ...)`` on success. Any exception raised while
        storing the file or calling ``transcribe`` is reported as a single
        ``("error", message)`` output instead of being raised.
        """
        try:
            local_path = await store_media_file(
                graph_exec_id=graph_exec_id,
                file=input_data.video_in,
                user_id=user_id,
                return_content=False,
            )
            abs_path = get_exec_file_path(graph_exec_id, local_path)

            transcript = await self.transcribe(abs_path, credentials.api_key)
            yield "transcription", transcript
        except Exception as e:
            yield "error", f"Failed to transcribe video: {e}"
|
||||
@@ -93,11 +93,13 @@ Below is a comprehensive list of all available blocks, categorized by their prim
|
||||
| [AI Shortform Video Creator](ai_shortform_video_block.md#ai-shortform-video-creator) | Generates short-form videos using AI |
|
||||
| [Replicate Flux Advanced Model](replicate_flux_advanced.md#replicate-flux-advanced-model) | Creates images using Replicate's Flux models |
|
||||
| [Flux Kontext](flux_kontext.md#flux-kontext) | Text-based image editing using Flux Kontext |
|
||||
| [Edit Video by Text](edit_video_by_text.md#edit-video-by-text) | Edit videos by modifying their transcript |
|
||||
|
||||
## Miscellaneous
|
||||
| Block Name | Description |
|
||||
|------------|-------------|
|
||||
| [Transcribe YouTube Video](youtube.md#transcribe-youtube-video) | Transcribes audio from YouTube videos |
|
||||
| [Transcribe Video](transcribe_video.md#transcribe-video) | Converts speech in a video file to text |
|
||||
| [Send Email](email_block.md#send-email) | Sends emails using SMTP |
|
||||
| [Condition Block](branching.md#condition-block) | Evaluates conditions for workflow branching |
|
||||
| [Step Through Items](iteration.md#step-through-items) | Iterates through lists or dictionaries |
|
||||
|
||||
27
docs/platform/blocks/edit_video_by_text.md
Normal file
27
docs/platform/blocks/edit_video_by_text.md
Normal file
@@ -0,0 +1,27 @@
|
||||
## Edit Video by Text
|
||||
|
||||
### What it is
|
||||
A block that edits a video by cutting segments based on an edited transcript.
|
||||
|
||||
### What it does
|
||||
After providing a target transcript, the block removes portions of the video that no longer appear in the text, returning a new edited video file.
|
||||
|
||||
### How it works
|
||||
The block compares the supplied transcript with the video's original transcript. Segments that are missing from the target transcript are removed. Word-level matching is used by default.
|
||||
|
||||
### Inputs
|
||||
| Input | Description |
|
||||
|-------|-------------|
|
||||
| Video | The original video file to edit. |
|
||||
| Transcription | The desired transcript of the output video. |
|
||||
| Split At | Level of precision for transcript matching ("word" or "character"). |
|
||||
|
||||
### Outputs
|
||||
| Output | Description |
|
||||
|--------|-------------|
|
||||
| Video URL | URL of the edited video. |
|
||||
| Transcription | The transcript used to generate the edited video. |
|
||||
| Error | Error message if editing fails. |
|
||||
|
||||
### Possible use case
|
||||
Create a shorter version of a training video by removing sentences from the transcript instead of using a timeline-based video editor.
|
||||
24
docs/platform/blocks/transcribe_video.md
Normal file
24
docs/platform/blocks/transcribe_video.md
Normal file
@@ -0,0 +1,24 @@
|
||||
## Transcribe Video
|
||||
|
||||
### What it is
|
||||
A block that converts the speech in a video file into text.
|
||||
|
||||
### What it does
|
||||
This block accepts a video and returns a text transcription of the spoken content.
|
||||
|
||||
### How it works
|
||||
The block processes the provided video using a speech-to-text engine. The resulting text is returned for use in other blocks, such as text-based video editing.
|
||||
|
||||
### Inputs
|
||||
| Input | Description |
|
||||
|-------|-------------|
|
||||
| Video | The video file to transcribe. |
|
||||
|
||||
### Outputs
|
||||
| Output | Description |
|
||||
|--------|-------------|
|
||||
| Transcription | The text transcription extracted from the video. |
|
||||
| Error | Error message if the transcription fails. |
|
||||
|
||||
### Possible use case
|
||||
Use this block to generate a transcript of a recorded meeting so that you can review or edit the content in text form.
|
||||
Reference in New Issue
Block a user