mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
<!-- Clearly explain the need for these changes: -->
This PR adds general-purpose video editing blocks for the AutoGPT
Platform, enabling automated video production workflows like documentary
creation, marketing videos, tutorial assembly, and content repurposing.
### Changes 🏗️
<!-- Concisely describe all of the changes made in this pull request:
-->
**New blocks added in `backend/blocks/video/`:**
- `VideoDownloadBlock` - Download videos from URLs (YouTube, Vimeo, news
sites, direct links) using yt-dlp
- `VideoClipBlock` - Extract time segments from videos with start/end
time validation
- `VideoConcatBlock` - Merge multiple video clips with optional
transitions (none, crossfade, fade_black)
- `VideoTextOverlayBlock` - Add text overlays/captions with positioning
and timing options
- `VideoNarrationBlock` - Generate AI narration via ElevenLabs and mix
with video audio (replace, mix, or ducking modes)
**Dependencies required:**
- `yt-dlp` - For video downloading
- `moviepy` - For video editing operations
**Implementation details:**
- All blocks follow the SDK pattern with proper error handling and
exception chaining
- Proper resource cleanup in `finally` blocks to prevent memory leaks
- Input validation (e.g., end_time > start_time)
- Test mocks included for CI
### Checklist 📋
#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
- [x] Blocks follow the SDK pattern with
`BlockSchemaInput`/`BlockSchemaOutput`
- [x] Resource cleanup is implemented in `finally` blocks
- [x] Exception chaining is properly implemented
- [x] Input validation is in place
- [x] Test mocks are provided for CI environments
#### For configuration changes:
- [ ] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my
changes
- [ ] I have included a list of my configuration changes in the PR
description (under **Changes**)
N/A - No configuration changes required.
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> **Medium Risk**
> Adds new multimedia blocks that invoke ffmpeg/MoviePy and introduces
new external dependencies (plus container packages), which can impact
runtime stability and resource usage; download/overlay blocks are
present but disabled due to sandbox/policy concerns.
>
> **Overview**
> Adds a new `backend.blocks.video` module with general-purpose video
workflow blocks (download, clip, concat w/ transitions, loop, add-audio,
text overlay, and ElevenLabs-powered narration), including shared
utilities for codec selection, filename cleanup, and an ffmpeg-based
chapter-strip workaround for MoviePy.
>
> Extends credentials/config to support ElevenLabs
(`ELEVENLABS_API_KEY`, provider enum, system credentials, and cost
config) and adds new dependencies (`elevenlabs`, `yt-dlp`) plus Docker
runtime packages (`ffmpeg`, `imagemagick`).
>
> Improves file/reference handling end-to-end by embedding MIME types in
`workspace://...#mime` outputs and updating frontend rendering to detect
video vs image from MIME fragments (and broaden supported audio/video
extensions), with optional enhanced output rendering behind a feature
flag in the legacy builder UI.
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
da7a44d794. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
---------
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>
Co-authored-by: Otto <otto@agpt.co>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
132 lines
3.9 KiB
Python
132 lines
3.9 KiB
Python
"""Shared utilities for video blocks."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
# Module-level logger named after this module; used for the non-fatal ffmpeg warnings below.
logger = logging.getLogger(__name__)
|
|
|
|
# Known operation tags added by video blocks
_VIDEO_OPS = (
    r"(?:clip|overlay|narrated|looped|concat|audio_attached|with_audio|narration)"
)

# Lowercase-hex UUID in 8-4-4-4-12 form, as embedded in a node_exec_id.
_UUID_PATTERN = r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"

# Matches: {node_exec_id}_{operation}_ where node_exec_id contains a UUID.
# The runs around the UUID are lazy on purpose: the character class contains
# "_", so a greedy run would extend to the LAST "_{operation}_" in the stem and
# swallow part of the real filename (e.g. "<uuid>_clip_my_concat_test" would
# collapse to "test"). Lazy matching removes exactly one prefix per match.
_BLOCK_PREFIX_RE = re.compile(
    r"^[a-zA-Z0-9_-]*?"
    + _UUID_PATTERN
    + r"[a-zA-Z0-9_-]*?"
    + r"_"
    + _VIDEO_OPS
    + r"_"
)

# Matches: a lone {node_exec_id}_ prefix (no operation keyword, e.g. download output).
# Lazy for the same reason as above: a greedy run would stop at the last "_" in
# the stem, turning "<uuid>_my_video_file" into "file".
_UUID_PREFIX_RE = re.compile(
    r"^[a-zA-Z0-9_-]*?" + _UUID_PATTERN + r"[a-zA-Z0-9_-]*?_"
)


def extract_source_name(input_path: str, max_length: int = 50) -> str:
    """Extract the original source filename by stripping block-generated prefixes.

    Iteratively removes {node_exec_id}_{operation}_ prefixes that accumulate
    when chaining video blocks, then a single remaining {node_exec_id}_ prefix,
    recovering the original human-readable name. Underscores inside the
    original name are preserved.

    Safe for plain filenames (no UUID -> no stripping).

    Args:
        input_path: Path or filename; only its stem (final component without
            the last extension) is considered.
        max_length: Maximum number of characters of the recovered name to
            return.

    Returns:
        The recovered name truncated to ``max_length`` characters, or
        "video" if stripping leaves nothing.
    """
    stem = Path(input_path).stem

    # Pass 1: strip {node_exec_id}_{operation}_ prefixes iteratively. Each
    # match contains a full UUID, so the stem shrinks on every iteration.
    match = _BLOCK_PREFIX_RE.match(stem)
    while match:
        stem = stem[match.end():]
        match = _BLOCK_PREFIX_RE.match(stem)

    # Pass 2: strip a lone {node_exec_id}_ prefix (e.g. from download block)
    match = _UUID_PREFIX_RE.match(stem)
    if match:
        stem = stem[match.end():]

    if not stem:
        return "video"

    return stem[:max_length]
|
|
|
|
|
|
def get_video_codecs(output_path: str) -> tuple[str, str]:
    """Pick the video and audio codec names for an output file.

    The choice is driven purely by the (case-insensitive) file extension of
    ``output_path``; extensions that are not listed fall back to H.264 + AAC.

    Args:
        output_path: Path to the output file (used to determine extension)

    Returns:
        Tuple of (video_codec, audio_codec)

    Codec mappings:
        - .mp4: H.264 + AAC (universal compatibility)
        - .webm: VP8 + Vorbis (web streaming)
        - .mkv: H.264 + AAC (container supports many codecs)
        - .mov: H.264 + AAC (Apple QuickTime, widely compatible)
        - .m4v: H.264 + AAC (Apple iTunes/devices)
        - .avi: MPEG-4 + MP3 (legacy Windows)
    """
    # Shared by most containers and also used as the fallback pair.
    h264_aac = ("libx264", "aac")

    codecs_by_suffix = {
        ".mp4": h264_aac,
        ".webm": ("libvpx", "libvorbis"),
        ".mkv": h264_aac,
        ".mov": h264_aac,
        ".m4v": h264_aac,
        ".avi": ("mpeg4", "libmp3lame"),
    }

    _, suffix = os.path.splitext(output_path)
    try:
        return codecs_by_suffix[suffix.lower()]
    except KeyError:
        # Unknown or missing extension.
        return h264_aac
|
|
|
|
|
|
def strip_chapters_inplace(video_path: str) -> None:
    """Strip chapter metadata from a media file in-place using ffmpeg.

    MoviePy 2.x crashes with IndexError when parsing files with embedded
    chapter metadata (https://github.com/Zulko/moviepy/issues/2419).
    This strips chapters without re-encoding.

    The operation is best-effort: if ffmpeg is missing, exits non-zero, or
    exceeds the timeout, a warning is logged and the original file is left
    untouched. The temporary output file is always removed.

    Args:
        video_path: Absolute path to the media file to strip chapters from.
    """
    timeout_s = 300

    # ffmpeg writes to a sibling temp file that keeps the original extension;
    # it only replaces the original after a successful run.
    base, ext = os.path.splitext(video_path)
    tmp_path = base + ".tmp" + ext

    command = [
        "ffmpeg",
        "-y",
        "-i",
        video_path,
        "-map_chapters",
        "-1",
        "-codec",
        "copy",
        tmp_path,
    ]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout_s,
        )
        if result.returncode != 0:
            logger.warning(
                "ffmpeg chapter strip failed (rc=%d): %s",
                result.returncode,
                result.stderr,
            )
            return
        os.replace(tmp_path, video_path)
    except FileNotFoundError:
        logger.warning("ffmpeg not found; skipping chapter strip")
    except subprocess.TimeoutExpired:
        # subprocess.run has already killed and reaped the child at this point.
        # Treat a timeout like any other ffmpeg failure instead of crashing
        # the caller; the partial temp file is removed in ``finally``.
        logger.warning(
            "ffmpeg chapter strip timed out after %ds; skipping", timeout_s
        )
    finally:
        # After a successful os.replace the temp file no longer exists.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
|