diff --git a/autogpt_platform/backend/backend/blocks/media.py b/autogpt_platform/backend/backend/blocks/media.py
deleted file mode 100644
index a8d145bc64..0000000000
--- a/autogpt_platform/backend/backend/blocks/media.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import os
-import tempfile
-from typing import Optional
-
-from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.video.fx.Loop import Loop
-from moviepy.video.io.VideoFileClip import VideoFileClip
-
-from backend.data.block import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
-from backend.data.execution import ExecutionContext
-from backend.data.model import SchemaField
-from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
-
-
-class MediaDurationBlock(Block):
-
-    class Input(BlockSchemaInput):
-        media_in: MediaFileType = SchemaField(
-            description="Media input (URL, data URI, or local path)."
-        )
-        is_video: bool = SchemaField(
-            description="Whether the media is a video (True) or audio (False).",
-            default=True,
-        )
-
-    class Output(BlockSchemaOutput):
-        duration: float = SchemaField(
-            description="Duration of the media file (in seconds)."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
-            description="Block to get the duration of a media file.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=MediaDurationBlock.Input,
-            output_schema=MediaDurationBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        execution_context: ExecutionContext,
-        **kwargs,
-    ) -> BlockOutput:
-        # 1) Store the input media locally
-        local_media_path = await store_media_file(
-            file=input_data.media_in,
-            execution_context=execution_context,
-            return_format="for_local_processing",
-        )
-        assert execution_context.graph_exec_id is not None
-        media_abspath = get_exec_file_path(
-            execution_context.graph_exec_id, local_media_path
-        )
-
-        # 2) Load the clip
-        if input_data.is_video:
-            clip = VideoFileClip(media_abspath)
-        else:
-            clip = AudioFileClip(media_abspath)
-
-        yield "duration", clip.duration
-
-
-class LoopVideoBlock(Block):
-    """
-    Block for looping (repeating) a video clip until a given duration or number of loops.
-    """
-
-    class Input(BlockSchemaInput):
-        video_in: MediaFileType = SchemaField(
-            description="The input video (can be a URL, data URI, or local path)."
-        )
-        # Provide EITHER a `duration` or `n_loops` or both. We'll demonstrate `duration`.
-        duration: Optional[float] = SchemaField(
-            description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
-            default=None,
-            ge=0.0,
-        )
-        n_loops: Optional[int] = SchemaField(
-            description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
-            default=None,
-            ge=1,
-        )
-
-    class Output(BlockSchemaOutput):
-        video_out: str = SchemaField(
-            description="Looped video returned either as a relative path or a data URI."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="8bf9eef6-5451-4213-b265-25306446e94b",
-            description="Block to loop a video to a given duration or number of repeats.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=LoopVideoBlock.Input,
-            output_schema=LoopVideoBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        execution_context: ExecutionContext,
-        **kwargs,
-    ) -> BlockOutput:
-        assert execution_context.graph_exec_id is not None
-        assert execution_context.node_exec_id is not None
-        graph_exec_id = execution_context.graph_exec_id
-        node_exec_id = execution_context.node_exec_id
-
-        # 1) Store the input video locally
-        local_video_path = await store_media_file(
-            file=input_data.video_in,
-            execution_context=execution_context,
-            return_format="for_local_processing",
-        )
-        input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
-
-        # 2) Load the clip
-        clip = VideoFileClip(input_abspath)
-
-        # 3) Apply the loop effect
-        looped_clip = clip
-        if input_data.duration:
-            # Loop until we reach the specified duration
-            looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)])
-        elif input_data.n_loops:
-            looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)])
-        else:
-            raise ValueError("Either 'duration' or 'n_loops' must be provided.")
-
-        assert isinstance(looped_clip, VideoFileClip)
-
-        # 4) Save the looped output
-        output_filename = MediaFileType(
-            f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
-        )
-        output_abspath = get_exec_file_path(graph_exec_id, output_filename)
-
-        looped_clip = looped_clip.with_audio(clip.audio)
-        looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
-        # Return output - for_block_output returns workspace:// if available, else data URI
-        video_out = await store_media_file(
-            file=output_filename,
-            execution_context=execution_context,
-            return_format="for_block_output",
-        )
-
-        yield "video_out", video_out
-
-
-class AddAudioToVideoBlock(Block):
-    """
-    Block that adds (attaches) an audio track to an existing video.
-    Optionally scale the volume of the new track.
-    """
-
-    class Input(BlockSchemaInput):
-        video_in: MediaFileType = SchemaField(
-            description="Video input (URL, data URI, or local path)."
-        )
-        audio_in: MediaFileType = SchemaField(
-            description="Audio input (URL, data URI, or local path)."
-        )
-        volume: float = SchemaField(
-            description="Volume scale for the newly attached audio track (1.0 = original).",
-            default=1.0,
-        )
-
-    class Output(BlockSchemaOutput):
-        video_out: MediaFileType = SchemaField(
-            description="Final video (with attached audio), as a path or data URI."
-        )
-
-    def __init__(self):
-        super().__init__(
-            id="3503748d-62b6-4425-91d6-725b064af509",
-            description="Block to attach an audio file to a video file using moviepy.",
-            categories={BlockCategory.MULTIMEDIA},
-            input_schema=AddAudioToVideoBlock.Input,
-            output_schema=AddAudioToVideoBlock.Output,
-        )
-
-    async def run(
-        self,
-        input_data: Input,
-        *,
-        execution_context: ExecutionContext,
-        **kwargs,
-    ) -> BlockOutput:
-        assert execution_context.graph_exec_id is not None
-        assert execution_context.node_exec_id is not None
-        graph_exec_id = execution_context.graph_exec_id
-        node_exec_id = execution_context.node_exec_id
-
-        # 1) Store the inputs locally
-        local_video_path = await store_media_file(
-            file=input_data.video_in,
-            execution_context=execution_context,
-            return_format="for_local_processing",
-        )
-        local_audio_path = await store_media_file(
-            file=input_data.audio_in,
-            execution_context=execution_context,
-            return_format="for_local_processing",
-        )
-
-        abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id)
-        video_abspath = os.path.join(abs_temp_dir, local_video_path)
-        audio_abspath = os.path.join(abs_temp_dir, local_audio_path)
-
-        # 2) Load video + audio with moviepy
-        video_clip = VideoFileClip(video_abspath)
-        audio_clip = AudioFileClip(audio_abspath)
-        # Optionally scale volume
-        if input_data.volume != 1.0:
-            audio_clip = audio_clip.with_volume_scaled(input_data.volume)
-
-        # 3) Attach the new audio track
-        final_clip = video_clip.with_audio(audio_clip)
-
-        # 4) Write to output file
-        output_filename = MediaFileType(
-            f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
-        )
-        output_abspath = os.path.join(abs_temp_dir, output_filename)
-        final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
-
-        # 5) Return output - for_block_output returns workspace:// if available, else data URI
-        video_out = await store_media_file(
-            file=output_filename,
-            execution_context=execution_context,
-            return_format="for_block_output",
-        )
-
-        yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/video/__init__.py b/autogpt_platform/backend/backend/blocks/video/__init__.py
index 417903a409..4974ae8a87 100644
--- a/autogpt_platform/backend/backend/blocks/video/__init__.py
+++ b/autogpt_platform/backend/backend/blocks/video/__init__.py
@@ -6,23 +6,29 @@ This module provides blocks for:
 - Concatenating multiple videos
 - Adding text overlays
 - Adding AI-generated narration
-
-Note: MediaDurationBlock, LoopVideoBlock, and AddAudioToVideoBlock are
-provided by backend/blocks/media.py.
+- Getting media duration
+- Looping videos
+- Adding audio to videos
 
 Dependencies:
 - yt-dlp: For video downloading
 - moviepy: For video editing operations
-- requests: For API calls (narration block)
+- elevenlabs: For AI narration (optional)
 """
 
+from backend.blocks.video.add_audio import AddAudioToVideoBlock
 from backend.blocks.video.clip import VideoClipBlock
 from backend.blocks.video.concat import VideoConcatBlock
 from backend.blocks.video.download import VideoDownloadBlock
+from backend.blocks.video.duration import MediaDurationBlock
+from backend.blocks.video.loop import LoopVideoBlock
 from backend.blocks.video.narration import VideoNarrationBlock
 from backend.blocks.video.text_overlay import VideoTextOverlayBlock
 
 __all__ = [
+    "AddAudioToVideoBlock",
+    "LoopVideoBlock",
+    "MediaDurationBlock",
     "VideoClipBlock",
     "VideoConcatBlock",
     "VideoDownloadBlock",
diff --git a/autogpt_platform/backend/backend/blocks/video/add_audio.py b/autogpt_platform/backend/backend/blocks/video/add_audio.py
new file mode 100644
index 0000000000..9d66b86888
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/add_audio.py
@@ -0,0 +1,102 @@
+"""AddAudioToVideoBlock - Attach an audio track to a video file."""
+
+import os
+import tempfile
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, store_media_file
+
+
+class AddAudioToVideoBlock(Block):
+    """Add (attach) an audio track to an existing video."""
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="Video input (URL, data URI, or local path)."
+        )
+        audio_in: MediaFileType = SchemaField(
+            description="Audio input (URL, data URI, or local path)."
+        )
+        volume: float = SchemaField(
+            description="Volume scale for the newly attached audio track (1.0 = original).",
+            default=1.0,
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: MediaFileType = SchemaField(
+            description="Final video (with attached audio), as a path or data URI."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="3503748d-62b6-4425-91d6-725b064af509",
+            description="Block to attach an audio file to a video file using moviepy.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=AddAudioToVideoBlock.Input,
+            output_schema=AddAudioToVideoBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        execution_context: ExecutionContext,
+        **kwargs,
+    ) -> BlockOutput:
+        assert execution_context.graph_exec_id is not None
+        assert execution_context.node_exec_id is not None
+        graph_exec_id = execution_context.graph_exec_id
+        node_exec_id = execution_context.node_exec_id
+
+        # 1) Store the inputs locally
+        local_video_path = await store_media_file(
+            file=input_data.video_in,
+            execution_context=execution_context,
+            return_format="for_local_processing",
+        )
+        local_audio_path = await store_media_file(
+            file=input_data.audio_in,
+            execution_context=execution_context,
+            return_format="for_local_processing",
+        )
+
+        abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id)
+        video_abspath = os.path.join(abs_temp_dir, local_video_path)
+        audio_abspath = os.path.join(abs_temp_dir, local_audio_path)
+
+        # 2) Load video + audio with moviepy
+        video_clip = VideoFileClip(video_abspath)
+        audio_clip = AudioFileClip(audio_abspath)
+        # Optionally scale volume
+        if input_data.volume != 1.0:
+            audio_clip = audio_clip.with_volume_scaled(input_data.volume)
+
+        # 3) Attach the new audio track
+        final_clip = video_clip.with_audio(audio_clip)
+
+        # 4) Write to output file
+        output_filename = MediaFileType(
+            f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
+        )
+        output_abspath = os.path.join(abs_temp_dir, output_filename)
+        final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
+
+        # 5) Return output - for_block_output returns workspace:// if available, else data URI
+        video_out = await store_media_file(
+            file=output_filename,
+            execution_context=execution_context,
+            return_format="for_block_output",
+        )
+
+        yield "video_out", video_out
diff --git a/autogpt_platform/backend/backend/blocks/video/duration.py b/autogpt_platform/backend/backend/blocks/video/duration.py
new file mode 100644
index 0000000000..79c3de765e
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/duration.py
@@ -0,0 +1,68 @@
+"""MediaDurationBlock - Get the duration of a media file."""
+
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class MediaDurationBlock(Block):
+    """Get the duration of a media file (video or audio)."""
+
+    class Input(BlockSchemaInput):
+        media_in: MediaFileType = SchemaField(
+            description="Media input (URL, data URI, or local path)."
+        )
+        is_video: bool = SchemaField(
+            description="Whether the media is a video (True) or audio (False).",
+            default=True,
+        )
+
+    class Output(BlockSchemaOutput):
+        duration: float = SchemaField(
+            description="Duration of the media file (in seconds)."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
+            description="Block to get the duration of a media file.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=MediaDurationBlock.Input,
+            output_schema=MediaDurationBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        execution_context: ExecutionContext,
+        **kwargs,
+    ) -> BlockOutput:
+        # 1) Store the input media locally
+        local_media_path = await store_media_file(
+            file=input_data.media_in,
+            execution_context=execution_context,
+            return_format="for_local_processing",
+        )
+        assert execution_context.graph_exec_id is not None
+        media_abspath = get_exec_file_path(
+            execution_context.graph_exec_id, local_media_path
+        )
+
+        # 2) Load the clip
+        if input_data.is_video:
+            clip = VideoFileClip(media_abspath)
+        else:
+            clip = AudioFileClip(media_abspath)
+
+        yield "duration", clip.duration
diff --git a/autogpt_platform/backend/backend/blocks/video/loop.py b/autogpt_platform/backend/backend/blocks/video/loop.py
new file mode 100644
index 0000000000..7b7c08b3e3
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/video/loop.py
@@ -0,0 +1,104 @@
+"""LoopVideoBlock - Loop a video to a given duration or number of repeats."""
+
+import os
+from typing import Optional
+
+from moviepy.video.fx.Loop import Loop
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.execution import ExecutionContext
+from backend.data.model import SchemaField
+from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
+
+
+class LoopVideoBlock(Block):
+    """Loop (repeat) a video clip until a given duration or number of loops."""
+
+    class Input(BlockSchemaInput):
+        video_in: MediaFileType = SchemaField(
+            description="The input video (can be a URL, data URI, or local path)."
+        )
+        duration: Optional[float] = SchemaField(
+            description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
+            default=None,
+            ge=0.0,
+        )
+        n_loops: Optional[int] = SchemaField(
+            description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
+            default=None,
+            ge=1,
+        )
+
+    class Output(BlockSchemaOutput):
+        video_out: str = SchemaField(
+            description="Looped video returned either as a relative path or a data URI."
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="8bf9eef6-5451-4213-b265-25306446e94b",
+            description="Block to loop a video to a given duration or number of repeats.",
+            categories={BlockCategory.MULTIMEDIA},
+            input_schema=LoopVideoBlock.Input,
+            output_schema=LoopVideoBlock.Output,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        execution_context: ExecutionContext,
+        **kwargs,
+    ) -> BlockOutput:
+        assert execution_context.graph_exec_id is not None
+        assert execution_context.node_exec_id is not None
+        graph_exec_id = execution_context.graph_exec_id
+        node_exec_id = execution_context.node_exec_id
+
+        # 1) Store the input video locally
+        local_video_path = await store_media_file(
+            file=input_data.video_in,
+            execution_context=execution_context,
+            return_format="for_local_processing",
+        )
+        input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
+
+        # 2) Load the clip
+        clip = VideoFileClip(input_abspath)
+
+        # 3) Apply the loop effect
+        looped_clip = clip
+        if input_data.duration:
+            # Loop until we reach the specified duration
+            looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)])
+        elif input_data.n_loops:
+            looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)])
+        else:
+            raise ValueError("Either 'duration' or 'n_loops' must be provided.")
+
+        assert isinstance(looped_clip, VideoFileClip)
+
+        # 4) Save the looped output
+        output_filename = MediaFileType(
+            f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
+        )
+        output_abspath = get_exec_file_path(graph_exec_id, output_filename)
+
+        looped_clip = looped_clip.with_audio(clip.audio)
+        looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
+
+        # Return output - for_block_output returns workspace:// if available, else data URI
+        video_out = await store_media_file(
+            file=output_filename,
+            execution_context=execution_context,
+            return_format="for_block_output",
+        )
+
+        yield "video_out", video_out
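
Not part of the diff: a minimal standalone sketch of the moviepy 2.x pattern the blocks above rely on (effects applied via with_effects, audio attached via with_audio, volume rescaled via with_volume_scaled). The file names are placeholder assumptions, and the platform-specific store_media_file / ExecutionContext plumbing is deliberately omitted.

# Sketch only: the moviepy calls used by LoopVideoBlock and AddAudioToVideoBlock,
# exercised outside the AutoGPT block/execution machinery.
# "input.mp4" and "voiceover.mp3" are assumed placeholder files.
from moviepy.audio.io.AudioFileClip import AudioFileClip
from moviepy.video.fx.Loop import Loop
from moviepy.video.io.VideoFileClip import VideoFileClip

clip = VideoFileClip("input.mp4")
narration = AudioFileClip("voiceover.mp3")

# Loop the video out to a 30-second target, as LoopVideoBlock does when
# `duration` is provided (Loop(n=...) gives a fixed repeat count instead).
looped = clip.with_effects([Loop(duration=30)])

# Attach the audio track at 80% volume, mirroring AddAudioToVideoBlock.
final = looped.with_audio(narration.with_volume_scaled(0.8))
final.write_videofile("output.mp4", codec="libx264", audio_codec="aac")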