diff --git a/.github/workflows/platform-frontend-ci.yml b/.github/workflows/platform-frontend-ci.yml index 499bb03170..14676a6a1f 100644 --- a/.github/workflows/platform-frontend-ci.yml +++ b/.github/workflows/platform-frontend-ci.yml @@ -27,11 +27,20 @@ jobs: runs-on: ubuntu-latest outputs: cache-key: ${{ steps.cache-key.outputs.key }} + components-changed: ${{ steps.filter.outputs.components }} steps: - name: Checkout repository uses: actions/checkout@v4 + - name: Check for component changes + uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + components: + - 'autogpt_platform/frontend/src/components/**' + - name: Set up Node.js uses: actions/setup-node@v4 with: @@ -90,8 +99,11 @@ jobs: chromatic: runs-on: ubuntu-latest needs: setup - # Only run on dev branch pushes or PRs targeting dev - if: github.ref == 'refs/heads/dev' || github.base_ref == 'dev' + # Disabled: to re-enable, remove 'false &&' from the condition below + if: >- + false + && (github.ref == 'refs/heads/dev' || github.base_ref == 'dev') + && needs.setup.outputs.components-changed == 'true' steps: - name: Checkout repository diff --git a/autogpt_platform/backend/.env.default b/autogpt_platform/backend/.env.default index b393f13017..fa52ba812a 100644 --- a/autogpt_platform/backend/.env.default +++ b/autogpt_platform/backend/.env.default @@ -152,6 +152,7 @@ REPLICATE_API_KEY= REVID_API_KEY= SCREENSHOTONE_API_KEY= UNREAL_SPEECH_API_KEY= +ELEVENLABS_API_KEY= # Data & Search Services E2B_API_KEY= diff --git a/autogpt_platform/backend/.gitignore b/autogpt_platform/backend/.gitignore index 9224c07d9e..6e688311a6 100644 --- a/autogpt_platform/backend/.gitignore +++ b/autogpt_platform/backend/.gitignore @@ -19,3 +19,6 @@ load-tests/*.json load-tests/*.log load-tests/node_modules/* migrations/*/rollback*.sql + +# Workspace files +workspaces/ diff --git a/autogpt_platform/backend/Dockerfile b/autogpt_platform/backend/Dockerfile index 103226d079..9bd455e490 100644 --- a/autogpt_platform/backend/Dockerfile +++ b/autogpt_platform/backend/Dockerfile @@ -62,10 +62,12 @@ ENV POETRY_HOME=/opt/poetry \ DEBIAN_FRONTEND=noninteractive ENV PATH=/opt/poetry/bin:$PATH -# Install Python without upgrading system-managed packages +# Install Python, FFmpeg, and ImageMagick (required for video processing blocks) RUN apt-get update && apt-get install -y \ python3.13 \ python3-pip \ + ffmpeg \ + imagemagick \ && rm -rf /var/lib/apt/lists/* # Copy only necessary files from builder diff --git a/autogpt_platform/backend/backend/api/features/chat/config.py b/autogpt_platform/backend/backend/api/features/chat/config.py index 2e8dbf5413..0b37e42df8 100644 --- a/autogpt_platform/backend/backend/api/features/chat/config.py +++ b/autogpt_platform/backend/backend/api/features/chat/config.py @@ -11,7 +11,7 @@ class ChatConfig(BaseSettings): # OpenAI API Configuration model: str = Field( - default="anthropic/claude-opus-4.5", description="Default model to use" + default="anthropic/claude-opus-4.6", description="Default model to use" ) title_model: str = Field( default="openai/gpt-4o-mini", diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index 218575085b..06da6bdf2b 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -33,7 +33,7 @@ from backend.data.understanding import ( get_business_understanding, ) from backend.util.exceptions import NotFoundError -from 
backend.util.settings import Settings +from backend.util.settings import AppEnvironment, Settings from . import db as chat_db from . import stream_registry @@ -222,8 +222,18 @@ async def _get_system_prompt_template(context: str) -> str: try: # cache_ttl_seconds=0 disables SDK caching to always get the latest prompt # Use asyncio.to_thread to avoid blocking the event loop + # In non-production environments, fetch the latest prompt version + # instead of the production-labeled version for easier testing + label = ( + None + if settings.config.app_env == AppEnvironment.PRODUCTION + else "latest" + ) prompt = await asyncio.to_thread( - langfuse.get_prompt, config.langfuse_prompt_name, cache_ttl_seconds=0 + langfuse.get_prompt, + config.langfuse_prompt_name, + label=label, + cache_ttl_seconds=0, ) return prompt.compile(users_information=context) except Exception as e: @@ -618,6 +628,9 @@ async def stream_chat_completion( total_tokens=chunk.totalTokens, ) ) + elif isinstance(chunk, StreamHeartbeat): + # Pass through heartbeat to keep SSE connection alive + yield chunk else: logger.error(f"Unknown chunk type: {type(chunk)}", exc_info=True) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py index b88b9b2924..f83ca30b5c 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py @@ -7,15 +7,7 @@ from typing import Any, NotRequired, TypedDict from backend.api.features.library import db as library_db from backend.api.features.store import db as store_db -from backend.data.graph import ( - Graph, - Link, - Node, - create_graph, - get_graph, - get_graph_all_versions, - get_store_listed_graphs, -) +from backend.data.graph import Graph, Link, Node, get_graph, get_store_listed_graphs from backend.util.exceptions import DatabaseError, NotFoundError from .service import ( @@ -28,8 +20,6 @@ from .service import ( logger = logging.getLogger(__name__) -AGENT_EXECUTOR_BLOCK_ID = "e189baac-8c20-45a1-94a7-55177ea42565" - class ExecutionSummary(TypedDict): """Summary of a single execution for quality assessment.""" @@ -669,45 +659,6 @@ def json_to_graph(agent_json: dict[str, Any]) -> Graph: ) -def _reassign_node_ids(graph: Graph) -> None: - """Reassign all node and link IDs to new UUIDs. - - This is needed when creating a new version to avoid unique constraint violations. - """ - id_map = {node.id: str(uuid.uuid4()) for node in graph.nodes} - - for node in graph.nodes: - node.id = id_map[node.id] - - for link in graph.links: - link.id = str(uuid.uuid4()) - if link.source_id in id_map: - link.source_id = id_map[link.source_id] - if link.sink_id in id_map: - link.sink_id = id_map[link.sink_id] - - -def _populate_agent_executor_user_ids(agent_json: dict[str, Any], user_id: str) -> None: - """Populate user_id in AgentExecutorBlock nodes. - - The external agent generator creates AgentExecutorBlock nodes with empty user_id. - This function fills in the actual user_id so sub-agents run with correct permissions. 
- - Args: - agent_json: Agent JSON dict (modified in place) - user_id: User ID to set - """ - for node in agent_json.get("nodes", []): - if node.get("block_id") == AGENT_EXECUTOR_BLOCK_ID: - input_default = node.get("input_default") or {} - if not input_default.get("user_id"): - input_default["user_id"] = user_id - node["input_default"] = input_default - logger.debug( - f"Set user_id for AgentExecutorBlock node {node.get('id')}" - ) - - async def save_agent_to_library( agent_json: dict[str, Any], user_id: str, is_update: bool = False ) -> tuple[Graph, Any]: @@ -721,35 +672,10 @@ async def save_agent_to_library( Returns: Tuple of (created Graph, LibraryAgent) """ - # Populate user_id in AgentExecutorBlock nodes before conversion - _populate_agent_executor_user_ids(agent_json, user_id) - graph = json_to_graph(agent_json) - if is_update: - if graph.id: - existing_versions = await get_graph_all_versions(graph.id, user_id) - if existing_versions: - latest_version = max(v.version for v in existing_versions) - graph.version = latest_version + 1 - _reassign_node_ids(graph) - logger.info(f"Updating agent {graph.id} to version {graph.version}") - else: - graph.id = str(uuid.uuid4()) - graph.version = 1 - _reassign_node_ids(graph) - logger.info(f"Creating new agent with ID {graph.id}") - - created_graph = await create_graph(graph, user_id) - - library_agents = await library_db.create_library_agent( - graph=created_graph, - user_id=user_id, - sensitive_action_safe_mode=True, - create_library_agents_for_sub_graphs=False, - ) - - return created_graph, library_agents[0] + if is_update: + return await library_db.update_graph_in_library(graph, user_id) + return await library_db.create_graph_in_library(graph, user_id) def graph_to_json(graph: Graph) -> dict[str, Any]: diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py index 62d59c470e..61cdba1ef9 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py @@ -206,9 +206,9 @@ async def search_agents( ] ) no_results_msg = ( - f"No agents found matching '{query}'. Try different keywords or browse the marketplace." + f"No agents found matching '{query}'. Let the user know they can try different keywords or browse the marketplace. Also let them know you can create a custom agent for them based on their needs." if source == "marketplace" - else f"No agents matching '{query}' found in your library." + else f"No agents matching '{query}' found in your library. Let the user know you can create a custom agent for them based on their needs." ) return NoResultsResponse( message=no_results_msg, session_id=session_id, suggestions=suggestions ) @@ -224,10 +224,10 @@ message = ( "Now you have found some options for the user to choose from. " "You can add a link to a recommended agent at: /marketplace/agent/agent_id " - "Please ask the user if they would like to use any of these agents." + "Please ask the user if they would like to use any of these agents. Let the user know we can create a custom agent for them based on their needs." if source == "marketplace" else "Found agents in the user's library. You can provide a link to view an agent at: " - "/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute." + "/library/agents/{agent_id}. Use agent_output to get execution results, or run_agent to execute. 
Let the user know we can create a custom agent for them based on their needs." ) return AgentsFoundResponse( diff --git a/autogpt_platform/backend/backend/api/features/library/db.py b/autogpt_platform/backend/backend/api/features/library/db.py index 394f959953..6bebfb573c 100644 --- a/autogpt_platform/backend/backend/api/features/library/db.py +++ b/autogpt_platform/backend/backend/api/features/library/db.py @@ -19,7 +19,10 @@ from backend.data.graph import GraphSettings from backend.data.includes import AGENT_PRESET_INCLUDE, library_agent_include from backend.data.model import CredentialsMetaInput from backend.integrations.creds_manager import IntegrationCredentialsManager -from backend.integrations.webhooks.graph_lifecycle_hooks import on_graph_activate +from backend.integrations.webhooks.graph_lifecycle_hooks import ( + on_graph_activate, + on_graph_deactivate, +) from backend.util.clients import get_scheduler_client from backend.util.exceptions import DatabaseError, InvalidInputError, NotFoundError from backend.util.json import SafeJson @@ -537,6 +540,92 @@ async def update_agent_version_in_library( return library_model.LibraryAgent.from_db(lib) +async def create_graph_in_library( + graph: graph_db.Graph, + user_id: str, +) -> tuple[graph_db.GraphModel, library_model.LibraryAgent]: + """Create a new graph and add it to the user's library.""" + graph.version = 1 + graph_model = graph_db.make_graph_model(graph, user_id) + graph_model.reassign_ids(user_id=user_id, reassign_graph_id=True) + + created_graph = await graph_db.create_graph(graph_model, user_id) + + library_agents = await create_library_agent( + graph=created_graph, + user_id=user_id, + sensitive_action_safe_mode=True, + create_library_agents_for_sub_graphs=False, + ) + + if created_graph.is_active: + created_graph = await on_graph_activate(created_graph, user_id=user_id) + + return created_graph, library_agents[0] + + +async def update_graph_in_library( + graph: graph_db.Graph, + user_id: str, +) -> tuple[graph_db.GraphModel, library_model.LibraryAgent]: + """Create a new version of an existing graph and update the library entry.""" + existing_versions = await graph_db.get_graph_all_versions(graph.id, user_id) + current_active_version = ( + next((v for v in existing_versions if v.is_active), None) + if existing_versions + else None + ) + graph.version = ( + max(v.version for v in existing_versions) + 1 if existing_versions else 1 + ) + + graph_model = graph_db.make_graph_model(graph, user_id) + graph_model.reassign_ids(user_id=user_id, reassign_graph_id=False) + + created_graph = await graph_db.create_graph(graph_model, user_id) + + library_agent = await get_library_agent_by_graph_id(user_id, created_graph.id) + if not library_agent: + raise NotFoundError(f"Library agent not found for graph {created_graph.id}") + + library_agent = await update_library_agent_version_and_settings( + user_id, created_graph + ) + + if created_graph.is_active: + created_graph = await on_graph_activate(created_graph, user_id=user_id) + await graph_db.set_graph_active_version( + graph_id=created_graph.id, + version=created_graph.version, + user_id=user_id, + ) + if current_active_version: + await on_graph_deactivate(current_active_version, user_id=user_id) + + return created_graph, library_agent + + +async def update_library_agent_version_and_settings( + user_id: str, agent_graph: graph_db.GraphModel +) -> library_model.LibraryAgent: + """Update library agent to point to new graph version and sync settings.""" + library = await 
update_agent_version_in_library( + user_id, agent_graph.id, agent_graph.version + ) + updated_settings = GraphSettings.from_graph( + graph=agent_graph, + hitl_safe_mode=library.settings.human_in_the_loop_safe_mode, + sensitive_action_safe_mode=library.settings.sensitive_action_safe_mode, + ) + if updated_settings != library.settings: + library = await update_library_agent( + library_agent_id=library.id, + user_id=user_id, + settings=updated_settings, + ) + return library + + async def update_library_agent( library_agent_id: str, user_id: str, diff --git a/autogpt_platform/backend/backend/api/features/v1.py b/autogpt_platform/backend/backend/api/features/v1.py index 09d3759a65..a8610702cc 100644 --- a/autogpt_platform/backend/backend/api/features/v1.py +++ b/autogpt_platform/backend/backend/api/features/v1.py @@ -101,7 +101,6 @@ from backend.util.timezone_utils import ( from backend.util.virus_scanner import scan_content_safe from .library import db as library_db -from .library import model as library_model from .store.model import StoreAgentDetails @@ -823,18 +822,16 @@ async def update_graph( graph: graph_db.Graph, user_id: Annotated[str, Security(get_user_id)], ) -> graph_db.GraphModel: - # Sanity check if graph.id and graph.id != graph_id: raise HTTPException(400, detail="Graph ID does not match ID in URI") - # Determine new version existing_versions = await graph_db.get_graph_all_versions(graph_id, user_id=user_id) if not existing_versions: raise HTTPException(404, detail=f"Graph #{graph_id} not found") - latest_version_number = max(g.version for g in existing_versions) - graph.version = latest_version_number + 1 + graph.version = max(g.version for g in existing_versions) + 1 current_active_version = next((v for v in existing_versions if v.is_active), None) + graph = graph_db.make_graph_model(graph, user_id) graph.reassign_ids(user_id=user_id, reassign_graph_id=False) graph.validate_graph(for_run=False) @@ -842,27 +839,23 @@ async def update_graph( new_graph_version = await graph_db.create_graph(graph, user_id=user_id) if new_graph_version.is_active: - # Keep the library agent up to date with the new active version - await _update_library_agent_version_and_settings(user_id, new_graph_version) - - # Handle activation of the new graph first to ensure continuity + await library_db.update_library_agent_version_and_settings( + user_id, new_graph_version + ) new_graph_version = await on_graph_activate(new_graph_version, user_id=user_id) - # Ensure new version is the only active version await graph_db.set_graph_active_version( graph_id=graph_id, version=new_graph_version.version, user_id=user_id ) if current_active_version: - # Handle deactivation of the previously active version await on_graph_deactivate(current_active_version, user_id=user_id) - # Fetch new graph version *with sub-graphs* (needed for credentials input schema) new_graph_version_with_subgraphs = await graph_db.get_graph( graph_id, new_graph_version.version, user_id=user_id, include_subgraphs=True, ) - assert new_graph_version_with_subgraphs # make type checker happy + assert new_graph_version_with_subgraphs return new_graph_version_with_subgraphs @@ -900,33 +893,15 @@ async def set_graph_active_version( ) # Keep the library agent up to date with the new active version - await _update_library_agent_version_and_settings(user_id, new_active_graph) + await library_db.update_library_agent_version_and_settings( + user_id, new_active_graph + ) if current_active_graph and current_active_graph.version != new_active_version: # Handle 
deactivation of the previously active version await on_graph_deactivate(current_active_graph, user_id=user_id) -async def _update_library_agent_version_and_settings( - user_id: str, agent_graph: graph_db.GraphModel -) -> library_model.LibraryAgent: - library = await library_db.update_agent_version_in_library( - user_id, agent_graph.id, agent_graph.version - ) - updated_settings = GraphSettings.from_graph( - graph=agent_graph, - hitl_safe_mode=library.settings.human_in_the_loop_safe_mode, - sensitive_action_safe_mode=library.settings.sensitive_action_safe_mode, - ) - if updated_settings != library.settings: - library = await library_db.update_library_agent( - library_agent_id=library.id, - user_id=user_id, - settings=updated_settings, - ) - return library - - @v1_router.patch( path="/graphs/{graph_id}/settings", summary="Update graph settings", diff --git a/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py new file mode 100644 index 0000000000..b823627b43 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py @@ -0,0 +1,28 @@ +"""ElevenLabs integration blocks - test credentials and shared utilities.""" + +from typing import Literal + +from pydantic import SecretStr + +from backend.data.model import APIKeyCredentials, CredentialsMetaInput +from backend.integrations.providers import ProviderName + +TEST_CREDENTIALS = APIKeyCredentials( + id="01234567-89ab-cdef-0123-456789abcdef", + provider="elevenlabs", + api_key=SecretStr("mock-elevenlabs-api-key"), + title="Mock ElevenLabs API key", + expires_at=None, +) + +TEST_CREDENTIALS_INPUT = { + "provider": TEST_CREDENTIALS.provider, + "id": TEST_CREDENTIALS.id, + "type": TEST_CREDENTIALS.type, + "title": TEST_CREDENTIALS.title, +} + +ElevenLabsCredentials = APIKeyCredentials +ElevenLabsCredentialsInput = CredentialsMetaInput[ + Literal[ProviderName.ELEVENLABS], Literal["api_key"] +] diff --git a/autogpt_platform/backend/backend/blocks/encoder_block.py b/autogpt_platform/backend/backend/blocks/encoder_block.py new file mode 100644 index 0000000000..b60a4ae828 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/encoder_block.py @@ -0,0 +1,77 @@ +"""Text encoding block for converting special characters to escape sequences.""" + +import codecs + +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.model import SchemaField + + +class TextEncoderBlock(Block): + """ + Encodes a string by converting special characters into escape sequences. + + This block is the inverse of TextDecoderBlock. It takes text containing + special characters (like newlines, tabs, etc.) and converts them into + their escape sequence representations (e.g., newline becomes \\n). 
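+
+    Example (illustrative round-trip of the codec used by run() below;
+    the inverse decode is what TextDecoderBlock is described as doing):
+
+        >>> import codecs
+        >>> codecs.encode("Hello\nWorld", "unicode_escape").decode("utf-8")
+        'Hello\\nWorld'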
+ """ + + class Input(BlockSchemaInput): + """Input schema for TextEncoderBlock.""" + + text: str = SchemaField( + description="A string containing special characters to be encoded", + placeholder="Your text with newlines and quotes to encode", + ) + + class Output(BlockSchemaOutput): + """Output schema for TextEncoderBlock.""" + + encoded_text: str = SchemaField( + description="The encoded text with special characters converted to escape sequences" + ) + error: str = SchemaField(description="Error message if encoding fails") + + def __init__(self): + super().__init__( + id="5185f32e-4b65-4ecf-8fbb-873f003f09d6", + description="Encodes a string by converting special characters into escape sequences", + categories={BlockCategory.TEXT}, + input_schema=TextEncoderBlock.Input, + output_schema=TextEncoderBlock.Output, + test_input={ + "text": """Hello +World! +This is a "quoted" string.""" + }, + test_output=[ + ( + "encoded_text", + """Hello\\nWorld!\\nThis is a "quoted" string.""", + ) + ], + ) + + async def run(self, input_data: Input, **kwargs) -> BlockOutput: + """ + Encode the input text by converting special characters to escape sequences. + + Args: + input_data: The input containing the text to encode. + **kwargs: Additional keyword arguments (unused). + + Yields: + The encoded text with escape sequences, or an error message if encoding fails. + """ + try: + encoded_text = codecs.encode(input_data.text, "unicode_escape").decode( + "utf-8" + ) + yield "encoded_text", encoded_text + except Exception as e: + yield "error", f"Encoding error: {str(e)}" diff --git a/autogpt_platform/backend/backend/blocks/llm.py b/autogpt_platform/backend/backend/blocks/llm.py index 54295da1f1..be2b85949e 100644 --- a/autogpt_platform/backend/backend/blocks/llm.py +++ b/autogpt_platform/backend/backend/blocks/llm.py @@ -115,6 +115,7 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta): CLAUDE_4_5_OPUS = "claude-opus-4-5-20251101" CLAUDE_4_5_SONNET = "claude-sonnet-4-5-20250929" CLAUDE_4_5_HAIKU = "claude-haiku-4-5-20251001" + CLAUDE_4_6_OPUS = "claude-opus-4-6" CLAUDE_3_HAIKU = "claude-3-haiku-20240307" # AI/ML API models AIML_API_QWEN2_5_72B = "Qwen/Qwen2.5-72B-Instruct-Turbo" @@ -270,6 +271,9 @@ MODEL_METADATA = { LlmModel.CLAUDE_4_SONNET: ModelMetadata( "anthropic", 200000, 64000, "Claude Sonnet 4", "Anthropic", "Anthropic", 2 ), # claude-4-sonnet-20250514 + LlmModel.CLAUDE_4_6_OPUS: ModelMetadata( + "anthropic", 200000, 128000, "Claude Opus 4.6", "Anthropic", "Anthropic", 3 + ), # claude-opus-4-6 LlmModel.CLAUDE_4_5_OPUS: ModelMetadata( "anthropic", 200000, 64000, "Claude Opus 4.5", "Anthropic", "Anthropic", 3 ), # claude-opus-4-5-20251101 diff --git a/autogpt_platform/backend/backend/blocks/media.py b/autogpt_platform/backend/backend/blocks/media.py deleted file mode 100644 index a8d145bc64..0000000000 --- a/autogpt_platform/backend/backend/blocks/media.py +++ /dev/null @@ -1,246 +0,0 @@ -import os -import tempfile -from typing import Optional - -from moviepy.audio.io.AudioFileClip import AudioFileClip -from moviepy.video.fx.Loop import Loop -from moviepy.video.io.VideoFileClip import VideoFileClip - -from backend.data.block import ( - Block, - BlockCategory, - BlockOutput, - BlockSchemaInput, - BlockSchemaOutput, -) -from backend.data.execution import ExecutionContext -from backend.data.model import SchemaField -from backend.util.file import MediaFileType, get_exec_file_path, store_media_file - - -class MediaDurationBlock(Block): - - class Input(BlockSchemaInput): - media_in: MediaFileType = 
SchemaField( - description="Media input (URL, data URI, or local path)." - ) - is_video: bool = SchemaField( - description="Whether the media is a video (True) or audio (False).", - default=True, - ) - - class Output(BlockSchemaOutput): - duration: float = SchemaField( - description="Duration of the media file (in seconds)." - ) - - def __init__(self): - super().__init__( - id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6", - description="Block to get the duration of a media file.", - categories={BlockCategory.MULTIMEDIA}, - input_schema=MediaDurationBlock.Input, - output_schema=MediaDurationBlock.Output, - ) - - async def run( - self, - input_data: Input, - *, - execution_context: ExecutionContext, - **kwargs, - ) -> BlockOutput: - # 1) Store the input media locally - local_media_path = await store_media_file( - file=input_data.media_in, - execution_context=execution_context, - return_format="for_local_processing", - ) - assert execution_context.graph_exec_id is not None - media_abspath = get_exec_file_path( - execution_context.graph_exec_id, local_media_path - ) - - # 2) Load the clip - if input_data.is_video: - clip = VideoFileClip(media_abspath) - else: - clip = AudioFileClip(media_abspath) - - yield "duration", clip.duration - - -class LoopVideoBlock(Block): - """ - Block for looping (repeating) a video clip until a given duration or number of loops. - """ - - class Input(BlockSchemaInput): - video_in: MediaFileType = SchemaField( - description="The input video (can be a URL, data URI, or local path)." - ) - # Provide EITHER a `duration` or `n_loops` or both. We'll demonstrate `duration`. - duration: Optional[float] = SchemaField( - description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.", - default=None, - ge=0.0, - ) - n_loops: Optional[int] = SchemaField( - description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).", - default=None, - ge=1, - ) - - class Output(BlockSchemaOutput): - video_out: str = SchemaField( - description="Looped video returned either as a relative path or a data URI." 
- ) - - def __init__(self): - super().__init__( - id="8bf9eef6-5451-4213-b265-25306446e94b", - description="Block to loop a video to a given duration or number of repeats.", - categories={BlockCategory.MULTIMEDIA}, - input_schema=LoopVideoBlock.Input, - output_schema=LoopVideoBlock.Output, - ) - - async def run( - self, - input_data: Input, - *, - execution_context: ExecutionContext, - **kwargs, - ) -> BlockOutput: - assert execution_context.graph_exec_id is not None - assert execution_context.node_exec_id is not None - graph_exec_id = execution_context.graph_exec_id - node_exec_id = execution_context.node_exec_id - - # 1) Store the input video locally - local_video_path = await store_media_file( - file=input_data.video_in, - execution_context=execution_context, - return_format="for_local_processing", - ) - input_abspath = get_exec_file_path(graph_exec_id, local_video_path) - - # 2) Load the clip - clip = VideoFileClip(input_abspath) - - # 3) Apply the loop effect - looped_clip = clip - if input_data.duration: - # Loop until we reach the specified duration - looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)]) - elif input_data.n_loops: - looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)]) - else: - raise ValueError("Either 'duration' or 'n_loops' must be provided.") - - assert isinstance(looped_clip, VideoFileClip) - - # 4) Save the looped output - output_filename = MediaFileType( - f"{node_exec_id}_looped_{os.path.basename(local_video_path)}" - ) - output_abspath = get_exec_file_path(graph_exec_id, output_filename) - - looped_clip = looped_clip.with_audio(clip.audio) - looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac") - - # Return output - for_block_output returns workspace:// if available, else data URI - video_out = await store_media_file( - file=output_filename, - execution_context=execution_context, - return_format="for_block_output", - ) - - yield "video_out", video_out - - -class AddAudioToVideoBlock(Block): - """ - Block that adds (attaches) an audio track to an existing video. - Optionally scale the volume of the new track. - """ - - class Input(BlockSchemaInput): - video_in: MediaFileType = SchemaField( - description="Video input (URL, data URI, or local path)." - ) - audio_in: MediaFileType = SchemaField( - description="Audio input (URL, data URI, or local path)." - ) - volume: float = SchemaField( - description="Volume scale for the newly attached audio track (1.0 = original).", - default=1.0, - ) - - class Output(BlockSchemaOutput): - video_out: MediaFileType = SchemaField( - description="Final video (with attached audio), as a path or data URI." 
- ) - - def __init__(self): - super().__init__( - id="3503748d-62b6-4425-91d6-725b064af509", - description="Block to attach an audio file to a video file using moviepy.", - categories={BlockCategory.MULTIMEDIA}, - input_schema=AddAudioToVideoBlock.Input, - output_schema=AddAudioToVideoBlock.Output, - ) - - async def run( - self, - input_data: Input, - *, - execution_context: ExecutionContext, - **kwargs, - ) -> BlockOutput: - assert execution_context.graph_exec_id is not None - assert execution_context.node_exec_id is not None - graph_exec_id = execution_context.graph_exec_id - node_exec_id = execution_context.node_exec_id - - # 1) Store the inputs locally - local_video_path = await store_media_file( - file=input_data.video_in, - execution_context=execution_context, - return_format="for_local_processing", - ) - local_audio_path = await store_media_file( - file=input_data.audio_in, - execution_context=execution_context, - return_format="for_local_processing", - ) - - abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id) - video_abspath = os.path.join(abs_temp_dir, local_video_path) - audio_abspath = os.path.join(abs_temp_dir, local_audio_path) - - # 2) Load video + audio with moviepy - video_clip = VideoFileClip(video_abspath) - audio_clip = AudioFileClip(audio_abspath) - # Optionally scale volume - if input_data.volume != 1.0: - audio_clip = audio_clip.with_volume_scaled(input_data.volume) - - # 3) Attach the new audio track - final_clip = video_clip.with_audio(audio_clip) - - # 4) Write to output file - output_filename = MediaFileType( - f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}" - ) - output_abspath = os.path.join(abs_temp_dir, output_filename) - final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac") - - # 5) Return output - for_block_output returns workspace:// if available, else data URI - video_out = await store_media_file( - file=output_filename, - execution_context=execution_context, - return_format="for_block_output", - ) - - yield "video_out", video_out diff --git a/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py b/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py new file mode 100644 index 0000000000..1e9b9fed4f --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py @@ -0,0 +1,77 @@ +import pytest + +from backend.blocks.encoder_block import TextEncoderBlock + + +@pytest.mark.asyncio +async def test_text_encoder_basic(): + """Test basic encoding of newlines and special characters.""" + block = TextEncoderBlock() + result = [] + async for output in block.run(TextEncoderBlock.Input(text="Hello\nWorld")): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + assert result[0][1] == "Hello\\nWorld" + + +@pytest.mark.asyncio +async def test_text_encoder_multiple_escapes(): + """Test encoding of multiple escape sequences.""" + block = TextEncoderBlock() + result = [] + async for output in block.run( + TextEncoderBlock.Input(text="Line1\nLine2\tTabbed\rCarriage") + ): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + assert "\\n" in result[0][1] + assert "\\t" in result[0][1] + assert "\\r" in result[0][1] + + +@pytest.mark.asyncio +async def test_text_encoder_unicode(): + """Test that unicode characters are handled correctly.""" + block = TextEncoderBlock() + result = [] + async for output in block.run(TextEncoderBlock.Input(text="Hello δΈ–η•Œ\n")): + 
result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + # Unicode characters should be escaped as \uXXXX sequences + assert "\\n" in result[0][1] + + +@pytest.mark.asyncio +async def test_text_encoder_empty_string(): + """Test encoding of an empty string.""" + block = TextEncoderBlock() + result = [] + async for output in block.run(TextEncoderBlock.Input(text="")): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + assert result[0][1] == "" + + +@pytest.mark.asyncio +async def test_text_encoder_error_handling(): + """Test that encoding errors are handled gracefully.""" + from unittest.mock import patch + + block = TextEncoderBlock() + result = [] + + with patch("codecs.encode", side_effect=Exception("Mocked encoding error")): + async for output in block.run(TextEncoderBlock.Input(text="test")): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "error" + assert "Mocked encoding error" in result[0][1] diff --git a/autogpt_platform/backend/backend/blocks/video/__init__.py b/autogpt_platform/backend/backend/blocks/video/__init__.py new file mode 100644 index 0000000000..4974ae8a87 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/__init__.py @@ -0,0 +1,37 @@ +"""Video editing blocks for AutoGPT Platform. + +This module provides blocks for: +- Downloading videos from URLs (YouTube, Vimeo, news sites, direct links) +- Clipping/trimming video segments +- Concatenating multiple videos +- Adding text overlays +- Adding AI-generated narration +- Getting media duration +- Looping videos +- Adding audio to videos + +Dependencies: +- yt-dlp: For video downloading +- moviepy: For video editing operations +- elevenlabs: For AI narration (optional) +""" + +from backend.blocks.video.add_audio import AddAudioToVideoBlock +from backend.blocks.video.clip import VideoClipBlock +from backend.blocks.video.concat import VideoConcatBlock +from backend.blocks.video.download import VideoDownloadBlock +from backend.blocks.video.duration import MediaDurationBlock +from backend.blocks.video.loop import LoopVideoBlock +from backend.blocks.video.narration import VideoNarrationBlock +from backend.blocks.video.text_overlay import VideoTextOverlayBlock + +__all__ = [ + "AddAudioToVideoBlock", + "LoopVideoBlock", + "MediaDurationBlock", + "VideoClipBlock", + "VideoConcatBlock", + "VideoDownloadBlock", + "VideoNarrationBlock", + "VideoTextOverlayBlock", +] diff --git a/autogpt_platform/backend/backend/blocks/video/_utils.py b/autogpt_platform/backend/backend/blocks/video/_utils.py new file mode 100644 index 0000000000..9ebf195078 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/_utils.py @@ -0,0 +1,131 @@ +"""Shared utilities for video blocks.""" + +from __future__ import annotations + +import logging +import os +import re +import subprocess +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Known operation tags added by video blocks +_VIDEO_OPS = ( + r"(?:clip|overlay|narrated|looped|concat|audio_attached|with_audio|narration)" +) + +# Matches: {node_exec_id}_{operation}_ where node_exec_id contains a UUID +_BLOCK_PREFIX_RE = re.compile( + r"^[a-zA-Z0-9_-]*" + r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" + r"[a-zA-Z0-9_-]*" + r"_" + _VIDEO_OPS + r"_" +) + +# Matches: a lone {node_exec_id}_ prefix (no operation keyword, e.g. 
download output) +_UUID_PREFIX_RE = re.compile( + r"^[a-zA-Z0-9_-]*" + r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" + r"[a-zA-Z0-9_-]*_" +) + + +def extract_source_name(input_path: str, max_length: int = 50) -> str: + """Extract the original source filename by stripping block-generated prefixes. + + Iteratively removes {node_exec_id}_{operation}_ prefixes that accumulate + when chaining video blocks, recovering the original human-readable name. + + Safe for plain filenames (no UUID -> no stripping). + Falls back to "video" if everything is stripped. + """ + stem = Path(input_path).stem + + # Pass 1: strip {node_exec_id}_{operation}_ prefixes iteratively + while _BLOCK_PREFIX_RE.match(stem): + stem = _BLOCK_PREFIX_RE.sub("", stem, count=1) + + # Pass 2: strip a lone {node_exec_id}_ prefix (e.g. from download block) + if _UUID_PREFIX_RE.match(stem): + stem = _UUID_PREFIX_RE.sub("", stem, count=1) + + if not stem: + return "video" + + return stem[:max_length] + + +def get_video_codecs(output_path: str) -> tuple[str, str]: + """Get appropriate video and audio codecs based on output file extension. + + Args: + output_path: Path to the output file (used to determine extension) + + Returns: + Tuple of (video_codec, audio_codec) + + Codec mappings: + - .mp4: H.264 + AAC (universal compatibility) + - .webm: VP8 + Vorbis (web streaming) + - .mkv: H.264 + AAC (container supports many codecs) + - .mov: H.264 + AAC (Apple QuickTime, widely compatible) + - .m4v: H.264 + AAC (Apple iTunes/devices) + - .avi: MPEG-4 + MP3 (legacy Windows) + """ + ext = os.path.splitext(output_path)[1].lower() + + codec_map: dict[str, tuple[str, str]] = { + ".mp4": ("libx264", "aac"), + ".webm": ("libvpx", "libvorbis"), + ".mkv": ("libx264", "aac"), + ".mov": ("libx264", "aac"), + ".m4v": ("libx264", "aac"), + ".avi": ("mpeg4", "libmp3lame"), + } + + return codec_map.get(ext, ("libx264", "aac")) + + +def strip_chapters_inplace(video_path: str) -> None: + """Strip chapter metadata from a media file in-place using ffmpeg. + + MoviePy 2.x crashes with IndexError when parsing files with embedded + chapter metadata (https://github.com/Zulko/moviepy/issues/2419). + This strips chapters without re-encoding. + + Args: + video_path: Absolute path to the media file to strip chapters from. 
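+
+    For reference, the subprocess call below is equivalent to the CLI
+    invocation (filenames illustrative):
+
+        ffmpeg -y -i video.mp4 -map_chapters -1 -codec copy video.tmp.mp4
+
+    "-map_chapters -1" drops all chapter metadata and "-codec copy" remuxes
+    the streams without re-encoding, after which the temp file atomically
+    replaces the original via os.replace.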
+ """ + base, ext = os.path.splitext(video_path) + tmp_path = base + ".tmp" + ext + try: + result = subprocess.run( + [ + "ffmpeg", + "-y", + "-i", + video_path, + "-map_chapters", + "-1", + "-codec", + "copy", + tmp_path, + ], + capture_output=True, + text=True, + timeout=300, + ) + if result.returncode != 0: + logger.warning( + "ffmpeg chapter strip failed (rc=%d): %s", + result.returncode, + result.stderr, + ) + return + os.replace(tmp_path, video_path) + except FileNotFoundError: + logger.warning("ffmpeg not found; skipping chapter strip") + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) diff --git a/autogpt_platform/backend/backend/blocks/video/add_audio.py b/autogpt_platform/backend/backend/blocks/video/add_audio.py new file mode 100644 index 0000000000..ebd4ab94f2 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/add_audio.py @@ -0,0 +1,113 @@ +"""AddAudioToVideoBlock - Attach an audio track to a video file.""" + +from moviepy.audio.io.AudioFileClip import AudioFileClip +from moviepy.video.io.VideoFileClip import VideoFileClip + +from backend.blocks.video._utils import extract_source_name, strip_chapters_inplace +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import SchemaField +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class AddAudioToVideoBlock(Block): + """Add (attach) an audio track to an existing video.""" + + class Input(BlockSchemaInput): + video_in: MediaFileType = SchemaField( + description="Video input (URL, data URI, or local path)." + ) + audio_in: MediaFileType = SchemaField( + description="Audio input (URL, data URI, or local path)." + ) + volume: float = SchemaField( + description="Volume scale for the newly attached audio track (1.0 = original).", + default=1.0, + ) + + class Output(BlockSchemaOutput): + video_out: MediaFileType = SchemaField( + description="Final video (with attached audio), as a path or data URI." 
+ ) + + def __init__(self): + super().__init__( + id="3503748d-62b6-4425-91d6-725b064af509", + description="Block to attach an audio file to a video file using moviepy.", + categories={BlockCategory.MULTIMEDIA}, + input_schema=AddAudioToVideoBlock.Input, + output_schema=AddAudioToVideoBlock.Output, + ) + + async def run( + self, + input_data: Input, + *, + execution_context: ExecutionContext, + **kwargs, + ) -> BlockOutput: + assert execution_context.graph_exec_id is not None + assert execution_context.node_exec_id is not None + graph_exec_id = execution_context.graph_exec_id + node_exec_id = execution_context.node_exec_id + + # 1) Store the inputs locally + local_video_path = await store_media_file( + file=input_data.video_in, + execution_context=execution_context, + return_format="for_local_processing", + ) + local_audio_path = await store_media_file( + file=input_data.audio_in, + execution_context=execution_context, + return_format="for_local_processing", + ) + + video_abspath = get_exec_file_path(graph_exec_id, local_video_path) + audio_abspath = get_exec_file_path(graph_exec_id, local_audio_path) + + # 2) Load video + audio with moviepy + strip_chapters_inplace(video_abspath) + strip_chapters_inplace(audio_abspath) + video_clip = None + audio_clip = None + final_clip = None + try: + video_clip = VideoFileClip(video_abspath) + audio_clip = AudioFileClip(audio_abspath) + # Optionally scale volume + if input_data.volume != 1.0: + audio_clip = audio_clip.with_volume_scaled(input_data.volume) + + # 3) Attach the new audio track + final_clip = video_clip.with_audio(audio_clip) + + # 4) Write to output file + source = extract_source_name(local_video_path) + output_filename = MediaFileType(f"{node_exec_id}_with_audio_{source}.mp4") + output_abspath = get_exec_file_path(graph_exec_id, output_filename) + final_clip.write_videofile( + output_abspath, codec="libx264", audio_codec="aac" + ) + finally: + if final_clip: + final_clip.close() + if audio_clip: + audio_clip.close() + if video_clip: + video_clip.close() + + # 5) Return output - for_block_output returns workspace:// if available, else data URI + video_out = await store_media_file( + file=output_filename, + execution_context=execution_context, + return_format="for_block_output", + ) + + yield "video_out", video_out diff --git a/autogpt_platform/backend/backend/blocks/video/clip.py b/autogpt_platform/backend/backend/blocks/video/clip.py new file mode 100644 index 0000000000..05deea6530 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/clip.py @@ -0,0 +1,167 @@ +"""VideoClipBlock - Extract a segment from a video file.""" + +from typing import Literal + +from moviepy.video.io.VideoFileClip import VideoFileClip + +from backend.blocks.video._utils import ( + extract_source_name, + get_video_codecs, + strip_chapters_inplace, +) +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import SchemaField +from backend.util.exceptions import BlockExecutionError +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class VideoClipBlock(Block): + """Extract a time segment from a video.""" + + class Input(BlockSchemaInput): + video_in: MediaFileType = SchemaField( + description="Input video (URL, data URI, or local path)" + ) + start_time: float = SchemaField(description="Start time in seconds", ge=0.0) + end_time: float = SchemaField(description="End time in 
seconds", ge=0.0) + output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField( + description="Output format", default="mp4", advanced=True + ) + + class Output(BlockSchemaOutput): + video_out: MediaFileType = SchemaField( + description="Clipped video file (path or data URI)" + ) + duration: float = SchemaField(description="Clip duration in seconds") + + def __init__(self): + super().__init__( + id="8f539119-e580-4d86-ad41-86fbcb22abb1", + description="Extract a time segment from a video", + categories={BlockCategory.MULTIMEDIA}, + input_schema=self.Input, + output_schema=self.Output, + test_input={ + "video_in": "/tmp/test.mp4", + "start_time": 0.0, + "end_time": 10.0, + }, + test_output=[("video_out", str), ("duration", float)], + test_mock={ + "_clip_video": lambda *args: 10.0, + "_store_input_video": lambda *args, **kwargs: "test.mp4", + "_store_output_video": lambda *args, **kwargs: "clip_test.mp4", + }, + ) + + async def _store_input_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store input video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_local_processing", + ) + + async def _store_output_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store output video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_block_output", + ) + + def _clip_video( + self, + video_abspath: str, + output_abspath: str, + start_time: float, + end_time: float, + ) -> float: + """Extract a clip from a video. Extracted for testability.""" + clip = None + subclip = None + try: + strip_chapters_inplace(video_abspath) + clip = VideoFileClip(video_abspath) + subclip = clip.subclipped(start_time, end_time) + video_codec, audio_codec = get_video_codecs(output_abspath) + subclip.write_videofile( + output_abspath, codec=video_codec, audio_codec=audio_codec + ) + return subclip.duration + finally: + if subclip: + subclip.close() + if clip: + clip.close() + + async def run( + self, + input_data: Input, + *, + execution_context: ExecutionContext, + node_exec_id: str, + **kwargs, + ) -> BlockOutput: + # Validate time range + if input_data.end_time <= input_data.start_time: + raise BlockExecutionError( + message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})", + block_name=self.name, + block_id=str(self.id), + ) + + try: + assert execution_context.graph_exec_id is not None + + # Store the input video locally + local_video_path = await self._store_input_video( + execution_context, input_data.video_in + ) + video_abspath = get_exec_file_path( + execution_context.graph_exec_id, local_video_path + ) + + # Build output path + source = extract_source_name(local_video_path) + output_filename = MediaFileType( + f"{node_exec_id}_clip_{source}.{input_data.output_format}" + ) + output_abspath = get_exec_file_path( + execution_context.graph_exec_id, output_filename + ) + + duration = self._clip_video( + video_abspath, + output_abspath, + input_data.start_time, + input_data.end_time, + ) + + # Return as workspace path or data URI based on context + video_out = await self._store_output_video( + execution_context, output_filename + ) + + yield "video_out", video_out + yield "duration", duration + + except BlockExecutionError: + raise + except Exception as e: + raise BlockExecutionError( + message=f"Failed to 
clip video: {e}", + block_name=self.name, + block_id=str(self.id), + ) from e diff --git a/autogpt_platform/backend/backend/blocks/video/concat.py b/autogpt_platform/backend/backend/blocks/video/concat.py new file mode 100644 index 0000000000..b49854fb40 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/concat.py @@ -0,0 +1,227 @@ +"""VideoConcatBlock - Concatenate multiple video clips into one.""" + +from typing import Literal + +from moviepy import concatenate_videoclips +from moviepy.video.fx import CrossFadeIn, CrossFadeOut, FadeIn, FadeOut +from moviepy.video.io.VideoFileClip import VideoFileClip + +from backend.blocks.video._utils import ( + extract_source_name, + get_video_codecs, + strip_chapters_inplace, +) +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import SchemaField +from backend.util.exceptions import BlockExecutionError +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class VideoConcatBlock(Block): + """Merge multiple video clips into one continuous video.""" + + class Input(BlockSchemaInput): + videos: list[MediaFileType] = SchemaField( + description="List of video files to concatenate (in order)" + ) + transition: Literal["none", "crossfade", "fade_black"] = SchemaField( + description="Transition between clips", default="none" + ) + transition_duration: int = SchemaField( + description="Transition duration in seconds", + default=1, + ge=0, + advanced=True, + ) + output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField( + description="Output format", default="mp4", advanced=True + ) + + class Output(BlockSchemaOutput): + video_out: MediaFileType = SchemaField( + description="Concatenated video file (path or data URI)" + ) + total_duration: float = SchemaField(description="Total duration in seconds") + + def __init__(self): + super().__init__( + id="9b0f531a-1118-487f-aeec-3fa63ea8900a", + description="Merge multiple video clips into one continuous video", + categories={BlockCategory.MULTIMEDIA}, + input_schema=self.Input, + output_schema=self.Output, + test_input={ + "videos": ["/tmp/a.mp4", "/tmp/b.mp4"], + }, + test_output=[ + ("video_out", str), + ("total_duration", float), + ], + test_mock={ + "_concat_videos": lambda *args: 20.0, + "_store_input_video": lambda *args, **kwargs: "test.mp4", + "_store_output_video": lambda *args, **kwargs: "concat_test.mp4", + }, + ) + + async def _store_input_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store input video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_local_processing", + ) + + async def _store_output_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store output video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_block_output", + ) + + def _concat_videos( + self, + video_abspaths: list[str], + output_abspath: str, + transition: str, + transition_duration: int, + ) -> float: + """Concatenate videos. Extracted for testability. + + Returns: + Total duration of the concatenated video. 
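+
+        Note: with transition="crossfade" the clips overlap by
+        transition_duration (concatenate_videoclips is called with
+        padding=-transition_duration below), so the returned duration is
+        roughly sum(clip durations) - (len(clips) - 1) * transition_duration;
+        e.g. two 10 s clips with a 1 s crossfade yield about 19 s, while
+        transition="none" simply sums to 20 s.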
+ """ + clips = [] + faded_clips = [] + final = None + try: + # Load clips + for v in video_abspaths: + strip_chapters_inplace(v) + clips.append(VideoFileClip(v)) + + # Validate transition_duration against shortest clip + if transition in {"crossfade", "fade_black"} and transition_duration > 0: + min_duration = min(c.duration for c in clips) + if transition_duration >= min_duration: + raise BlockExecutionError( + message=( + f"transition_duration ({transition_duration}s) must be " + f"shorter than the shortest clip ({min_duration:.2f}s)" + ), + block_name=self.name, + block_id=str(self.id), + ) + + if transition == "crossfade": + for i, clip in enumerate(clips): + effects = [] + if i > 0: + effects.append(CrossFadeIn(transition_duration)) + if i < len(clips) - 1: + effects.append(CrossFadeOut(transition_duration)) + if effects: + clip = clip.with_effects(effects) + faded_clips.append(clip) + final = concatenate_videoclips( + faded_clips, + method="compose", + padding=-transition_duration, + ) + elif transition == "fade_black": + for clip in clips: + faded = clip.with_effects( + [FadeIn(transition_duration), FadeOut(transition_duration)] + ) + faded_clips.append(faded) + final = concatenate_videoclips(faded_clips) + else: + final = concatenate_videoclips(clips) + + video_codec, audio_codec = get_video_codecs(output_abspath) + final.write_videofile( + output_abspath, codec=video_codec, audio_codec=audio_codec + ) + + return final.duration + finally: + if final: + final.close() + for clip in faded_clips: + clip.close() + for clip in clips: + clip.close() + + async def run( + self, + input_data: Input, + *, + execution_context: ExecutionContext, + node_exec_id: str, + **kwargs, + ) -> BlockOutput: + # Validate minimum clips + if len(input_data.videos) < 2: + raise BlockExecutionError( + message="At least 2 videos are required for concatenation", + block_name=self.name, + block_id=str(self.id), + ) + + try: + assert execution_context.graph_exec_id is not None + + # Store all input videos locally + video_abspaths = [] + for video in input_data.videos: + local_path = await self._store_input_video(execution_context, video) + video_abspaths.append( + get_exec_file_path(execution_context.graph_exec_id, local_path) + ) + + # Build output path + source = ( + extract_source_name(video_abspaths[0]) if video_abspaths else "video" + ) + output_filename = MediaFileType( + f"{node_exec_id}_concat_{source}.{input_data.output_format}" + ) + output_abspath = get_exec_file_path( + execution_context.graph_exec_id, output_filename + ) + + total_duration = self._concat_videos( + video_abspaths, + output_abspath, + input_data.transition, + input_data.transition_duration, + ) + + # Return as workspace path or data URI based on context + video_out = await self._store_output_video( + execution_context, output_filename + ) + + yield "video_out", video_out + yield "total_duration", total_duration + + except BlockExecutionError: + raise + except Exception as e: + raise BlockExecutionError( + message=f"Failed to concatenate videos: {e}", + block_name=self.name, + block_id=str(self.id), + ) from e diff --git a/autogpt_platform/backend/backend/blocks/video/download.py b/autogpt_platform/backend/backend/blocks/video/download.py new file mode 100644 index 0000000000..4046d5df42 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/download.py @@ -0,0 +1,172 @@ +"""VideoDownloadBlock - Download video from URL (YouTube, Vimeo, news sites, direct links).""" + +import os +import typing +from typing import Literal + 
+import yt_dlp + +if typing.TYPE_CHECKING: + from yt_dlp import _Params + +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import SchemaField +from backend.util.exceptions import BlockExecutionError +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class VideoDownloadBlock(Block): + """Download video from URL using yt-dlp.""" + + class Input(BlockSchemaInput): + url: str = SchemaField( + description="URL of the video to download (YouTube, Vimeo, direct link, etc.)", + placeholder="https://www.youtube.com/watch?v=...", + ) + quality: Literal["best", "1080p", "720p", "480p", "audio_only"] = SchemaField( + description="Video quality preference", default="720p" + ) + output_format: Literal["mp4", "webm", "mkv"] = SchemaField( + description="Output video format", default="mp4", advanced=True + ) + + class Output(BlockSchemaOutput): + video_file: MediaFileType = SchemaField( + description="Downloaded video (path or data URI)" + ) + duration: float = SchemaField(description="Video duration in seconds") + title: str = SchemaField(description="Video title from source") + source_url: str = SchemaField(description="Original source URL") + + def __init__(self): + super().__init__( + id="c35daabb-cd60-493b-b9ad-51f1fe4b50c4", + description="Download video from URL (YouTube, Vimeo, news sites, direct links)", + categories={BlockCategory.MULTIMEDIA}, + input_schema=self.Input, + output_schema=self.Output, + disabled=True, # Disable until we can sandbox yt-dlp and handle security implications + test_input={ + "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "quality": "480p", + }, + test_output=[ + ("video_file", str), + ("duration", float), + ("title", str), + ("source_url", str), + ], + test_mock={ + "_download_video": lambda *args: ( + "video.mp4", + 212.0, + "Test Video", + ), + "_store_output_video": lambda *args, **kwargs: "video.mp4", + }, + ) + + async def _store_output_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store output video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_block_output", + ) + + def _get_format_string(self, quality: str) -> str: + formats = { + "best": "bestvideo+bestaudio/best", + "1080p": "bestvideo[height<=1080]+bestaudio/best[height<=1080]", + "720p": "bestvideo[height<=720]+bestaudio/best[height<=720]", + "480p": "bestvideo[height<=480]+bestaudio/best[height<=480]", + "audio_only": "bestaudio/best", + } + return formats.get(quality, formats["720p"]) + + def _download_video( + self, + url: str, + quality: str, + output_format: str, + output_dir: str, + node_exec_id: str, + ) -> tuple[str, float, str]: + """Download video. 
Extracted for testability.""" + output_template = os.path.join( + output_dir, f"{node_exec_id}_%(title).50s.%(ext)s" + ) + + ydl_opts: "_Params" = { + "format": f"{self._get_format_string(quality)}/best", + "outtmpl": output_template, + "merge_output_format": output_format, + "quiet": True, + "no_warnings": True, + } + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(url, download=True) + video_path = ydl.prepare_filename(info) + + # Handle format conversion in filename + if not video_path.endswith(f".{output_format}"): + video_path = video_path.rsplit(".", 1)[0] + f".{output_format}" + + # Return just the filename, not the full path + filename = os.path.basename(video_path) + + return ( + filename, + info.get("duration") or 0.0, + info.get("title") or "Unknown", + ) + + async def run( + self, + input_data: Input, + *, + execution_context: ExecutionContext, + node_exec_id: str, + **kwargs, + ) -> BlockOutput: + try: + assert execution_context.graph_exec_id is not None + + # Get the exec file directory + output_dir = get_exec_file_path(execution_context.graph_exec_id, "") + os.makedirs(output_dir, exist_ok=True) + + filename, duration, title = self._download_video( + input_data.url, + input_data.quality, + input_data.output_format, + output_dir, + node_exec_id, + ) + + # Return as workspace path or data URI based on context + video_out = await self._store_output_video( + execution_context, MediaFileType(filename) + ) + + yield "video_file", video_out + yield "duration", duration + yield "title", title + yield "source_url", input_data.url + + except Exception as e: + raise BlockExecutionError( + message=f"Failed to download video: {e}", + block_name=self.name, + block_id=str(self.id), + ) from e diff --git a/autogpt_platform/backend/backend/blocks/video/duration.py b/autogpt_platform/backend/backend/blocks/video/duration.py new file mode 100644 index 0000000000..9e05d35b00 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/duration.py @@ -0,0 +1,77 @@ +"""MediaDurationBlock - Get the duration of a media file.""" + +from moviepy.audio.io.AudioFileClip import AudioFileClip +from moviepy.video.io.VideoFileClip import VideoFileClip + +from backend.blocks.video._utils import strip_chapters_inplace +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import SchemaField +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class MediaDurationBlock(Block): + """Get the duration of a media file (video or audio).""" + + class Input(BlockSchemaInput): + media_in: MediaFileType = SchemaField( + description="Media input (URL, data URI, or local path)." + ) + is_video: bool = SchemaField( + description="Whether the media is a video (True) or audio (False).", + default=True, + ) + + class Output(BlockSchemaOutput): + duration: float = SchemaField( + description="Duration of the media file (in seconds)." 
+ ) + + def __init__(self): + super().__init__( + id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6", + description="Block to get the duration of a media file.", + categories={BlockCategory.MULTIMEDIA}, + input_schema=MediaDurationBlock.Input, + output_schema=MediaDurationBlock.Output, + ) + + async def run( + self, + input_data: Input, + *, + execution_context: ExecutionContext, + **kwargs, + ) -> BlockOutput: + # 1) Store the input media locally + local_media_path = await store_media_file( + file=input_data.media_in, + execution_context=execution_context, + return_format="for_local_processing", + ) + assert execution_context.graph_exec_id is not None + media_abspath = get_exec_file_path( + execution_context.graph_exec_id, local_media_path + ) + + # 2) Strip chapters to avoid MoviePy crash, then load the clip + strip_chapters_inplace(media_abspath) + clip = None + try: + if input_data.is_video: + clip = VideoFileClip(media_abspath) + else: + clip = AudioFileClip(media_abspath) + + duration = clip.duration + finally: + if clip: + clip.close() + + yield "duration", duration diff --git a/autogpt_platform/backend/backend/blocks/video/loop.py b/autogpt_platform/backend/backend/blocks/video/loop.py new file mode 100644 index 0000000000..461610f713 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/loop.py @@ -0,0 +1,115 @@ +"""LoopVideoBlock - Loop a video to a given duration or number of repeats.""" + +from typing import Optional + +from moviepy.video.fx.Loop import Loop +from moviepy.video.io.VideoFileClip import VideoFileClip + +from backend.blocks.video._utils import extract_source_name, strip_chapters_inplace +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import SchemaField +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class LoopVideoBlock(Block): + """Loop (repeat) a video clip until a given duration or number of loops.""" + + class Input(BlockSchemaInput): + video_in: MediaFileType = SchemaField( + description="The input video (can be a URL, data URI, or local path)." + ) + duration: Optional[float] = SchemaField( + description="Target duration (in seconds) to loop the video to. Either duration or n_loops must be provided.", + default=None, + ge=0.0, + le=3600.0, # Max 1 hour to prevent disk exhaustion + ) + n_loops: Optional[int] = SchemaField( + description="Number of times to repeat the video. Either n_loops or duration must be provided.", + default=None, + ge=1, + le=10, # Max 10 loops to prevent disk exhaustion + ) + + class Output(BlockSchemaOutput): + video_out: MediaFileType = SchemaField( + description="Looped video returned either as a relative path or a data URI." 
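Stripped of block plumbing, the duration probe in `MediaDurationBlock.run` reduces to the MoviePy pattern below (the path is a placeholder). Closing the clip in `finally` matters because each clip holds an ffmpeg reader process open:

```python
from moviepy.audio.io.AudioFileClip import AudioFileClip
from moviepy.video.io.VideoFileClip import VideoFileClip


def probe_duration(path: str, is_video: bool = True) -> float:
    clip = None
    try:
        # Both clip types expose .duration in seconds as a float
        clip = VideoFileClip(path) if is_video else AudioFileClip(path)
        return clip.duration
    finally:
        if clip:
            clip.close()  # releases the underlying ffmpeg reader


print(probe_duration("/tmp/example.mp4"))
```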
+ ) + + def __init__(self): + super().__init__( + id="8bf9eef6-5451-4213-b265-25306446e94b", + description="Block to loop a video to a given duration or number of repeats.", + categories={BlockCategory.MULTIMEDIA}, + input_schema=LoopVideoBlock.Input, + output_schema=LoopVideoBlock.Output, + ) + + async def run( + self, + input_data: Input, + *, + execution_context: ExecutionContext, + **kwargs, + ) -> BlockOutput: + assert execution_context.graph_exec_id is not None + assert execution_context.node_exec_id is not None + graph_exec_id = execution_context.graph_exec_id + node_exec_id = execution_context.node_exec_id + + # 1) Store the input video locally + local_video_path = await store_media_file( + file=input_data.video_in, + execution_context=execution_context, + return_format="for_local_processing", + ) + input_abspath = get_exec_file_path(graph_exec_id, local_video_path) + + # 2) Load the clip + strip_chapters_inplace(input_abspath) + clip = None + looped_clip = None + try: + clip = VideoFileClip(input_abspath) + + # 3) Apply the loop effect + if input_data.duration: + # Loop until we reach the specified duration + looped_clip = clip.with_effects([Loop(duration=input_data.duration)]) + elif input_data.n_loops: + looped_clip = clip.with_effects([Loop(n=input_data.n_loops)]) + else: + raise ValueError("Either 'duration' or 'n_loops' must be provided.") + + assert isinstance(looped_clip, VideoFileClip) + + # 4) Save the looped output + source = extract_source_name(local_video_path) + output_filename = MediaFileType(f"{node_exec_id}_looped_{source}.mp4") + output_abspath = get_exec_file_path(graph_exec_id, output_filename) + + looped_clip = looped_clip.with_audio(clip.audio) + looped_clip.write_videofile( + output_abspath, codec="libx264", audio_codec="aac" + ) + finally: + if looped_clip: + looped_clip.close() + if clip: + clip.close() + + # Return output - for_block_output returns workspace:// if available, else data URI + video_out = await store_media_file( + file=output_filename, + execution_context=execution_context, + return_format="for_block_output", + ) + + yield "video_out", video_out diff --git a/autogpt_platform/backend/backend/blocks/video/narration.py b/autogpt_platform/backend/backend/blocks/video/narration.py new file mode 100644 index 0000000000..adf41753c8 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/narration.py @@ -0,0 +1,267 @@ +"""VideoNarrationBlock - Generate AI voice narration and add to video.""" + +import os +from typing import Literal + +from elevenlabs import ElevenLabs +from moviepy import CompositeAudioClip +from moviepy.audio.io.AudioFileClip import AudioFileClip +from moviepy.video.io.VideoFileClip import VideoFileClip + +from backend.blocks.elevenlabs._auth import ( + TEST_CREDENTIALS, + TEST_CREDENTIALS_INPUT, + ElevenLabsCredentials, + ElevenLabsCredentialsInput, +) +from backend.blocks.video._utils import ( + extract_source_name, + get_video_codecs, + strip_chapters_inplace, +) +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import CredentialsField, SchemaField +from backend.util.exceptions import BlockExecutionError +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class VideoNarrationBlock(Block): + """Generate AI narration and add to video.""" + + class Input(BlockSchemaInput): + credentials: ElevenLabsCredentialsInput = CredentialsField( + 
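In isolation, the MoviePy 2.x effect API that `LoopVideoBlock` uses looks like the sketch below (paths are placeholders). `Loop(duration=...)` extends the clip to a target length while `Loop(n=...)` repeats it a fixed number of times, matching the block's two mutually exclusive inputs:

```python
from moviepy.video.fx.Loop import Loop
from moviepy.video.io.VideoFileClip import VideoFileClip

clip = VideoFileClip("/tmp/input.mp4")
try:
    looped = clip.with_effects([Loop(duration=60.0)])  # or Loop(n=3)
    looped = looped.with_audio(clip.audio)  # re-attach audio, as the block does
    looped.write_videofile("/tmp/looped.mp4", codec="libx264", audio_codec="aac")
    looped.close()
finally:
    clip.close()
```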
description="ElevenLabs API key for voice synthesis" + ) + video_in: MediaFileType = SchemaField( + description="Input video (URL, data URI, or local path)" + ) + script: str = SchemaField(description="Narration script text") + voice_id: str = SchemaField( + description="ElevenLabs voice ID", default="21m00Tcm4TlvDq8ikWAM" # Rachel + ) + model_id: Literal[ + "eleven_multilingual_v2", + "eleven_flash_v2_5", + "eleven_turbo_v2_5", + "eleven_turbo_v2", + ] = SchemaField( + description="ElevenLabs TTS model", + default="eleven_multilingual_v2", + ) + mix_mode: Literal["replace", "mix", "ducking"] = SchemaField( + description="How to combine with original audio. 'ducking' applies stronger attenuation than 'mix'.", + default="ducking", + ) + narration_volume: float = SchemaField( + description="Narration volume (0.0 to 2.0)", + default=1.0, + ge=0.0, + le=2.0, + advanced=True, + ) + original_volume: float = SchemaField( + description="Original audio volume when mixing (0.0 to 1.0)", + default=0.3, + ge=0.0, + le=1.0, + advanced=True, + ) + + class Output(BlockSchemaOutput): + video_out: MediaFileType = SchemaField( + description="Video with narration (path or data URI)" + ) + audio_file: MediaFileType = SchemaField( + description="Generated audio file (path or data URI)" + ) + + def __init__(self): + super().__init__( + id="3d036b53-859c-4b17-9826-ca340f736e0e", + description="Generate AI narration and add to video", + categories={BlockCategory.MULTIMEDIA, BlockCategory.AI}, + input_schema=self.Input, + output_schema=self.Output, + test_input={ + "video_in": "/tmp/test.mp4", + "script": "Hello world", + "credentials": TEST_CREDENTIALS_INPUT, + }, + test_credentials=TEST_CREDENTIALS, + test_output=[("video_out", str), ("audio_file", str)], + test_mock={ + "_generate_narration_audio": lambda *args: b"mock audio content", + "_add_narration_to_video": lambda *args: None, + "_store_input_video": lambda *args, **kwargs: "test.mp4", + "_store_output_video": lambda *args, **kwargs: "narrated_test.mp4", + }, + ) + + async def _store_input_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store input video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_local_processing", + ) + + async def _store_output_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store output video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_block_output", + ) + + def _generate_narration_audio( + self, api_key: str, script: str, voice_id: str, model_id: str + ) -> bytes: + """Generate narration audio via ElevenLabs API.""" + client = ElevenLabs(api_key=api_key) + audio_generator = client.text_to_speech.convert( + voice_id=voice_id, + text=script, + model_id=model_id, + ) + # The SDK returns a generator, collect all chunks + return b"".join(audio_generator) + + def _add_narration_to_video( + self, + video_abspath: str, + audio_abspath: str, + output_abspath: str, + mix_mode: str, + narration_volume: float, + original_volume: float, + ) -> None: + """Add narration audio to video. 
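`_generate_narration_audio` wraps the synchronous ElevenLabs SDK; on its own, the TTS round trip is just the following (the API key is a placeholder):

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="...")  # placeholder key
audio_chunks = client.text_to_speech.convert(
    voice_id="21m00Tcm4TlvDq8ikWAM",  # the block's default "Rachel" voice
    text="Hello world",
    model_id="eleven_multilingual_v2",
)
# The SDK returns a generator of MP3 byte chunks; join them for the full file
with open("/tmp/narration.mp3", "wb") as f:
    f.write(b"".join(audio_chunks))
```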
Extracted for testability.""" + video = None + final = None + narration_original = None + narration_scaled = None + original = None + + try: + strip_chapters_inplace(video_abspath) + video = VideoFileClip(video_abspath) + narration_original = AudioFileClip(audio_abspath) + narration_scaled = narration_original.with_volume_scaled(narration_volume) + narration = narration_scaled + + if mix_mode == "replace": + final_audio = narration + elif mix_mode == "mix": + if video.audio: + original = video.audio.with_volume_scaled(original_volume) + final_audio = CompositeAudioClip([original, narration]) + else: + final_audio = narration + else: # ducking - apply stronger attenuation + if video.audio: + # Ducking uses a much lower volume for original audio + ducking_volume = original_volume * 0.3 + original = video.audio.with_volume_scaled(ducking_volume) + final_audio = CompositeAudioClip([original, narration]) + else: + final_audio = narration + + final = video.with_audio(final_audio) + video_codec, audio_codec = get_video_codecs(output_abspath) + final.write_videofile( + output_abspath, codec=video_codec, audio_codec=audio_codec + ) + + finally: + if original: + original.close() + if narration_scaled: + narration_scaled.close() + if narration_original: + narration_original.close() + if final: + final.close() + if video: + video.close() + + async def run( + self, + input_data: Input, + *, + credentials: ElevenLabsCredentials, + execution_context: ExecutionContext, + node_exec_id: str, + **kwargs, + ) -> BlockOutput: + try: + assert execution_context.graph_exec_id is not None + + # Store the input video locally + local_video_path = await self._store_input_video( + execution_context, input_data.video_in + ) + video_abspath = get_exec_file_path( + execution_context.graph_exec_id, local_video_path + ) + + # Generate narration audio via ElevenLabs + audio_content = self._generate_narration_audio( + credentials.api_key.get_secret_value(), + input_data.script, + input_data.voice_id, + input_data.model_id, + ) + + # Save audio to exec file path + audio_filename = MediaFileType(f"{node_exec_id}_narration.mp3") + audio_abspath = get_exec_file_path( + execution_context.graph_exec_id, audio_filename + ) + os.makedirs(os.path.dirname(audio_abspath), exist_ok=True) + with open(audio_abspath, "wb") as f: + f.write(audio_content) + + # Add narration to video + source = extract_source_name(local_video_path) + output_filename = MediaFileType(f"{node_exec_id}_narrated_{source}.mp4") + output_abspath = get_exec_file_path( + execution_context.graph_exec_id, output_filename + ) + + self._add_narration_to_video( + video_abspath, + audio_abspath, + output_abspath, + input_data.mix_mode, + input_data.narration_volume, + input_data.original_volume, + ) + + # Return as workspace path or data URI based on context + video_out = await self._store_output_video( + execution_context, output_filename + ) + audio_out = await self._store_output_video( + execution_context, audio_filename + ) + + yield "video_out", video_out + yield "audio_file", audio_out + + except Exception as e: + raise BlockExecutionError( + message=f"Failed to add narration: {e}", + block_name=self.name, + block_id=str(self.id), + ) from e diff --git a/autogpt_platform/backend/backend/blocks/video/text_overlay.py b/autogpt_platform/backend/backend/blocks/video/text_overlay.py new file mode 100644 index 0000000000..cb7cfe0420 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/video/text_overlay.py @@ -0,0 +1,231 @@ +"""VideoTextOverlayBlock - Add text 
overlay to video.""" + +from typing import Literal + +from moviepy import CompositeVideoClip, TextClip +from moviepy.video.io.VideoFileClip import VideoFileClip + +from backend.blocks.video._utils import ( + extract_source_name, + get_video_codecs, + strip_chapters_inplace, +) +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.execution import ExecutionContext +from backend.data.model import SchemaField +from backend.util.exceptions import BlockExecutionError +from backend.util.file import MediaFileType, get_exec_file_path, store_media_file + + +class VideoTextOverlayBlock(Block): + """Add text overlay/caption to video.""" + + class Input(BlockSchemaInput): + video_in: MediaFileType = SchemaField( + description="Input video (URL, data URI, or local path)" + ) + text: str = SchemaField(description="Text to overlay on video") + position: Literal[ + "top", + "center", + "bottom", + "top-left", + "top-right", + "bottom-left", + "bottom-right", + ] = SchemaField(description="Position of text on screen", default="bottom") + start_time: float | None = SchemaField( + description="When to show text (seconds). None = entire video", + default=None, + advanced=True, + ) + end_time: float | None = SchemaField( + description="When to hide text (seconds). None = until end", + default=None, + advanced=True, + ) + font_size: int = SchemaField( + description="Font size", default=48, ge=12, le=200, advanced=True + ) + font_color: str = SchemaField( + description="Font color (hex or name)", default="white", advanced=True + ) + bg_color: str | None = SchemaField( + description="Background color behind text (None for transparent)", + default=None, + advanced=True, + ) + + class Output(BlockSchemaOutput): + video_out: MediaFileType = SchemaField( + description="Video with text overlay (path or data URI)" + ) + + def __init__(self): + super().__init__( + id="8ef14de6-cc90-430a-8cfa-3a003be92454", + description="Add text overlay/caption to video", + categories={BlockCategory.MULTIMEDIA}, + input_schema=self.Input, + output_schema=self.Output, + disabled=True, # Disable until we can lockdown imagemagick security policy + test_input={"video_in": "/tmp/test.mp4", "text": "Hello World"}, + test_output=[("video_out", str)], + test_mock={ + "_add_text_overlay": lambda *args: None, + "_store_input_video": lambda *args, **kwargs: "test.mp4", + "_store_output_video": lambda *args, **kwargs: "overlay_test.mp4", + }, + ) + + async def _store_input_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store input video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_local_processing", + ) + + async def _store_output_video( + self, execution_context: ExecutionContext, file: MediaFileType + ) -> MediaFileType: + """Store output video. Extracted for testability.""" + return await store_media_file( + file=file, + execution_context=execution_context, + return_format="for_block_output", + ) + + def _add_text_overlay( + self, + video_abspath: str, + output_abspath: str, + text: str, + position: str, + start_time: float | None, + end_time: float | None, + font_size: int, + font_color: str, + bg_color: str | None, + ) -> None: + """Add text overlay to video. 
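The position presets in the Input schema translate to MoviePy's (horizontal, vertical) anchors. A compact sketch of a bottom-centered caption under the same MoviePy 2.x API; paths and text are placeholders:

```python
from moviepy import CompositeVideoClip, TextClip
from moviepy.video.io.VideoFileClip import VideoFileClip

video = VideoFileClip("/tmp/input.mp4")
try:
    caption = TextClip(text="Hello World", font_size=48, color="white")
    # The "bottom" preset maps to ("center", "bottom")
    caption = caption.with_position(("center", "bottom")).with_duration(video.duration)
    composed = CompositeVideoClip([video, caption])
    composed.write_videofile("/tmp/captioned.mp4", codec="libx264", audio_codec="aac")
    composed.close()
    caption.close()
finally:
    video.close()
```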
Extracted for testability.""" + video = None + final = None + txt_clip = None + try: + strip_chapters_inplace(video_abspath) + video = VideoFileClip(video_abspath) + + txt_clip = TextClip( + text=text, + font_size=font_size, + color=font_color, + bg_color=bg_color, + ) + + # Position mapping + pos_map = { + "top": ("center", "top"), + "center": ("center", "center"), + "bottom": ("center", "bottom"), + "top-left": ("left", "top"), + "top-right": ("right", "top"), + "bottom-left": ("left", "bottom"), + "bottom-right": ("right", "bottom"), + } + + txt_clip = txt_clip.with_position(pos_map[position]) + + # Set timing + start = start_time or 0 + end = end_time or video.duration + duration = max(0, end - start) + txt_clip = txt_clip.with_start(start).with_end(end).with_duration(duration) + + final = CompositeVideoClip([video, txt_clip]) + video_codec, audio_codec = get_video_codecs(output_abspath) + final.write_videofile( + output_abspath, codec=video_codec, audio_codec=audio_codec + ) + + finally: + if txt_clip: + txt_clip.close() + if final: + final.close() + if video: + video.close() + + async def run( + self, + input_data: Input, + *, + execution_context: ExecutionContext, + node_exec_id: str, + **kwargs, + ) -> BlockOutput: + # Validate time range if both are provided + if ( + input_data.start_time is not None + and input_data.end_time is not None + and input_data.end_time <= input_data.start_time + ): + raise BlockExecutionError( + message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})", + block_name=self.name, + block_id=str(self.id), + ) + + try: + assert execution_context.graph_exec_id is not None + + # Store the input video locally + local_video_path = await self._store_input_video( + execution_context, input_data.video_in + ) + video_abspath = get_exec_file_path( + execution_context.graph_exec_id, local_video_path + ) + + # Build output path + source = extract_source_name(local_video_path) + output_filename = MediaFileType(f"{node_exec_id}_overlay_{source}.mp4") + output_abspath = get_exec_file_path( + execution_context.graph_exec_id, output_filename + ) + + self._add_text_overlay( + video_abspath, + output_abspath, + input_data.text, + input_data.position, + input_data.start_time, + input_data.end_time, + input_data.font_size, + input_data.font_color, + input_data.bg_color, + ) + + # Return as workspace path or data URI based on context + video_out = await self._store_output_video( + execution_context, output_filename + ) + + yield "video_out", video_out + + except BlockExecutionError: + raise + except Exception as e: + raise BlockExecutionError( + message=f"Failed to add text overlay: {e}", + block_name=self.name, + block_id=str(self.id), + ) from e diff --git a/autogpt_platform/backend/backend/blocks/youtube.py b/autogpt_platform/backend/backend/blocks/youtube.py index e79be3e99b..6d81a86b4c 100644 --- a/autogpt_platform/backend/backend/blocks/youtube.py +++ b/autogpt_platform/backend/backend/blocks/youtube.py @@ -165,10 +165,13 @@ class TranscribeYoutubeVideoBlock(Block): credentials: WebshareProxyCredentials, **kwargs, ) -> BlockOutput: - video_id = self.extract_video_id(input_data.youtube_url) - yield "video_id", video_id + try: + video_id = self.extract_video_id(input_data.youtube_url) + transcript = self.get_transcript(video_id, credentials) + transcript_text = self.format_transcript(transcript=transcript) - transcript = self.get_transcript(video_id, credentials) - transcript_text = self.format_transcript(transcript=transcript) - - 
yield "transcript", transcript_text + # Only yield after all operations succeed + yield "video_id", video_id + yield "transcript", transcript_text + except Exception as e: + yield "error", str(e) diff --git a/autogpt_platform/backend/backend/data/block_cost_config.py b/autogpt_platform/backend/backend/data/block_cost_config.py index f46cc726f0..ec35afa401 100644 --- a/autogpt_platform/backend/backend/data/block_cost_config.py +++ b/autogpt_platform/backend/backend/data/block_cost_config.py @@ -36,12 +36,14 @@ from backend.blocks.replicate.replicate_block import ReplicateModelBlock from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock +from backend.blocks.video.narration import VideoNarrationBlock from backend.data.block import Block, BlockCost, BlockCostType from backend.integrations.credentials_store import ( aiml_api_credentials, anthropic_credentials, apollo_credentials, did_credentials, + elevenlabs_credentials, enrichlayer_credentials, groq_credentials, ideogram_credentials, @@ -78,6 +80,7 @@ MODEL_COST: dict[LlmModel, int] = { LlmModel.CLAUDE_4_1_OPUS: 21, LlmModel.CLAUDE_4_OPUS: 21, LlmModel.CLAUDE_4_SONNET: 5, + LlmModel.CLAUDE_4_6_OPUS: 14, LlmModel.CLAUDE_4_5_HAIKU: 4, LlmModel.CLAUDE_4_5_OPUS: 14, LlmModel.CLAUDE_4_5_SONNET: 9, @@ -639,4 +642,16 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = { }, ), ], + VideoNarrationBlock: [ + BlockCost( + cost_amount=5, # ElevenLabs TTS cost + cost_filter={ + "credentials": { + "id": elevenlabs_credentials.id, + "provider": elevenlabs_credentials.provider, + "type": elevenlabs_credentials.type, + } + }, + ) + ], } diff --git a/autogpt_platform/backend/backend/data/credit_test.py b/autogpt_platform/backend/backend/data/credit_test.py index 391a373b86..2b10c62882 100644 --- a/autogpt_platform/backend/backend/data/credit_test.py +++ b/autogpt_platform/backend/backend/data/credit_test.py @@ -134,6 +134,16 @@ async def test_block_credit_reset(server: SpinTestServer): month1 = datetime.now(timezone.utc).replace(month=1, day=1) user_credit.time_now = lambda: month1 + # IMPORTANT: Set updatedAt to December of previous year to ensure it's + # in a different month than month1 (January). This fixes a timing bug + # where if the test runs in early February, 35 days ago would be January, + # matching the mocked month1 and preventing the refill from triggering. 
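The failure mode in that comment is easiest to see with concrete dates. A worked sketch, assuming the refill check compares the calendar month of `updatedAt` against the (mocked) current time; the dates are illustrative:

```python
from datetime import datetime, timedelta, timezone

mocked_now = datetime(2025, 1, 1, tzinfo=timezone.utc)  # month1 mocked by the test
real_now = datetime(2025, 2, 10, tzinfo=timezone.utc)   # test actually runs in February

updated_at = real_now - timedelta(days=35)              # 2025-01-06: January
print((updated_at.year, updated_at.month) == (mocked_now.year, mocked_now.month))
# True: updatedAt appears to be in the "current" month, so no refill fires

pinned = mocked_now.replace(year=mocked_now.year - 1, month=12, day=15)
print((pinned.year, pinned.month) == (mocked_now.year, mocked_now.month))
# False: pinning to December of the previous year always triggers the refill
```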
+ dec_previous_year = month1.replace(year=month1.year - 1, month=12, day=15) + await UserBalance.prisma().update( + where={"userId": DEFAULT_USER_ID}, + data={"updatedAt": dec_previous_year}, + ) + # First call in month 1 should trigger refill balance = await user_credit.get_credits(DEFAULT_USER_ID) assert balance == REFILL_VALUE # Should get 1000 credits diff --git a/autogpt_platform/backend/backend/integrations/credentials_store.py b/autogpt_platform/backend/backend/integrations/credentials_store.py index 40a6f7269c..384405b0c7 100644 --- a/autogpt_platform/backend/backend/integrations/credentials_store.py +++ b/autogpt_platform/backend/backend/integrations/credentials_store.py @@ -224,6 +224,14 @@ openweathermap_credentials = APIKeyCredentials( expires_at=None, ) +elevenlabs_credentials = APIKeyCredentials( + id="f4a8b6c2-3d1e-4f5a-9b8c-7d6e5f4a3b2c", + provider="elevenlabs", + api_key=SecretStr(settings.secrets.elevenlabs_api_key), + title="Use Credits for ElevenLabs", + expires_at=None, +) + DEFAULT_CREDENTIALS = [ ollama_credentials, revid_credentials, @@ -252,6 +260,7 @@ DEFAULT_CREDENTIALS = [ v0_credentials, webshare_proxy_credentials, openweathermap_credentials, + elevenlabs_credentials, ] SYSTEM_CREDENTIAL_IDS = {cred.id for cred in DEFAULT_CREDENTIALS} @@ -366,6 +375,8 @@ class IntegrationCredentialsStore: all_credentials.append(webshare_proxy_credentials) if settings.secrets.openweathermap_api_key: all_credentials.append(openweathermap_credentials) + if settings.secrets.elevenlabs_api_key: + all_credentials.append(elevenlabs_credentials) return all_credentials async def get_creds_by_id( diff --git a/autogpt_platform/backend/backend/integrations/providers.py b/autogpt_platform/backend/backend/integrations/providers.py index 3af5006ca4..8a0d6fd183 100644 --- a/autogpt_platform/backend/backend/integrations/providers.py +++ b/autogpt_platform/backend/backend/integrations/providers.py @@ -18,6 +18,7 @@ class ProviderName(str, Enum): DISCORD = "discord" D_ID = "d_id" E2B = "e2b" + ELEVENLABS = "elevenlabs" FAL = "fal" GITHUB = "github" GOOGLE = "google" diff --git a/autogpt_platform/backend/backend/util/file.py b/autogpt_platform/backend/backend/util/file.py index baa9225629..1b8dbdea82 100644 --- a/autogpt_platform/backend/backend/util/file.py +++ b/autogpt_platform/backend/backend/util/file.py @@ -8,6 +8,8 @@ from pathlib import Path from typing import TYPE_CHECKING, Literal from urllib.parse import urlparse +from pydantic import BaseModel + from backend.util.cloud_storage import get_cloud_storage_handler from backend.util.request import Requests from backend.util.settings import Config @@ -17,6 +19,35 @@ from backend.util.virus_scanner import scan_content_safe if TYPE_CHECKING: from backend.data.execution import ExecutionContext + +class WorkspaceUri(BaseModel): + """Parsed workspace:// URI.""" + + file_ref: str # File ID or path (e.g. "abc123" or "/path/to/file.txt") + mime_type: str | None = None # MIME type from fragment (e.g. "video/mp4") + is_path: bool = False # True if file_ref is a path (starts with "/") + + +def parse_workspace_uri(uri: str) -> WorkspaceUri: + """Parse a workspace:// URI into its components. 
+ + Examples: + "workspace://abc123" β†’ WorkspaceUri(file_ref="abc123", mime_type=None, is_path=False) + "workspace://abc123#video/mp4" β†’ WorkspaceUri(file_ref="abc123", mime_type="video/mp4", is_path=False) + "workspace:///path/to/file.txt" β†’ WorkspaceUri(file_ref="/path/to/file.txt", mime_type=None, is_path=True) + """ + raw = uri.removeprefix("workspace://") + mime_type: str | None = None + if "#" in raw: + raw, fragment = raw.split("#", 1) + mime_type = fragment or None + return WorkspaceUri( + file_ref=raw, + mime_type=mime_type, + is_path=raw.startswith("/"), + ) + + # Return format options for store_media_file # - "for_local_processing": Returns local file path - use with ffmpeg, MoviePy, PIL, etc. # - "for_external_api": Returns data URI (base64) - use when sending content to external APIs @@ -183,22 +214,20 @@ async def store_media_file( "This file type is only available in CoPilot sessions." ) - # Parse workspace reference - # workspace://abc123 - by file ID - # workspace:///path/to/file.txt - by virtual path - file_ref = file[12:] # Remove "workspace://" + # Parse workspace reference (strips #mimeType fragment from file ID) + ws = parse_workspace_uri(file) - if file_ref.startswith("/"): - # Path reference - workspace_content = await workspace_manager.read_file(file_ref) - file_info = await workspace_manager.get_file_info_by_path(file_ref) + if ws.is_path: + # Path reference: workspace:///path/to/file.txt + workspace_content = await workspace_manager.read_file(ws.file_ref) + file_info = await workspace_manager.get_file_info_by_path(ws.file_ref) filename = sanitize_filename( file_info.name if file_info else f"{uuid.uuid4()}.bin" ) else: - # ID reference - workspace_content = await workspace_manager.read_file_by_id(file_ref) - file_info = await workspace_manager.get_file_info(file_ref) + # ID reference: workspace://abc123 or workspace://abc123#video/mp4 + workspace_content = await workspace_manager.read_file_by_id(ws.file_ref) + file_info = await workspace_manager.get_file_info(ws.file_ref) filename = sanitize_filename( file_info.name if file_info else f"{uuid.uuid4()}.bin" ) @@ -334,7 +363,21 @@ async def store_media_file( # Don't re-save if input was already from workspace if is_from_workspace: - # Return original workspace reference + # Return original workspace reference, ensuring MIME type fragment + ws = parse_workspace_uri(file) + if not ws.mime_type: + # Add MIME type fragment if missing (older refs without it) + try: + if ws.is_path: + info = await workspace_manager.get_file_info_by_path( + ws.file_ref + ) + else: + info = await workspace_manager.get_file_info(ws.file_ref) + if info: + return MediaFileType(f"{file}#{info.mimeType}") + except Exception: + pass return MediaFileType(file) # Save new content to workspace @@ -346,7 +389,7 @@ async def store_media_file( filename=filename, overwrite=True, ) - return MediaFileType(f"workspace://{file_record.id}") + return MediaFileType(f"workspace://{file_record.id}#{file_record.mimeType}") else: raise ValueError(f"Invalid return_format: {return_format}") diff --git a/autogpt_platform/backend/backend/util/settings.py b/autogpt_platform/backend/backend/util/settings.py index aa28a4c9ac..50b7428160 100644 --- a/autogpt_platform/backend/backend/util/settings.py +++ b/autogpt_platform/backend/backend/util/settings.py @@ -656,6 +656,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings): e2b_api_key: str = Field(default="", description="E2B API key") nvidia_api_key: str = Field(default="", description="Nvidia API key") 
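Since `parse_workspace_uri` above is pure string handling, its contract is easy to pin down with a few assertions mirroring the docstring examples:

```python
from backend.util.file import parse_workspace_uri

ws = parse_workspace_uri("workspace://abc123")
assert (ws.file_ref, ws.mime_type, ws.is_path) == ("abc123", None, False)

ws = parse_workspace_uri("workspace://abc123#video/mp4")
assert (ws.file_ref, ws.mime_type, ws.is_path) == ("abc123", "video/mp4", False)

ws = parse_workspace_uri("workspace:///path/to/file.txt")
assert (ws.file_ref, ws.mime_type, ws.is_path) == ("/path/to/file.txt", None, True)

# An empty fragment normalizes to None rather than ""
assert parse_workspace_uri("workspace://abc123#").mime_type is None
```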
mem0_api_key: str = Field(default="", description="Mem0 API key") + elevenlabs_api_key: str = Field(default="", description="ElevenLabs API key") linear_client_id: str = Field(default="", description="Linear client ID") linear_client_secret: str = Field(default="", description="Linear client secret") diff --git a/autogpt_platform/backend/backend/util/workspace.py b/autogpt_platform/backend/backend/util/workspace.py index a2f1a61b9e..86413b640a 100644 --- a/autogpt_platform/backend/backend/util/workspace.py +++ b/autogpt_platform/backend/backend/util/workspace.py @@ -22,6 +22,7 @@ from backend.data.workspace import ( soft_delete_workspace_file, ) from backend.util.settings import Config +from backend.util.virus_scanner import scan_content_safe from backend.util.workspace_storage import compute_file_checksum, get_workspace_storage logger = logging.getLogger(__name__) @@ -187,6 +188,9 @@ class WorkspaceManager: f"{Config().max_file_size_mb}MB limit" ) + # Virus scan content before persisting (defense in depth) + await scan_content_safe(content, filename=filename) + # Determine path with session scoping if path is None: path = f"/{filename}" diff --git a/autogpt_platform/backend/poetry.lock b/autogpt_platform/backend/poetry.lock index 91ac358ade..61da8c974f 100644 --- a/autogpt_platform/backend/poetry.lock +++ b/autogpt_platform/backend/poetry.lock @@ -1169,6 +1169,29 @@ attrs = ">=21.3.0" e2b = ">=1.5.4,<2.0.0" httpx = ">=0.20.0,<1.0.0" +[[package]] +name = "elevenlabs" +version = "1.59.0" +description = "" +optional = false +python-versions = "<4.0,>=3.8" +groups = ["main"] +files = [ + {file = "elevenlabs-1.59.0-py3-none-any.whl", hash = "sha256:468145db81a0bc867708b4a8619699f75583e9481b395ec1339d0b443da771ed"}, + {file = "elevenlabs-1.59.0.tar.gz", hash = "sha256:16e735bd594e86d415dd445d249c8cc28b09996cfd627fbc10102c0a84698859"}, +] + +[package.dependencies] +httpx = ">=0.21.2" +pydantic = ">=1.9.2" +pydantic-core = ">=2.18.2,<3.0.0" +requests = ">=2.20" +typing_extensions = ">=4.0.0" +websockets = ">=11.0" + +[package.extras] +pyaudio = ["pyaudio (>=0.2.14)"] + [[package]] name = "email-validator" version = "2.2.0" @@ -7361,6 +7384,28 @@ files = [ defusedxml = ">=0.7.1,<0.8.0" requests = "*" +[[package]] +name = "yt-dlp" +version = "2025.12.8" +description = "A feature-rich command-line audio/video downloader" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "yt_dlp-2025.12.8-py3-none-any.whl", hash = "sha256:36e2584342e409cfbfa0b5e61448a1c5189e345cf4564294456ee509e7d3e065"}, + {file = "yt_dlp-2025.12.8.tar.gz", hash = "sha256:b773c81bb6b71cb2c111cfb859f453c7a71cf2ef44eff234ff155877184c3e4f"}, +] + +[package.extras] +build = ["build", "hatchling (>=1.27.0)", "pip", "setuptools (>=71.0.2)", "wheel"] +curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || >=0.10.dev0,<0.14) ; implementation_name == \"cpython\""] +default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=2.0.2,<3)", "websockets (>=13.0)", "yt-dlp-ejs (==0.3.2)"] +dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.14.0,<0.15.0)"] +pyinstaller = ["pyinstaller (>=6.17.0)"] +secretstorage = ["cffi", "secretstorage"] +static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.14.0,<0.15.0)"] +test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"] + [[package]] name = "zerobouncesdk" version = 
"1.1.2" @@ -7512,4 +7557,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "ee5742dc1a9df50dfc06d4b26a1682cbb2b25cab6b79ce5625ec272f93e4f4bf" +content-hash = "8239323f9ae6713224dffd1fe8ba8b449fe88b6c3c7a90940294a74f43a0387a" diff --git a/autogpt_platform/backend/pyproject.toml b/autogpt_platform/backend/pyproject.toml index fe263e47c0..24aea39f33 100644 --- a/autogpt_platform/backend/pyproject.toml +++ b/autogpt_platform/backend/pyproject.toml @@ -20,6 +20,7 @@ click = "^8.2.0" cryptography = "^45.0" discord-py = "^2.5.2" e2b-code-interpreter = "^1.5.2" +elevenlabs = "^1.50.0" fastapi = "^0.116.1" feedparser = "^6.0.11" flake8 = "^7.3.0" @@ -71,6 +72,7 @@ tweepy = "^4.16.0" uvicorn = { extras = ["standard"], version = "^0.35.0" } websockets = "^15.0" youtube-transcript-api = "^1.2.1" +yt-dlp = "2025.12.08" zerobouncesdk = "^1.1.2" # NOTE: please insert new dependencies in their alphabetical location pytest-snapshot = "^0.9.0" diff --git a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx index 4213711447..c58bdac642 100644 --- a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/DataTable.tsx @@ -1,6 +1,6 @@ import { beautifyString } from "@/lib/utils"; import { Clipboard, Maximize2 } from "lucide-react"; -import React, { useState } from "react"; +import React, { useMemo, useState } from "react"; import { Button } from "../../../../../components/__legacy__/ui/button"; import { ContentRenderer } from "../../../../../components/__legacy__/ui/render"; import { @@ -11,6 +11,12 @@ import { TableHeader, TableRow, } from "../../../../../components/__legacy__/ui/table"; +import type { OutputMetadata } from "@/components/contextual/OutputRenderers"; +import { + globalRegistry, + OutputItem, +} from "@/components/contextual/OutputRenderers"; +import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag"; import { useToast } from "../../../../../components/molecules/Toast/use-toast"; import ExpandableOutputDialog from "./ExpandableOutputDialog"; @@ -26,6 +32,9 @@ export default function DataTable({ data, }: DataTableProps) { const { toast } = useToast(); + const enableEnhancedOutputHandling = useGetFlag( + Flag.ENABLE_ENHANCED_OUTPUT_HANDLING, + ); const [expandedDialog, setExpandedDialog] = useState<{ isOpen: boolean; execId: string; @@ -33,6 +42,15 @@ export default function DataTable({ data: any[]; } | null>(null); + // Prepare renderers for each item when enhanced mode is enabled + const getItemRenderer = useMemo(() => { + if (!enableEnhancedOutputHandling) return null; + return (item: unknown) => { + const metadata: OutputMetadata = {}; + return globalRegistry.getRenderer(item, metadata); + }; + }, [enableEnhancedOutputHandling]); + const copyData = (pin: string, data: string) => { navigator.clipboard.writeText(data).then(() => { toast({ @@ -102,15 +120,31 @@ export default function DataTable({ - {value.map((item, index) => ( - - - {index < value.length - 1 && ", "} - - ))} + {value.map((item, index) => { + const renderer = getItemRenderer?.(item); + if (enableEnhancedOutputHandling && renderer) { + const metadata: OutputMetadata = {}; + return ( + + + {index < value.length - 1 && ", "} + + ); + } + return ( + + + {index < value.length - 1 && ", "} + + ); + })} diff --git 
a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/NodeOutputs.tsx b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/NodeOutputs.tsx index d90b7d6a4c..2111db7d99 100644 --- a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/NodeOutputs.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/NodeOutputs.tsx @@ -1,8 +1,14 @@ -import React, { useContext, useState } from "react"; +import React, { useContext, useMemo, useState } from "react"; import { Button } from "@/components/__legacy__/ui/button"; import { Maximize2 } from "lucide-react"; import * as Separator from "@radix-ui/react-separator"; import { ContentRenderer } from "@/components/__legacy__/ui/render"; +import type { OutputMetadata } from "@/components/contextual/OutputRenderers"; +import { + globalRegistry, + OutputItem, +} from "@/components/contextual/OutputRenderers"; +import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag"; import { beautifyString } from "@/lib/utils"; @@ -21,6 +27,9 @@ export default function NodeOutputs({ data, }: NodeOutputsProps) { const builderContext = useContext(BuilderContext); + const enableEnhancedOutputHandling = useGetFlag( + Flag.ENABLE_ENHANCED_OUTPUT_HANDLING, + ); const [expandedDialog, setExpandedDialog] = useState<{ isOpen: boolean; @@ -37,6 +46,15 @@ export default function NodeOutputs({ const { getNodeTitle } = builderContext; + // Prepare renderers for each item when enhanced mode is enabled + const getItemRenderer = useMemo(() => { + if (!enableEnhancedOutputHandling) return null; + return (item: unknown) => { + const metadata: OutputMetadata = {}; + return globalRegistry.getRenderer(item, metadata); + }; + }, [enableEnhancedOutputHandling]); + const getBeautifiedPinName = (pin: string) => { if (!pin.startsWith("tools_^_")) { return beautifyString(pin); @@ -87,15 +105,31 @@ export default function NodeOutputs({
Data:
- {dataArray.slice(0, 10).map((item, index) => ( - - - {index < Math.min(dataArray.length, 10) - 1 && ", "} - - ))} + {dataArray.slice(0, 10).map((item, index) => { + const renderer = getItemRenderer?.(item); + if (enableEnhancedOutputHandling && renderer) { + const metadata: OutputMetadata = {}; + return ( + + + {index < Math.min(dataArray.length, 10) - 1 && ", "} + + ); + } + return ( + + + {index < Math.min(dataArray.length, 10) - 1 && ", "} + + ); + })} {dataArray.length > 10 && (
diff --git a/autogpt_platform/frontend/src/components/__legacy__/ui/render.tsx b/autogpt_platform/frontend/src/components/__legacy__/ui/render.tsx index 5173326f23..b290c51809 100644 --- a/autogpt_platform/frontend/src/components/__legacy__/ui/render.tsx +++ b/autogpt_platform/frontend/src/components/__legacy__/ui/render.tsx @@ -22,7 +22,7 @@ const isValidVideoUrl = (url: string): boolean => { if (url.startsWith("data:video")) { return true; } - const videoExtensions = /\.(mp4|webm|ogg)$/i; + const videoExtensions = /\.(mp4|webm|ogg|mov|avi|mkv|m4v)$/i; const youtubeRegex = /^(https?:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$/; const cleanedUrl = url.split("?")[0]; return ( @@ -44,11 +44,29 @@ const isValidAudioUrl = (url: string): boolean => { if (url.startsWith("data:audio")) { return true; } - const audioExtensions = /\.(mp3|wav)$/i; + const audioExtensions = /\.(mp3|wav|ogg|m4a|aac|flac)$/i; const cleanedUrl = url.split("?")[0]; return isValidMediaUri(url) && audioExtensions.test(cleanedUrl); }; +const getVideoMimeType = (url: string): string => { + if (url.startsWith("data:video/")) { + const match = url.match(/^data:(video\/[^;]+)/); + return match?.[1] || "video/mp4"; + } + const extension = url.split("?")[0].split(".").pop()?.toLowerCase(); + const mimeMap: Record = { + mp4: "video/mp4", + webm: "video/webm", + ogg: "video/ogg", + mov: "video/quicktime", + avi: "video/x-msvideo", + mkv: "video/x-matroska", + m4v: "video/mp4", + }; + return mimeMap[extension || ""] || "video/mp4"; +}; + const VideoRenderer: React.FC<{ videoUrl: string }> = ({ videoUrl }) => { const videoId = getYouTubeVideoId(videoUrl); return ( @@ -63,7 +81,7 @@ const VideoRenderer: React.FC<{ videoUrl: string }> = ({ videoUrl }) => { > ) : ( )} diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx index 2ac433a272..44dae40eb4 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx @@ -102,18 +102,6 @@ export function ChatMessage({ } } - function handleClarificationAnswers(answers: Record) { - if (onSendMessage) { - const contextMessage = Object.entries(answers) - .map(([keyword, answer]) => `${keyword}: ${answer}`) - .join("\n"); - - onSendMessage( - `I have the answers to your questions:\n\n${contextMessage}\n\nPlease proceed with creating the agent.`, - ); - } - } - const handleCopy = useCallback( async function handleCopy() { if (message.type !== "message") return; @@ -162,6 +150,22 @@ export function ChatMessage({ .slice(index + 1) .some((m) => m.type === "message" && m.role === "user"); + const handleClarificationAnswers = (answers: Record) => { + if (onSendMessage) { + // Iterate over questions (preserves original order) instead of answers + const contextMessage = message.questions + .map((q) => { + const answer = answers[q.keyword] || ""; + return `> ${q.question}\n\n${answer}`; + }) + .join("\n\n"); + + onSendMessage( + `**Here are my answers:**\n\n${contextMessage}\n\nPlease proceed with creating the agent.`, + ); + } + }; + return (
); diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/MarkdownContent/MarkdownContent.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/MarkdownContent/MarkdownContent.tsx index 3dd5eca692..ecadbe938b 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/MarkdownContent/MarkdownContent.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/MarkdownContent/MarkdownContent.tsx @@ -3,7 +3,7 @@ import { getGetWorkspaceDownloadFileByIdUrl } from "@/app/api/__generated__/endpoints/workspace/workspace"; import { cn } from "@/lib/utils"; import { EyeSlash } from "@phosphor-icons/react"; -import React from "react"; +import React, { useState } from "react"; import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; @@ -48,7 +48,9 @@ interface InputProps extends React.InputHTMLAttributes { */ function resolveWorkspaceUrl(src: string): string { if (src.startsWith("workspace://")) { - const fileId = src.replace("workspace://", ""); + // Strip MIME type fragment if present (e.g., workspace://abc123#video/mp4 β†’ abc123) + const withoutPrefix = src.replace("workspace://", ""); + const fileId = withoutPrefix.split("#")[0]; // Use the generated API URL helper to get the correct path const apiPath = getGetWorkspaceDownloadFileByIdUrl(fileId); // Route through the Next.js proxy (same pattern as customMutator for client-side) @@ -65,13 +67,49 @@ function isWorkspaceImage(src: string | undefined): boolean { return src?.includes("/workspace/files/") ?? false; } +/** + * Renders a workspace video with controls and an optional "AI cannot see" badge. + */ +function WorkspaceVideo({ + src, + aiCannotSee, +}: { + src: string; + aiCannotSee: boolean; +}) { + return ( + + + {aiCannotSee && ( + + + AI cannot see this video + + )} + + ); +} + /** * Custom image component that shows an indicator when the AI cannot see the image. + * Also handles the "video:" alt-text prefix convention to render
); diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ThinkingMessage/ThinkingMessage.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ThinkingMessage/ThinkingMessage.tsx index 047c2277b0..2202705e65 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ThinkingMessage/ThinkingMessage.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ThinkingMessage/ThinkingMessage.tsx @@ -1,6 +1,8 @@ +import { Progress } from "@/components/atoms/Progress/Progress"; import { cn } from "@/lib/utils"; import { useEffect, useRef, useState } from "react"; import { AIChatBubble } from "../AIChatBubble/AIChatBubble"; +import { useAsymptoticProgress } from "../ToolCallMessage/useAsymptoticProgress"; export interface ThinkingMessageProps { className?: string; @@ -11,6 +13,7 @@ export function ThinkingMessage({ className }: ThinkingMessageProps) { const [showCoffeeMessage, setShowCoffeeMessage] = useState(false); const timerRef = useRef(null); const coffeeTimerRef = useRef(null); + const progress = useAsymptoticProgress(showCoffeeMessage); useEffect(() => { if (timerRef.current === null) { @@ -49,9 +52,18 @@ export function ThinkingMessage({ className }: ThinkingMessageProps) {
{showCoffeeMessage ? ( - - This could take a few minutes, grab a coffee ☕️ - +
+
+
+ Working on it... + {Math.round(progress)}% +
+ +
+ + This could take a few minutes, grab a coffee ☕️ +
) : showSlowLoader ? ( Taking a bit more time... diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolCallMessage/useAsymptoticProgress.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolCallMessage/useAsymptoticProgress.ts new file mode 100644 index 0000000000..cf1b89e7c4 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolCallMessage/useAsymptoticProgress.ts @@ -0,0 +1,50 @@ +import { useEffect, useRef, useState } from "react"; + +/** + * Hook that returns a progress value that starts fast and slows down, + * asymptotically approaching but never reaching the max value. + * + * Uses a half-life formula: progress = max * (1 - 0.5^(time/halfLife)) + * This creates the "game loading bar" effect where: + * - 50% is reached at halfLifeSeconds + * - 75% is reached at 2 * halfLifeSeconds + * - 87.5% is reached at 3 * halfLifeSeconds + * - and so on... + * + * @param isActive - Whether the progress should be animating + * @param halfLifeSeconds - Time in seconds to reach 50% progress (default: 30) + * @param maxProgress - Maximum progress value to approach (default: 100) + * @param intervalMs - Update interval in milliseconds (default: 100) + * @returns Current progress value (0-maxProgress) + */ +export function useAsymptoticProgress( + isActive: boolean, + halfLifeSeconds = 30, + maxProgress = 100, + intervalMs = 100, +) { + const [progress, setProgress] = useState(0); + const elapsedTimeRef = useRef(0); + + useEffect(() => { + if (!isActive) { + setProgress(0); + elapsedTimeRef.current = 0; + return; + } + + const interval = setInterval(() => { + elapsedTimeRef.current += intervalMs / 1000; + // Half-life approach: progress = max * (1 - 0.5^(time/halfLife)) + // At t=halfLife: 50%, at t=2*halfLife: 75%, at t=3*halfLife: 87.5%, etc. 
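The percentages in that comment follow directly from the formula; a quick numeric check (values rounded to one decimal):

```python
def progress(t: float, half_life: float = 30.0, max_progress: float = 100.0) -> float:
    # progress = max * (1 - 0.5^(t / halfLife)), same formula as the hook
    return max_progress * (1 - 0.5 ** (t / half_life))

for t in (30, 60, 90, 300):
    print(t, round(progress(t), 1))  # 50.0, 75.0, 87.5, 99.9: never reaches 100
```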
+ const newProgress = + maxProgress * + (1 - Math.pow(0.5, elapsedTimeRef.current / halfLifeSeconds)); + setProgress(newProgress); + }, intervalMs); + + return () => clearInterval(interval); + }, [isActive, halfLifeSeconds, maxProgress, intervalMs]); + + return progress; +} diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/AgentCreatedPrompt.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/AgentCreatedPrompt.tsx new file mode 100644 index 0000000000..8494452eea --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/AgentCreatedPrompt.tsx @@ -0,0 +1,128 @@ +"use client"; + +import { useGetV2GetLibraryAgent } from "@/app/api/__generated__/endpoints/library/library"; +import { GraphExecutionJobInfo } from "@/app/api/__generated__/models/graphExecutionJobInfo"; +import { GraphExecutionMeta } from "@/app/api/__generated__/models/graphExecutionMeta"; +import { RunAgentModal } from "@/app/(platform)/library/agents/[id]/components/NewAgentLibraryView/components/modals/RunAgentModal/RunAgentModal"; +import { Button } from "@/components/atoms/Button/Button"; +import { Text } from "@/components/atoms/Text/Text"; +import { + CheckCircleIcon, + PencilLineIcon, + PlayIcon, +} from "@phosphor-icons/react"; +import { AIChatBubble } from "../AIChatBubble/AIChatBubble"; + +interface Props { + agentName: string; + libraryAgentId: string; + onSendMessage?: (content: string) => void; +} + +export function AgentCreatedPrompt({ + agentName, + libraryAgentId, + onSendMessage, +}: Props) { + // Fetch library agent eagerly so modal is ready when user clicks + const { data: libraryAgentResponse, isLoading } = useGetV2GetLibraryAgent( + libraryAgentId, + { + query: { + enabled: !!libraryAgentId, + }, + }, + ); + + const libraryAgent = + libraryAgentResponse?.status === 200 ? libraryAgentResponse.data : null; + + function handleRunWithPlaceholders() { + onSendMessage?.( + `Run the agent "${agentName}" with placeholder/example values so I can test it.`, + ); + } + + function handleRunCreated(execution: GraphExecutionMeta) { + onSendMessage?.( + `I've started the agent "${agentName}". The execution ID is ${execution.id}. Please monitor its progress and let me know when it completes.`, + ); + } + + function handleScheduleCreated(schedule: GraphExecutionJobInfo) { + const scheduleInfo = schedule.cron + ? `with cron schedule "${schedule.cron}"` + : "to run on the specified schedule"; + onSendMessage?.( + `I've scheduled the agent "${agentName}" ${scheduleInfo}. The schedule ID is ${schedule.id}.`, + ); + } + + return ( + +
+
+
+ +
+
+ + Agent Created Successfully + + + "{agentName}" is ready to test + +
+
+ +
+ + Ready to test? + +
+ + {libraryAgent ? ( + + + Run with my inputs + + } + agent={libraryAgent} + onRunCreated={handleRunCreated} + onScheduleCreated={handleScheduleCreated} + /> + ) : ( + + )} +
+ + or just ask me + +
+
+
+ ); +} diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx index 27da02beb8..53d5f1ef96 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx @@ -2,11 +2,13 @@ import { Text } from "@/components/atoms/Text/Text"; import { cn } from "@/lib/utils"; import type { ToolResult } from "@/types/chat"; import { WarningCircleIcon } from "@phosphor-icons/react"; +import { AgentCreatedPrompt } from "./AgentCreatedPrompt"; import { AIChatBubble } from "../AIChatBubble/AIChatBubble"; import { MarkdownContent } from "../MarkdownContent/MarkdownContent"; import { formatToolResponse, getErrorMessage, + isAgentSavedResponse, isErrorResponse, } from "./helpers"; @@ -16,6 +18,7 @@ export interface ToolResponseMessageProps { result?: ToolResult; success?: boolean; className?: string; + onSendMessage?: (content: string) => void; } export function ToolResponseMessage({ @@ -24,6 +27,7 @@ export function ToolResponseMessage({ result, success: _success, className, + onSendMessage, }: ToolResponseMessageProps) { if (isErrorResponse(result)) { const errorMessage = getErrorMessage(result); @@ -43,6 +47,18 @@ export function ToolResponseMessage({ ); } + // Check for agent_saved response - show special prompt + const agentSavedData = isAgentSavedResponse(result); + if (agentSavedData.isSaved) { + return ( + + ); + } + const formattedText = formatToolResponse(result, toolName); return ( diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts index 2397176603..63da171f54 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts @@ -6,6 +6,43 @@ function stripInternalReasoning(content: string): string { .trim(); } +export interface AgentSavedData { + isSaved: boolean; + agentName: string; + agentId: string; + libraryAgentId: string; + libraryAgentLink: string; +} + +export function isAgentSavedResponse(result: unknown): AgentSavedData { + if (typeof result !== "object" || result === null) { + return { + isSaved: false, + agentName: "", + agentId: "", + libraryAgentId: "", + libraryAgentLink: "", + }; + } + const response = result as Record; + if (response.type === "agent_saved") { + return { + isSaved: true, + agentName: (response.agent_name as string) || "Agent", + agentId: (response.agent_id as string) || "", + libraryAgentId: (response.library_agent_id as string) || "", + libraryAgentLink: (response.library_agent_link as string) || "", + }; + } + return { + isSaved: false, + agentName: "", + agentId: "", + libraryAgentId: "", + libraryAgentLink: "", + }; +} + export function isErrorResponse(result: unknown): boolean { if (typeof result === "string") { const lower = result.toLowerCase(); @@ -39,69 +76,101 @@ export function getErrorMessage(result: unknown): string { /** * Check if a value is a workspace file reference. 
+ * Format: workspace://{fileId} or workspace://{fileId}#{mimeType} */ function isWorkspaceRef(value: unknown): value is string { return typeof value === "string" && value.startsWith("workspace://"); } /** - * Check if a workspace reference appears to be an image based on common patterns. - * Since workspace refs don't have extensions, we check the context or assume image - * for certain block types. - * - * TODO: Replace keyword matching with MIME type encoded in workspace ref. - * e.g., workspace://abc123#image/png or workspace://abc123#video/mp4 - * This would let frontend render correctly without fragile keyword matching. + * Extract MIME type from a workspace reference fragment. + * e.g., "workspace://abc123#video/mp4" β†’ "video/mp4" + * Returns undefined if no fragment is present. */ -function isLikelyImageRef(value: string, outputKey?: string): boolean { - if (!isWorkspaceRef(value)) return false; - - // Check output key name for video-related hints (these are NOT images) - const videoKeywords = ["video", "mp4", "mov", "avi", "webm", "movie", "clip"]; - if (outputKey) { - const lowerKey = outputKey.toLowerCase(); - if (videoKeywords.some((kw) => lowerKey.includes(kw))) { - return false; - } - } - - // Check output key name for image-related hints - const imageKeywords = [ - "image", - "img", - "photo", - "picture", - "thumbnail", - "avatar", - "icon", - "screenshot", - ]; - if (outputKey) { - const lowerKey = outputKey.toLowerCase(); - if (imageKeywords.some((kw) => lowerKey.includes(kw))) { - return true; - } - } - - // Default to treating workspace refs as potential images - // since that's the most common case for generated content - return true; +function getWorkspaceMimeType(value: string): string | undefined { + const hashIndex = value.indexOf("#"); + if (hashIndex === -1) return undefined; + return value.slice(hashIndex + 1) || undefined; } /** - * Format a single output value, converting workspace refs to markdown images. + * Determine the media category of a workspace ref or data URI. + * Uses the MIME type fragment on workspace refs when available, + * falls back to output key keyword matching for older refs without it. 
*/ -function formatOutputValue(value: unknown, outputKey?: string): string { - if (isWorkspaceRef(value) && isLikelyImageRef(value, outputKey)) { - // Format as markdown image - return `![${outputKey || "Generated image"}](${value})`; +function getMediaCategory( + value: string, + outputKey?: string, +): "video" | "image" | "audio" | "unknown" { + // Data URIs carry their own MIME type + if (value.startsWith("data:video/")) return "video"; + if (value.startsWith("data:image/")) return "image"; + if (value.startsWith("data:audio/")) return "audio"; + + // Workspace refs: prefer MIME type fragment + if (isWorkspaceRef(value)) { + const mime = getWorkspaceMimeType(value); + if (mime) { + if (mime.startsWith("video/")) return "video"; + if (mime.startsWith("image/")) return "image"; + if (mime.startsWith("audio/")) return "audio"; + return "unknown"; + } + + // Fallback: keyword matching on output key for older refs without fragment + if (outputKey) { + const lowerKey = outputKey.toLowerCase(); + + const videoKeywords = [ + "video", + "mp4", + "mov", + "avi", + "webm", + "movie", + "clip", + ]; + if (videoKeywords.some((kw) => lowerKey.includes(kw))) return "video"; + + const imageKeywords = [ + "image", + "img", + "photo", + "picture", + "thumbnail", + "avatar", + "icon", + "screenshot", + ]; + if (imageKeywords.some((kw) => lowerKey.includes(kw))) return "image"; + } + + // Default to image for backward compatibility + return "image"; } + return "unknown"; +} + +/** + * Format a single output value, converting workspace refs to markdown images/videos. + * Videos use a "video:" alt-text prefix so the MarkdownContent renderer can + * distinguish them from images and render a