mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-23 14:08:02 -05:00
Compare commits
24 Commits
swiftyos/r
...
feature/vi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6cd62c4d50 | ||
|
|
9f4c33a695 | ||
|
|
b0debe9488 | ||
|
|
b20767bde9 | ||
|
|
b9a9481381 | ||
|
|
d2d2a0c0c9 | ||
|
|
521f69220d | ||
|
|
368adc985d | ||
|
|
8c3216f0a2 | ||
|
|
94063616e5 | ||
|
|
2433a86cb1 | ||
|
|
0ede203f8e | ||
|
|
dc751316c5 | ||
|
|
e7fb54e6af | ||
|
|
7b76f4d1e4 | ||
|
|
3cc56de0fa | ||
|
|
d2bead0f7a | ||
|
|
f8d3893c16 | ||
|
|
1cfbc0dd08 | ||
|
|
ff84643b48 | ||
|
|
c19c3c834a | ||
|
|
d0f7ba8cfd | ||
|
|
2a855f4bd0 | ||
|
|
b93bb3b9f8 |
@@ -152,6 +152,7 @@ REPLICATE_API_KEY=
|
||||
REVID_API_KEY=
|
||||
SCREENSHOTONE_API_KEY=
|
||||
UNREAL_SPEECH_API_KEY=
|
||||
ELEVENLABS_API_KEY=
|
||||
|
||||
# Data & Search Services
|
||||
E2B_API_KEY=
|
||||
|
||||
@@ -5,9 +5,9 @@ from asyncio import CancelledError
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Any
|
||||
|
||||
import openai
|
||||
import orjson
|
||||
from langfuse import get_client
|
||||
from langfuse import get_client, propagate_attributes
|
||||
from langfuse.openai import openai # type: ignore
|
||||
from openai import (
|
||||
APIConnectionError,
|
||||
APIError,
|
||||
@@ -276,301 +276,347 @@ async def stream_chat_completion(
|
||||
# Build system prompt with business understanding
|
||||
system_prompt, understanding = await _build_system_prompt(user_id)
|
||||
|
||||
# Initialize variables for streaming
|
||||
assistant_response = ChatMessage(
|
||||
role="assistant",
|
||||
content="",
|
||||
)
|
||||
accumulated_tool_calls: list[dict[str, Any]] = []
|
||||
has_saved_assistant_message = False
|
||||
has_appended_streaming_message = False
|
||||
last_cache_time = 0.0
|
||||
last_cache_content_len = 0
|
||||
# Create Langfuse trace for this LLM call (each call gets its own trace, grouped by session_id)
|
||||
# Using v3 SDK: start_observation creates a root span, update_trace sets trace-level attributes
|
||||
input = message
|
||||
if not message and tool_call_response:
|
||||
input = tool_call_response
|
||||
|
||||
has_yielded_end = False
|
||||
has_yielded_error = False
|
||||
has_done_tool_call = False
|
||||
has_received_text = False
|
||||
text_streaming_ended = False
|
||||
tool_response_messages: list[ChatMessage] = []
|
||||
should_retry = False
|
||||
|
||||
# Generate unique IDs for AI SDK protocol
|
||||
import uuid as uuid_module
|
||||
|
||||
message_id = str(uuid_module.uuid4())
|
||||
text_block_id = str(uuid_module.uuid4())
|
||||
|
||||
# Yield message start
|
||||
yield StreamStart(messageId=message_id)
|
||||
|
||||
try:
|
||||
async for chunk in _stream_chat_chunks(
|
||||
session=session,
|
||||
tools=tools,
|
||||
system_prompt=system_prompt,
|
||||
text_block_id=text_block_id,
|
||||
langfuse = get_client()
|
||||
with langfuse.start_as_current_observation(
|
||||
as_type="span",
|
||||
name="user-copilot-request",
|
||||
input=input,
|
||||
) as span:
|
||||
with propagate_attributes(
|
||||
session_id=session_id,
|
||||
user_id=user_id,
|
||||
tags=["copilot"],
|
||||
metadata={
|
||||
"users_information": format_understanding_for_prompt(understanding)[
|
||||
:200
|
||||
] # langfuse only accepts upto to 200 chars
|
||||
},
|
||||
):
|
||||
|
||||
if isinstance(chunk, StreamTextStart):
|
||||
# Emit text-start before first text delta
|
||||
if not has_received_text:
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamTextDelta):
|
||||
delta = chunk.delta or ""
|
||||
assert assistant_response.content is not None
|
||||
assistant_response.content += delta
|
||||
has_received_text = True
|
||||
if not has_appended_streaming_message:
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_streaming_message = True
|
||||
current_time = time.monotonic()
|
||||
content_len = len(assistant_response.content)
|
||||
if (
|
||||
current_time - last_cache_time >= 1.0
|
||||
and content_len > last_cache_content_len
|
||||
# Initialize variables that will be used in finally block (must be defined before try)
|
||||
assistant_response = ChatMessage(
|
||||
role="assistant",
|
||||
content="",
|
||||
)
|
||||
accumulated_tool_calls: list[dict[str, Any]] = []
|
||||
has_saved_assistant_message = False
|
||||
has_appended_streaming_message = False
|
||||
last_cache_time = 0.0
|
||||
last_cache_content_len = 0
|
||||
|
||||
# Wrap main logic in try/finally to ensure Langfuse observations are always ended
|
||||
has_yielded_end = False
|
||||
has_yielded_error = False
|
||||
has_done_tool_call = False
|
||||
has_received_text = False
|
||||
text_streaming_ended = False
|
||||
tool_response_messages: list[ChatMessage] = []
|
||||
should_retry = False
|
||||
|
||||
# Generate unique IDs for AI SDK protocol
|
||||
import uuid as uuid_module
|
||||
|
||||
message_id = str(uuid_module.uuid4())
|
||||
text_block_id = str(uuid_module.uuid4())
|
||||
|
||||
# Yield message start
|
||||
yield StreamStart(messageId=message_id)
|
||||
|
||||
try:
|
||||
async for chunk in _stream_chat_chunks(
|
||||
session=session,
|
||||
tools=tools,
|
||||
system_prompt=system_prompt,
|
||||
text_block_id=text_block_id,
|
||||
):
|
||||
|
||||
if isinstance(chunk, StreamTextStart):
|
||||
# Emit text-start before first text delta
|
||||
if not has_received_text:
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamTextDelta):
|
||||
delta = chunk.delta or ""
|
||||
assert assistant_response.content is not None
|
||||
assistant_response.content += delta
|
||||
has_received_text = True
|
||||
if not has_appended_streaming_message:
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_streaming_message = True
|
||||
current_time = time.monotonic()
|
||||
content_len = len(assistant_response.content)
|
||||
if (
|
||||
current_time - last_cache_time >= 1.0
|
||||
and content_len > last_cache_content_len
|
||||
):
|
||||
try:
|
||||
await cache_chat_session(session)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to cache partial session {session.session_id}: {e}"
|
||||
)
|
||||
last_cache_time = current_time
|
||||
last_cache_content_len = content_len
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamTextEnd):
|
||||
# Emit text-end after text completes
|
||||
if has_received_text and not text_streaming_ended:
|
||||
text_streaming_ended = True
|
||||
if assistant_response.content:
|
||||
logger.warn(
|
||||
f"StreamTextEnd: Attempting to set output {assistant_response.content}"
|
||||
)
|
||||
span.update_trace(output=assistant_response.content)
|
||||
span.update(output=assistant_response.content)
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamToolInputStart):
|
||||
# Emit text-end before first tool call, but only if we've received text
|
||||
if has_received_text and not text_streaming_ended:
|
||||
yield StreamTextEnd(id=text_block_id)
|
||||
text_streaming_ended = True
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamToolInputAvailable):
|
||||
# Accumulate tool calls in OpenAI format
|
||||
accumulated_tool_calls.append(
|
||||
{
|
||||
"id": chunk.toolCallId,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": chunk.toolName,
|
||||
"arguments": orjson.dumps(chunk.input).decode(
|
||||
"utf-8"
|
||||
),
|
||||
},
|
||||
}
|
||||
)
|
||||
elif isinstance(chunk, StreamToolOutputAvailable):
|
||||
result_content = (
|
||||
chunk.output
|
||||
if isinstance(chunk.output, str)
|
||||
else orjson.dumps(chunk.output).decode("utf-8")
|
||||
)
|
||||
tool_response_messages.append(
|
||||
ChatMessage(
|
||||
role="tool",
|
||||
content=result_content,
|
||||
tool_call_id=chunk.toolCallId,
|
||||
)
|
||||
)
|
||||
has_done_tool_call = True
|
||||
# Track if any tool execution failed
|
||||
if not chunk.success:
|
||||
logger.warning(
|
||||
f"Tool {chunk.toolName} (ID: {chunk.toolCallId}) execution failed"
|
||||
)
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamFinish):
|
||||
if not has_done_tool_call:
|
||||
# Emit text-end before finish if we received text but haven't closed it
|
||||
if has_received_text and not text_streaming_ended:
|
||||
yield StreamTextEnd(id=text_block_id)
|
||||
text_streaming_ended = True
|
||||
|
||||
# Save assistant message before yielding finish to ensure it's persisted
|
||||
# even if client disconnects immediately after receiving StreamFinish
|
||||
if not has_saved_assistant_message:
|
||||
messages_to_save_early: list[ChatMessage] = []
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = (
|
||||
accumulated_tool_calls
|
||||
)
|
||||
if not has_appended_streaming_message and (
|
||||
assistant_response.content
|
||||
or assistant_response.tool_calls
|
||||
):
|
||||
messages_to_save_early.append(assistant_response)
|
||||
messages_to_save_early.extend(tool_response_messages)
|
||||
|
||||
if messages_to_save_early:
|
||||
session.messages.extend(messages_to_save_early)
|
||||
logger.info(
|
||||
f"Saving assistant message before StreamFinish: "
|
||||
f"content_len={len(assistant_response.content or '')}, "
|
||||
f"tool_calls={len(assistant_response.tool_calls or [])}, "
|
||||
f"tool_responses={len(tool_response_messages)}"
|
||||
)
|
||||
if (
|
||||
messages_to_save_early
|
||||
or has_appended_streaming_message
|
||||
):
|
||||
await upsert_chat_session(session)
|
||||
has_saved_assistant_message = True
|
||||
|
||||
has_yielded_end = True
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamError):
|
||||
has_yielded_error = True
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamUsage):
|
||||
session.usage.append(
|
||||
Usage(
|
||||
prompt_tokens=chunk.promptTokens,
|
||||
completion_tokens=chunk.completionTokens,
|
||||
total_tokens=chunk.totalTokens,
|
||||
)
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"Unknown chunk type: {type(chunk)}", exc_info=True
|
||||
)
|
||||
if assistant_response.content:
|
||||
langfuse.update_current_trace(output=assistant_response.content)
|
||||
langfuse.update_current_span(output=assistant_response.content)
|
||||
elif tool_response_messages:
|
||||
langfuse.update_current_trace(output=str(tool_response_messages))
|
||||
langfuse.update_current_span(output=str(tool_response_messages))
|
||||
|
||||
except CancelledError:
|
||||
if not has_saved_assistant_message:
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
if assistant_response.content:
|
||||
assistant_response.content = (
|
||||
f"{assistant_response.content}\n\n[interrupted]"
|
||||
)
|
||||
else:
|
||||
assistant_response.content = "[interrupted]"
|
||||
if not has_appended_streaming_message:
|
||||
session.messages.append(assistant_response)
|
||||
if tool_response_messages:
|
||||
session.messages.extend(tool_response_messages)
|
||||
try:
|
||||
await cache_chat_session(session)
|
||||
await upsert_chat_session(session)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to cache partial session {session.session_id}: {e}"
|
||||
f"Failed to save interrupted session {session.session_id}: {e}"
|
||||
)
|
||||
last_cache_time = current_time
|
||||
last_cache_content_len = content_len
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamTextEnd):
|
||||
# Emit text-end after text completes
|
||||
if has_received_text and not text_streaming_ended:
|
||||
text_streaming_ended = True
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamToolInputStart):
|
||||
# Emit text-end before first tool call, but only if we've received text
|
||||
if has_received_text and not text_streaming_ended:
|
||||
yield StreamTextEnd(id=text_block_id)
|
||||
text_streaming_ended = True
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamToolInputAvailable):
|
||||
# Accumulate tool calls in OpenAI format
|
||||
accumulated_tool_calls.append(
|
||||
{
|
||||
"id": chunk.toolCallId,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": chunk.toolName,
|
||||
"arguments": orjson.dumps(chunk.input).decode("utf-8"),
|
||||
},
|
||||
}
|
||||
)
|
||||
elif isinstance(chunk, StreamToolOutputAvailable):
|
||||
result_content = (
|
||||
chunk.output
|
||||
if isinstance(chunk.output, str)
|
||||
else orjson.dumps(chunk.output).decode("utf-8")
|
||||
)
|
||||
tool_response_messages.append(
|
||||
ChatMessage(
|
||||
role="tool",
|
||||
content=result_content,
|
||||
tool_call_id=chunk.toolCallId,
|
||||
)
|
||||
)
|
||||
has_done_tool_call = True
|
||||
# Track if any tool execution failed
|
||||
if not chunk.success:
|
||||
logger.warning(
|
||||
f"Tool {chunk.toolName} (ID: {chunk.toolCallId}) execution failed"
|
||||
)
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamFinish):
|
||||
if not has_done_tool_call:
|
||||
# Emit text-end before finish if we received text but haven't closed it
|
||||
if has_received_text and not text_streaming_ended:
|
||||
yield StreamTextEnd(id=text_block_id)
|
||||
text_streaming_ended = True
|
||||
|
||||
# Save assistant message before yielding finish to ensure it's persisted
|
||||
# even if client disconnects immediately after receiving StreamFinish
|
||||
if not has_saved_assistant_message:
|
||||
messages_to_save_early: list[ChatMessage] = []
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
if not has_appended_streaming_message and (
|
||||
assistant_response.content or assistant_response.tool_calls
|
||||
):
|
||||
messages_to_save_early.append(assistant_response)
|
||||
messages_to_save_early.extend(tool_response_messages)
|
||||
|
||||
if messages_to_save_early:
|
||||
session.messages.extend(messages_to_save_early)
|
||||
logger.info(
|
||||
f"Saving assistant message before StreamFinish: "
|
||||
f"content_len={len(assistant_response.content or '')}, "
|
||||
f"tool_calls={len(assistant_response.tool_calls or [])}, "
|
||||
f"tool_responses={len(tool_response_messages)}"
|
||||
)
|
||||
if messages_to_save_early or has_appended_streaming_message:
|
||||
await upsert_chat_session(session)
|
||||
has_saved_assistant_message = True
|
||||
|
||||
has_yielded_end = True
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamError):
|
||||
has_yielded_error = True
|
||||
yield chunk
|
||||
elif isinstance(chunk, StreamUsage):
|
||||
session.usage.append(
|
||||
Usage(
|
||||
prompt_tokens=chunk.promptTokens,
|
||||
completion_tokens=chunk.completionTokens,
|
||||
total_tokens=chunk.totalTokens,
|
||||
)
|
||||
)
|
||||
else:
|
||||
logger.error(f"Unknown chunk type: {type(chunk)}", exc_info=True)
|
||||
|
||||
except CancelledError:
|
||||
if not has_saved_assistant_message:
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
if assistant_response.content:
|
||||
assistant_response.content = (
|
||||
f"{assistant_response.content}\n\n[interrupted]"
|
||||
)
|
||||
else:
|
||||
assistant_response.content = "[interrupted]"
|
||||
if not has_appended_streaming_message:
|
||||
session.messages.append(assistant_response)
|
||||
if tool_response_messages:
|
||||
session.messages.extend(tool_response_messages)
|
||||
try:
|
||||
await upsert_chat_session(session)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to save interrupted session {session.session_id}: {e}"
|
||||
logger.error(f"Error during stream: {e!s}", exc_info=True)
|
||||
|
||||
# Check if this is a retryable error (JSON parsing, incomplete tool calls, etc.)
|
||||
is_retryable = isinstance(
|
||||
e, (orjson.JSONDecodeError, KeyError, TypeError)
|
||||
)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error during stream: {e!s}", exc_info=True)
|
||||
|
||||
# Check if this is a retryable error (JSON parsing, incomplete tool calls, etc.)
|
||||
is_retryable = isinstance(e, (orjson.JSONDecodeError, KeyError, TypeError))
|
||||
if is_retryable and retry_count < config.max_retries:
|
||||
logger.info(
|
||||
f"Retryable error encountered. Attempt {retry_count + 1}/{config.max_retries}"
|
||||
)
|
||||
should_retry = True
|
||||
else:
|
||||
# Non-retryable error or max retries exceeded
|
||||
# Save any partial progress before reporting error
|
||||
messages_to_save: list[ChatMessage] = []
|
||||
|
||||
if is_retryable and retry_count < config.max_retries:
|
||||
logger.info(
|
||||
f"Retryable error encountered. Attempt {retry_count + 1}/{config.max_retries}"
|
||||
)
|
||||
should_retry = True
|
||||
else:
|
||||
# Non-retryable error or max retries exceeded
|
||||
# Save any partial progress before reporting error
|
||||
messages_to_save: list[ChatMessage] = []
|
||||
# Add assistant message if it has content or tool calls
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
if not has_appended_streaming_message and (
|
||||
assistant_response.content or assistant_response.tool_calls
|
||||
):
|
||||
messages_to_save.append(assistant_response)
|
||||
|
||||
# Add assistant message if it has content or tool calls
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
if not has_appended_streaming_message and (
|
||||
assistant_response.content or assistant_response.tool_calls
|
||||
):
|
||||
messages_to_save.append(assistant_response)
|
||||
# Add tool response messages after assistant message
|
||||
messages_to_save.extend(tool_response_messages)
|
||||
|
||||
# Add tool response messages after assistant message
|
||||
messages_to_save.extend(tool_response_messages)
|
||||
if not has_saved_assistant_message:
|
||||
if messages_to_save:
|
||||
session.messages.extend(messages_to_save)
|
||||
if messages_to_save or has_appended_streaming_message:
|
||||
await upsert_chat_session(session)
|
||||
|
||||
if not has_yielded_error:
|
||||
error_message = str(e)
|
||||
if not is_retryable:
|
||||
error_message = f"Non-retryable error: {error_message}"
|
||||
elif retry_count >= config.max_retries:
|
||||
error_message = f"Max retries ({config.max_retries}) exceeded: {error_message}"
|
||||
|
||||
error_response = StreamError(errorText=error_message)
|
||||
yield error_response
|
||||
if not has_yielded_end:
|
||||
yield StreamFinish()
|
||||
return
|
||||
|
||||
# Handle retry outside of exception handler to avoid nesting
|
||||
if should_retry and retry_count < config.max_retries:
|
||||
logger.info(
|
||||
f"Retrying stream_chat_completion for session {session_id}, attempt {retry_count + 1}"
|
||||
)
|
||||
async for chunk in stream_chat_completion(
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
retry_count=retry_count + 1,
|
||||
session=session,
|
||||
context=context,
|
||||
):
|
||||
yield chunk
|
||||
return # Exit after retry to avoid double-saving in finally block
|
||||
|
||||
# Normal completion path - save session and handle tool call continuation
|
||||
# Only save if we haven't already saved when StreamFinish was received
|
||||
if not has_saved_assistant_message:
|
||||
logger.info(
|
||||
f"Normal completion path: session={session.session_id}, "
|
||||
f"current message_count={len(session.messages)}"
|
||||
)
|
||||
|
||||
# Build the messages list in the correct order
|
||||
messages_to_save: list[ChatMessage] = []
|
||||
|
||||
# Add assistant message with tool_calls if any
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
logger.info(
|
||||
f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
|
||||
)
|
||||
if not has_appended_streaming_message and (
|
||||
assistant_response.content or assistant_response.tool_calls
|
||||
):
|
||||
messages_to_save.append(assistant_response)
|
||||
logger.info(
|
||||
f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}"
|
||||
)
|
||||
|
||||
# Add tool response messages after assistant message
|
||||
messages_to_save.extend(tool_response_messages)
|
||||
logger.info(
|
||||
f"Saving {len(tool_response_messages)} tool response messages, "
|
||||
f"total_to_save={len(messages_to_save)}"
|
||||
)
|
||||
|
||||
if messages_to_save:
|
||||
session.messages.extend(messages_to_save)
|
||||
logger.info(
|
||||
f"Extended session messages, new message_count={len(session.messages)}"
|
||||
)
|
||||
if messages_to_save or has_appended_streaming_message:
|
||||
await upsert_chat_session(session)
|
||||
else:
|
||||
logger.info(
|
||||
"Assistant message already saved when StreamFinish was received, "
|
||||
"skipping duplicate save"
|
||||
)
|
||||
|
||||
if not has_yielded_error:
|
||||
error_message = str(e)
|
||||
if not is_retryable:
|
||||
error_message = f"Non-retryable error: {error_message}"
|
||||
elif retry_count >= config.max_retries:
|
||||
error_message = (
|
||||
f"Max retries ({config.max_retries}) exceeded: {error_message}"
|
||||
)
|
||||
|
||||
error_response = StreamError(errorText=error_message)
|
||||
yield error_response
|
||||
if not has_yielded_end:
|
||||
yield StreamFinish()
|
||||
return
|
||||
|
||||
# Handle retry outside of exception handler to avoid nesting
|
||||
if should_retry and retry_count < config.max_retries:
|
||||
logger.info(
|
||||
f"Retrying stream_chat_completion for session {session_id}, attempt {retry_count + 1}"
|
||||
)
|
||||
async for chunk in stream_chat_completion(
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
retry_count=retry_count + 1,
|
||||
session=session,
|
||||
context=context,
|
||||
):
|
||||
yield chunk
|
||||
return # Exit after retry to avoid double-saving in finally block
|
||||
|
||||
# Normal completion path - save session and handle tool call continuation
|
||||
# Only save if we haven't already saved when StreamFinish was received
|
||||
if not has_saved_assistant_message:
|
||||
logger.info(
|
||||
f"Normal completion path: session={session.session_id}, "
|
||||
f"current message_count={len(session.messages)}"
|
||||
)
|
||||
|
||||
# Build the messages list in the correct order
|
||||
messages_to_save: list[ChatMessage] = []
|
||||
|
||||
# Add assistant message with tool_calls if any
|
||||
if accumulated_tool_calls:
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
logger.info(
|
||||
f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
|
||||
)
|
||||
if not has_appended_streaming_message and (
|
||||
assistant_response.content or assistant_response.tool_calls
|
||||
):
|
||||
messages_to_save.append(assistant_response)
|
||||
logger.info(
|
||||
f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}"
|
||||
)
|
||||
|
||||
# Add tool response messages after assistant message
|
||||
messages_to_save.extend(tool_response_messages)
|
||||
logger.info(
|
||||
f"Saving {len(tool_response_messages)} tool response messages, "
|
||||
f"total_to_save={len(messages_to_save)}"
|
||||
)
|
||||
|
||||
if messages_to_save:
|
||||
session.messages.extend(messages_to_save)
|
||||
logger.info(
|
||||
f"Extended session messages, new message_count={len(session.messages)}"
|
||||
)
|
||||
if messages_to_save or has_appended_streaming_message:
|
||||
await upsert_chat_session(session)
|
||||
else:
|
||||
logger.info(
|
||||
"Assistant message already saved when StreamFinish was received, "
|
||||
"skipping duplicate save"
|
||||
)
|
||||
|
||||
# If we did a tool call, stream the chat completion again to get the next response
|
||||
if has_done_tool_call:
|
||||
logger.info(
|
||||
"Tool call executed, streaming chat completion again to get assistant response"
|
||||
)
|
||||
async for chunk in stream_chat_completion(
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
session=session, # Pass session object to avoid Redis refetch
|
||||
context=context,
|
||||
tool_call_response=str(tool_response_messages),
|
||||
):
|
||||
yield chunk
|
||||
# If we did a tool call, stream the chat completion again to get the next response
|
||||
if has_done_tool_call:
|
||||
logger.info(
|
||||
"Tool call executed, streaming chat completion again to get assistant response"
|
||||
)
|
||||
async for chunk in stream_chat_completion(
|
||||
session_id=session.session_id,
|
||||
user_id=user_id,
|
||||
session=session, # Pass session object to avoid Redis refetch
|
||||
context=context,
|
||||
tool_call_response=str(tool_response_messages),
|
||||
):
|
||||
yield chunk
|
||||
|
||||
|
||||
# Retry configuration for OpenAI API calls
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.data.understanding import (
|
||||
BusinessUnderstandingInput,
|
||||
@@ -59,6 +61,7 @@ and automations for the user's specific needs."""
|
||||
"""Requires authentication to store user-specific data."""
|
||||
return True
|
||||
|
||||
@observe(as_type="tool", name="add_understanding")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -5,6 +5,7 @@ import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
@@ -328,6 +329,7 @@ class AgentOutputTool(BaseTool):
|
||||
total_executions=len(available_executions) if available_executions else 1,
|
||||
)
|
||||
|
||||
@observe(as_type="tool", name="view_agent_output")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .agent_generator import (
|
||||
@@ -78,6 +80,7 @@ class CreateAgentTool(BaseTool):
|
||||
"required": ["description"],
|
||||
}
|
||||
|
||||
@observe(as_type="tool", name="create_agent")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .agent_generator import (
|
||||
@@ -85,6 +87,7 @@ class EditAgentTool(BaseTool):
|
||||
"required": ["agent_id", "changes"],
|
||||
}
|
||||
|
||||
@observe(as_type="tool", name="edit_agent")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .agent_search import search_agents
|
||||
@@ -35,6 +37,7 @@ class FindAgentTool(BaseTool):
|
||||
"required": ["query"],
|
||||
}
|
||||
|
||||
@observe(as_type="tool", name="find_agent")
|
||||
async def _execute(
|
||||
self, user_id: str | None, session: ChatSession, **kwargs
|
||||
) -> ToolResponseBase:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
@@ -55,6 +56,7 @@ class FindBlockTool(BaseTool):
|
||||
def requires_auth(self) -> bool:
|
||||
return True
|
||||
|
||||
@observe(as_type="tool", name="find_block")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
|
||||
from .agent_search import search_agents
|
||||
@@ -41,6 +43,7 @@ class FindLibraryAgentTool(BaseTool):
|
||||
def requires_auth(self) -> bool:
|
||||
return True
|
||||
|
||||
@observe(as_type="tool", name="find_library_agent")
|
||||
async def _execute(
|
||||
self, user_id: str | None, session: ChatSession, **kwargs
|
||||
) -> ToolResponseBase:
|
||||
|
||||
@@ -4,6 +4,8 @@ import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.api.features.chat.tools.base import BaseTool
|
||||
from backend.api.features.chat.tools.models import (
|
||||
@@ -71,6 +73,7 @@ class GetDocPageTool(BaseTool):
|
||||
url_path = path.rsplit(".", 1)[0] if "." in path else path
|
||||
return f"{DOCS_BASE_URL}/{url_path}"
|
||||
|
||||
@observe(as_type="tool", name="get_doc_page")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from backend.api.features.chat.config import ChatConfig
|
||||
@@ -154,6 +155,7 @@ class RunAgentTool(BaseTool):
|
||||
"""All operations require authentication."""
|
||||
return True
|
||||
|
||||
@observe(as_type="tool", name="run_agent")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -4,6 +4,8 @@ import logging
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.data.block import get_block
|
||||
from backend.data.execution import ExecutionContext
|
||||
@@ -128,6 +130,7 @@ class RunBlockTool(BaseTool):
|
||||
|
||||
return matched_credentials, missing_credentials
|
||||
|
||||
@observe(as_type="tool", name="run_block")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langfuse import observe
|
||||
from prisma.enums import ContentType
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
@@ -87,6 +88,7 @@ class SearchDocsTool(BaseTool):
|
||||
url_path = path.rsplit(".", 1)[0] if "." in path else path
|
||||
return f"{DOCS_BASE_URL}/{url_path}"
|
||||
|
||||
@observe(as_type="tool", name="search_docs")
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
|
||||
28
autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
Normal file
28
autogpt_platform/backend/backend/blocks/elevenlabs/_auth.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""ElevenLabs integration blocks - test credentials and shared utilities."""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import SecretStr
|
||||
|
||||
from backend.data.model import APIKeyCredentials, CredentialsMetaInput
|
||||
from backend.integrations.providers import ProviderName
|
||||
|
||||
TEST_CREDENTIALS = APIKeyCredentials(
|
||||
id="01234567-89ab-cdef-0123-456789abcdef",
|
||||
provider="elevenlabs",
|
||||
api_key=SecretStr("mock-elevenlabs-api-key"),
|
||||
title="Mock ElevenLabs API key",
|
||||
expires_at=None,
|
||||
)
|
||||
|
||||
TEST_CREDENTIALS_INPUT = {
|
||||
"provider": TEST_CREDENTIALS.provider,
|
||||
"id": TEST_CREDENTIALS.id,
|
||||
"type": TEST_CREDENTIALS.type,
|
||||
"title": TEST_CREDENTIALS.title,
|
||||
}
|
||||
|
||||
ElevenLabsCredentials = APIKeyCredentials
|
||||
ElevenLabsCredentialsInput = CredentialsMetaInput[
|
||||
Literal[ProviderName.ELEVENLABS], Literal["api_key"]
|
||||
]
|
||||
@@ -1,251 +0,0 @@
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Literal, Optional
|
||||
|
||||
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
||||
from moviepy.video.fx.Loop import Loop
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class MediaDurationBlock(Block):
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
media_in: MediaFileType = SchemaField(
|
||||
description="Media input (URL, data URI, or local path)."
|
||||
)
|
||||
is_video: bool = SchemaField(
|
||||
description="Whether the media is a video (True) or audio (False).",
|
||||
default=True,
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
duration: float = SchemaField(
|
||||
description="Duration of the media file (in seconds)."
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
|
||||
description="Block to get the duration of a media file.",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=MediaDurationBlock.Input,
|
||||
output_schema=MediaDurationBlock.Output,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# 1) Store the input media locally
|
||||
local_media_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.media_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
media_abspath = get_exec_file_path(graph_exec_id, local_media_path)
|
||||
|
||||
# 2) Load the clip
|
||||
if input_data.is_video:
|
||||
clip = VideoFileClip(media_abspath)
|
||||
else:
|
||||
clip = AudioFileClip(media_abspath)
|
||||
|
||||
yield "duration", clip.duration
|
||||
|
||||
|
||||
class LoopVideoBlock(Block):
|
||||
"""
|
||||
Block for looping (repeating) a video clip until a given duration or number of loops.
|
||||
"""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
video_in: MediaFileType = SchemaField(
|
||||
description="The input video (can be a URL, data URI, or local path)."
|
||||
)
|
||||
# Provide EITHER a `duration` or `n_loops` or both. We'll demonstrate `duration`.
|
||||
duration: Optional[float] = SchemaField(
|
||||
description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
|
||||
default=None,
|
||||
ge=0.0,
|
||||
)
|
||||
n_loops: Optional[int] = SchemaField(
|
||||
description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
|
||||
default=None,
|
||||
ge=1,
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="How to return the output video. Either a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: str = SchemaField(
|
||||
description="Looped video returned either as a relative path or a data URI."
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="8bf9eef6-5451-4213-b265-25306446e94b",
|
||||
description="Block to loop a video to a given duration or number of repeats.",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=LoopVideoBlock.Input,
|
||||
output_schema=LoopVideoBlock.Output,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# 1) Store the input video locally
|
||||
local_video_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.video_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
|
||||
|
||||
# 2) Load the clip
|
||||
clip = VideoFileClip(input_abspath)
|
||||
|
||||
# 3) Apply the loop effect
|
||||
looped_clip = clip
|
||||
if input_data.duration:
|
||||
# Loop until we reach the specified duration
|
||||
looped_clip = looped_clip.with_effects([Loop(duration=input_data.duration)])
|
||||
elif input_data.n_loops:
|
||||
looped_clip = looped_clip.with_effects([Loop(n=input_data.n_loops)])
|
||||
else:
|
||||
raise ValueError("Either 'duration' or 'n_loops' must be provided.")
|
||||
|
||||
assert isinstance(looped_clip, VideoFileClip)
|
||||
|
||||
# 4) Save the looped output
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
|
||||
)
|
||||
output_abspath = get_exec_file_path(graph_exec_id, output_filename)
|
||||
|
||||
looped_clip = looped_clip.with_audio(clip.audio)
|
||||
looped_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
|
||||
|
||||
# Return as data URI
|
||||
video_out = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=output_filename,
|
||||
user_id=user_id,
|
||||
return_content=input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
|
||||
|
||||
class AddAudioToVideoBlock(Block):
|
||||
"""
|
||||
Block that adds (attaches) an audio track to an existing video.
|
||||
Optionally scale the volume of the new track.
|
||||
"""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
video_in: MediaFileType = SchemaField(
|
||||
description="Video input (URL, data URI, or local path)."
|
||||
)
|
||||
audio_in: MediaFileType = SchemaField(
|
||||
description="Audio input (URL, data URI, or local path)."
|
||||
)
|
||||
volume: float = SchemaField(
|
||||
description="Volume scale for the newly attached audio track (1.0 = original).",
|
||||
default=1.0,
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="Return the final output as a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: MediaFileType = SchemaField(
|
||||
description="Final video (with attached audio), as a path or data URI."
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="3503748d-62b6-4425-91d6-725b064af509",
|
||||
description="Block to attach an audio file to a video file using moviepy.",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=AddAudioToVideoBlock.Input,
|
||||
output_schema=AddAudioToVideoBlock.Output,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# 1) Store the inputs locally
|
||||
local_video_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.video_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
local_audio_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.audio_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
|
||||
abs_temp_dir = os.path.join(tempfile.gettempdir(), "exec_file", graph_exec_id)
|
||||
video_abspath = os.path.join(abs_temp_dir, local_video_path)
|
||||
audio_abspath = os.path.join(abs_temp_dir, local_audio_path)
|
||||
|
||||
# 2) Load video + audio with moviepy
|
||||
video_clip = VideoFileClip(video_abspath)
|
||||
audio_clip = AudioFileClip(audio_abspath)
|
||||
# Optionally scale volume
|
||||
if input_data.volume != 1.0:
|
||||
audio_clip = audio_clip.with_volume_scaled(input_data.volume)
|
||||
|
||||
# 3) Attach the new audio track
|
||||
final_clip = video_clip.with_audio(audio_clip)
|
||||
|
||||
# 4) Write to output file
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
|
||||
)
|
||||
output_abspath = os.path.join(abs_temp_dir, output_filename)
|
||||
final_clip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
|
||||
|
||||
# 5) Return either path or data URI
|
||||
video_out = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=output_filename,
|
||||
user_id=user_id,
|
||||
return_content=input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
37
autogpt_platform/backend/backend/blocks/video/__init__.py
Normal file
37
autogpt_platform/backend/backend/blocks/video/__init__.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Video editing blocks for AutoGPT Platform.
|
||||
|
||||
This module provides blocks for:
|
||||
- Downloading videos from URLs (YouTube, Vimeo, news sites, direct links)
|
||||
- Clipping/trimming video segments
|
||||
- Concatenating multiple videos
|
||||
- Adding text overlays
|
||||
- Adding AI-generated narration
|
||||
- Getting media duration
|
||||
- Looping videos
|
||||
- Adding audio to videos
|
||||
|
||||
Dependencies:
|
||||
- yt-dlp: For video downloading
|
||||
- moviepy: For video editing operations
|
||||
- requests: For API calls (narration block)
|
||||
"""
|
||||
|
||||
from backend.blocks.video.add_audio import AddAudioToVideoBlock
|
||||
from backend.blocks.video.clip import VideoClipBlock
|
||||
from backend.blocks.video.concat import VideoConcatBlock
|
||||
from backend.blocks.video.download import VideoDownloadBlock
|
||||
from backend.blocks.video.duration import MediaDurationBlock
|
||||
from backend.blocks.video.loop import LoopVideoBlock
|
||||
from backend.blocks.video.narration import VideoNarrationBlock
|
||||
from backend.blocks.video.text_overlay import VideoTextOverlayBlock
|
||||
|
||||
__all__ = [
|
||||
"AddAudioToVideoBlock",
|
||||
"LoopVideoBlock",
|
||||
"MediaDurationBlock",
|
||||
"VideoClipBlock",
|
||||
"VideoConcatBlock",
|
||||
"VideoDownloadBlock",
|
||||
"VideoNarrationBlock",
|
||||
"VideoTextOverlayBlock",
|
||||
]
|
||||
125
autogpt_platform/backend/backend/blocks/video/add_audio.py
Normal file
125
autogpt_platform/backend/backend/blocks/video/add_audio.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""AddAudioToVideoBlock - Attach an audio track to a video."""
|
||||
|
||||
import os
|
||||
from typing import Literal
|
||||
|
||||
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class AddAudioToVideoBlock(Block):
|
||||
"""Attach an audio track to an existing video."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
video_in: MediaFileType = SchemaField(
|
||||
description="Video input (URL, data URI, or local path)."
|
||||
)
|
||||
audio_in: MediaFileType = SchemaField(
|
||||
description="Audio input (URL, data URI, or local path)."
|
||||
)
|
||||
volume: float = SchemaField(
|
||||
description="Volume scale for the newly attached audio track (1.0 = original).",
|
||||
default=1.0,
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="Return the final output as a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: MediaFileType = SchemaField(
|
||||
description="Final video (with attached audio), as a path or data URI."
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="3503748d-62b6-4425-91d6-725b064af509",
|
||||
description="Block to attach an audio file to a video file using moviepy.",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=AddAudioToVideoBlock.Input,
|
||||
output_schema=AddAudioToVideoBlock.Output,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# 1) Store the inputs locally
|
||||
local_video_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.video_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
local_audio_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.audio_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
|
||||
video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
|
||||
audio_abspath = get_exec_file_path(graph_exec_id, local_audio_path)
|
||||
|
||||
video_clip = None
|
||||
audio_clip_original = None
|
||||
audio_clip_scaled = None
|
||||
final_clip = None
|
||||
try:
|
||||
# 2) Load video + audio with moviepy
|
||||
video_clip = VideoFileClip(video_abspath)
|
||||
audio_clip_original = AudioFileClip(audio_abspath)
|
||||
|
||||
# Optionally scale volume
|
||||
audio_to_use = audio_clip_original
|
||||
if input_data.volume != 1.0:
|
||||
audio_clip_scaled = audio_clip_original.with_volume_scaled(
|
||||
input_data.volume
|
||||
)
|
||||
audio_to_use = audio_clip_scaled
|
||||
|
||||
# 3) Attach the new audio track
|
||||
final_clip = video_clip.with_audio(audio_to_use)
|
||||
|
||||
# 4) Write to output file
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_audio_attached_{os.path.basename(local_video_path)}"
|
||||
)
|
||||
output_abspath = get_exec_file_path(graph_exec_id, output_filename)
|
||||
final_clip.write_videofile(
|
||||
output_abspath, codec="libx264", audio_codec="aac"
|
||||
)
|
||||
|
||||
# 5) Return either path or data URI
|
||||
video_out = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=output_filename,
|
||||
user_id=user_id,
|
||||
return_content=input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
finally:
|
||||
if final_clip:
|
||||
final_clip.close()
|
||||
if audio_clip_scaled:
|
||||
audio_clip_scaled.close()
|
||||
if audio_clip_original:
|
||||
audio_clip_original.close()
|
||||
if video_clip:
|
||||
video_clip.close()
|
||||
168
autogpt_platform/backend/backend/blocks/video/clip.py
Normal file
168
autogpt_platform/backend/backend/blocks/video/clip.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""VideoClipBlock - Extract a segment from a video file."""
|
||||
|
||||
import os
|
||||
from typing import Literal
|
||||
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.exceptions import BlockExecutionError
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class VideoClipBlock(Block):
|
||||
"""Extract a time segment from a video."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
video_in: MediaFileType = SchemaField(
|
||||
description="Input video (URL, data URI, or local path)"
|
||||
)
|
||||
start_time: float = SchemaField(description="Start time in seconds", ge=0.0)
|
||||
end_time: float = SchemaField(description="End time in seconds", ge=0.0)
|
||||
output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
|
||||
description="Output format", default="mp4", advanced=True
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="Return the output as a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: MediaFileType = SchemaField(
|
||||
description="Clipped video file (path or data URI)"
|
||||
)
|
||||
duration: float = SchemaField(description="Clip duration in seconds")
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="8f539119-e580-4d86-ad41-86fbcb22abb1",
|
||||
description="Extract a time segment from a video",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=self.Input,
|
||||
output_schema=self.Output,
|
||||
test_input={
|
||||
"video_in": "/tmp/test.mp4",
|
||||
"start_time": 0.0,
|
||||
"end_time": 10.0,
|
||||
},
|
||||
test_output=[("video_out", str), ("duration", float)],
|
||||
test_mock={
|
||||
"_clip_video": lambda *args: 10.0,
|
||||
"_store_input_video": lambda *args, **kwargs: "test.mp4",
|
||||
"_store_output_video": lambda *args, **kwargs: "clip_test.mp4",
|
||||
},
|
||||
)
|
||||
|
||||
async def _store_input_video(
|
||||
self, graph_exec_id: str, file: MediaFileType, user_id: str
|
||||
) -> MediaFileType:
|
||||
"""Store input video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
|
||||
async def _store_output_video(
|
||||
self,
|
||||
graph_exec_id: str,
|
||||
file: MediaFileType,
|
||||
user_id: str,
|
||||
return_content: bool,
|
||||
) -> MediaFileType:
|
||||
"""Store output video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=return_content,
|
||||
)
|
||||
|
||||
def _clip_video(
|
||||
self,
|
||||
video_abspath: str,
|
||||
output_abspath: str,
|
||||
start_time: float,
|
||||
end_time: float,
|
||||
) -> float:
|
||||
"""Extract a clip from a video. Extracted for testability."""
|
||||
clip = None
|
||||
subclip = None
|
||||
try:
|
||||
clip = VideoFileClip(video_abspath)
|
||||
subclip = clip.subclipped(start_time, end_time)
|
||||
subclip.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
|
||||
return subclip.duration
|
||||
finally:
|
||||
if subclip:
|
||||
subclip.close()
|
||||
if clip:
|
||||
clip.close()
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# Validate time range
|
||||
if input_data.end_time <= input_data.start_time:
|
||||
raise BlockExecutionError(
|
||||
message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
)
|
||||
|
||||
try:
|
||||
# Store the input video locally
|
||||
local_video_path = await self._store_input_video(
|
||||
graph_exec_id, input_data.video_in, user_id
|
||||
)
|
||||
video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
|
||||
|
||||
# Build output path
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_clip_{os.path.basename(local_video_path)}"
|
||||
)
|
||||
# Ensure correct extension
|
||||
base, _ = os.path.splitext(output_filename)
|
||||
output_filename = MediaFileType(f"{base}.{input_data.output_format}")
|
||||
output_abspath = get_exec_file_path(graph_exec_id, output_filename)
|
||||
|
||||
duration = self._clip_video(
|
||||
video_abspath,
|
||||
output_abspath,
|
||||
input_data.start_time,
|
||||
input_data.end_time,
|
||||
)
|
||||
|
||||
# Return as data URI or path
|
||||
video_out = await self._store_output_video(
|
||||
graph_exec_id,
|
||||
output_filename,
|
||||
user_id,
|
||||
input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
yield "duration", duration
|
||||
|
||||
except BlockExecutionError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise BlockExecutionError(
|
||||
message=f"Failed to clip video: {e}",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
) from e
|
||||
202
autogpt_platform/backend/backend/blocks/video/concat.py
Normal file
202
autogpt_platform/backend/backend/blocks/video/concat.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""VideoConcatBlock - Concatenate multiple video clips into one."""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from moviepy import concatenate_videoclips
|
||||
from moviepy.video.fx import CrossFadeIn, CrossFadeOut, FadeIn, FadeOut
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.exceptions import BlockExecutionError
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class VideoConcatBlock(Block):
|
||||
"""Merge multiple video clips into one continuous video."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
videos: list[MediaFileType] = SchemaField(
|
||||
description="List of video files to concatenate (in order)"
|
||||
)
|
||||
transition: Literal["none", "crossfade", "fade_black"] = SchemaField(
|
||||
description="Transition between clips", default="none"
|
||||
)
|
||||
transition_duration: int = SchemaField(
|
||||
description="Transition duration in seconds",
|
||||
default=1,
|
||||
ge=0,
|
||||
advanced=True,
|
||||
)
|
||||
output_format: Literal["mp4", "webm", "mkv", "mov"] = SchemaField(
|
||||
description="Output format", default="mp4", advanced=True
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="Return the output as a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: MediaFileType = SchemaField(
|
||||
description="Concatenated video file (path or data URI)"
|
||||
)
|
||||
total_duration: float = SchemaField(description="Total duration in seconds")
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="9b0f531a-1118-487f-aeec-3fa63ea8900a",
|
||||
description="Merge multiple video clips into one continuous video",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=self.Input,
|
||||
output_schema=self.Output,
|
||||
test_input={"videos": ["/tmp/a.mp4", "/tmp/b.mp4"]},
|
||||
test_output=[("video_out", str), ("total_duration", float)],
|
||||
test_mock={
|
||||
"_concat_videos": lambda *args: 20.0,
|
||||
"_store_input_video": lambda *args, **kwargs: "test.mp4",
|
||||
"_store_output_video": lambda *args, **kwargs: "concat_test.mp4",
|
||||
},
|
||||
)
|
||||
|
||||
async def _store_input_video(
|
||||
self, graph_exec_id: str, file: MediaFileType, user_id: str
|
||||
) -> MediaFileType:
|
||||
"""Store input video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
|
||||
async def _store_output_video(
|
||||
self,
|
||||
graph_exec_id: str,
|
||||
file: MediaFileType,
|
||||
user_id: str,
|
||||
return_content: bool,
|
||||
) -> MediaFileType:
|
||||
"""Store output video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=return_content,
|
||||
)
|
||||
|
||||
def _concat_videos(
|
||||
self,
|
||||
video_abspaths: list[str],
|
||||
output_abspath: str,
|
||||
transition: str,
|
||||
transition_duration: int,
|
||||
) -> float:
|
||||
"""Concatenate videos. Extracted for testability."""
|
||||
clips = []
|
||||
faded_clips = []
|
||||
final = None
|
||||
try:
|
||||
# Load clips
|
||||
for v in video_abspaths:
|
||||
clips.append(VideoFileClip(v))
|
||||
|
||||
if transition == "crossfade":
|
||||
for i, clip in enumerate(clips):
|
||||
effects = []
|
||||
if i > 0:
|
||||
effects.append(CrossFadeIn(transition_duration))
|
||||
if i < len(clips) - 1:
|
||||
effects.append(CrossFadeOut(transition_duration))
|
||||
if effects:
|
||||
clip = clip.with_effects(effects)
|
||||
faded_clips.append(clip)
|
||||
final = concatenate_videoclips(
|
||||
faded_clips,
|
||||
method="compose",
|
||||
padding=-transition_duration,
|
||||
)
|
||||
elif transition == "fade_black":
|
||||
for clip in clips:
|
||||
faded = clip.with_effects(
|
||||
[FadeIn(transition_duration), FadeOut(transition_duration)]
|
||||
)
|
||||
faded_clips.append(faded)
|
||||
final = concatenate_videoclips(faded_clips)
|
||||
else:
|
||||
final = concatenate_videoclips(clips)
|
||||
|
||||
final.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
|
||||
|
||||
return final.duration
|
||||
finally:
|
||||
if final:
|
||||
final.close()
|
||||
for clip in faded_clips:
|
||||
clip.close()
|
||||
for clip in clips:
|
||||
clip.close()
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# Validate minimum clips
|
||||
if len(input_data.videos) < 2:
|
||||
raise BlockExecutionError(
|
||||
message="At least 2 videos are required for concatenation",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
)
|
||||
|
||||
try:
|
||||
# Store all input videos locally
|
||||
video_abspaths = []
|
||||
for video in input_data.videos:
|
||||
local_path = await self._store_input_video(
|
||||
graph_exec_id, video, user_id
|
||||
)
|
||||
video_abspaths.append(get_exec_file_path(graph_exec_id, local_path))
|
||||
|
||||
# Build output path
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_concat.{input_data.output_format}"
|
||||
)
|
||||
output_abspath = get_exec_file_path(graph_exec_id, output_filename)
|
||||
|
||||
total_duration = self._concat_videos(
|
||||
video_abspaths,
|
||||
output_abspath,
|
||||
input_data.transition,
|
||||
input_data.transition_duration,
|
||||
)
|
||||
|
||||
# Return as data URI or path
|
||||
video_out = await self._store_output_video(
|
||||
graph_exec_id,
|
||||
output_filename,
|
||||
user_id,
|
||||
input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
yield "total_duration", total_duration
|
||||
|
||||
except BlockExecutionError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise BlockExecutionError(
|
||||
message=f"Failed to concatenate videos: {e}",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
) from e
|
||||
177
autogpt_platform/backend/backend/blocks/video/download.py
Normal file
177
autogpt_platform/backend/backend/blocks/video/download.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""VideoDownloadBlock - Download video from URL (YouTube, Vimeo, news sites, direct links)."""
|
||||
|
||||
import os
|
||||
import typing
|
||||
from typing import Literal
|
||||
|
||||
import yt_dlp
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from yt_dlp import _Params
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.exceptions import BlockExecutionError
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class VideoDownloadBlock(Block):
|
||||
"""Download video from URL using yt-dlp."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
url: str = SchemaField(
|
||||
description="URL of the video to download (YouTube, Vimeo, direct link, etc.)",
|
||||
placeholder="https://www.youtube.com/watch?v=...",
|
||||
)
|
||||
quality: Literal["best", "1080p", "720p", "480p", "audio_only"] = SchemaField(
|
||||
description="Video quality preference", default="720p"
|
||||
)
|
||||
output_format: Literal["mp4", "webm", "mkv"] = SchemaField(
|
||||
description="Output video format", default="mp4", advanced=True
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="Return the output as a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_file: MediaFileType = SchemaField(
|
||||
description="Downloaded video (path or data URI)"
|
||||
)
|
||||
duration: float = SchemaField(description="Video duration in seconds")
|
||||
title: str = SchemaField(description="Video title from source")
|
||||
source_url: str = SchemaField(description="Original source URL")
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="c35daabb-cd60-493b-b9ad-51f1fe4b50c4",
|
||||
description="Download video from URL (YouTube, Vimeo, news sites, direct links)",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=self.Input,
|
||||
output_schema=self.Output,
|
||||
test_input={
|
||||
"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||||
"quality": "480p",
|
||||
},
|
||||
test_output=[
|
||||
("video_file", str),
|
||||
("duration", float),
|
||||
("title", str),
|
||||
("source_url", str),
|
||||
],
|
||||
test_mock={
|
||||
"_download_video": lambda *args: ("video.mp4", 212.0, "Test Video"),
|
||||
"_store_output_video": lambda *args, **kwargs: "video.mp4",
|
||||
},
|
||||
)
|
||||
|
||||
async def _store_output_video(
|
||||
self,
|
||||
graph_exec_id: str,
|
||||
file: MediaFileType,
|
||||
user_id: str,
|
||||
return_content: bool,
|
||||
) -> MediaFileType:
|
||||
"""Store output video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=return_content,
|
||||
)
|
||||
|
||||
def _get_format_string(self, quality: str) -> str:
|
||||
formats = {
|
||||
"best": "bestvideo+bestaudio/best",
|
||||
"1080p": "bestvideo[height<=1080]+bestaudio/best[height<=1080]",
|
||||
"720p": "bestvideo[height<=720]+bestaudio/best[height<=720]",
|
||||
"480p": "bestvideo[height<=480]+bestaudio/best[height<=480]",
|
||||
"audio_only": "bestaudio/best",
|
||||
}
|
||||
return formats.get(quality, formats["720p"])
|
||||
|
||||
def _download_video(
|
||||
self,
|
||||
url: str,
|
||||
quality: str,
|
||||
output_format: str,
|
||||
output_dir: str,
|
||||
node_exec_id: str,
|
||||
) -> tuple[str, float, str]:
|
||||
"""Download video. Extracted for testability."""
|
||||
output_template = os.path.join(
|
||||
output_dir, f"{node_exec_id}_%(title).50s.%(ext)s"
|
||||
)
|
||||
|
||||
ydl_opts: "_Params" = {
|
||||
"format": self._get_format_string(quality),
|
||||
"outtmpl": output_template,
|
||||
"merge_output_format": output_format,
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
}
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.extract_info(url, download=True)
|
||||
video_path = ydl.prepare_filename(info)
|
||||
|
||||
# Handle format conversion in filename
|
||||
if not video_path.endswith(f".{output_format}"):
|
||||
video_path = video_path.rsplit(".", 1)[0] + f".{output_format}"
|
||||
|
||||
# Return just the filename, not the full path
|
||||
filename = os.path.basename(video_path)
|
||||
|
||||
return (
|
||||
filename,
|
||||
info.get("duration") or 0.0,
|
||||
info.get("title") or "Unknown",
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
try:
|
||||
# Get the exec file directory
|
||||
output_dir = get_exec_file_path(graph_exec_id, "")
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
filename, duration, title = self._download_video(
|
||||
input_data.url,
|
||||
input_data.quality,
|
||||
input_data.output_format,
|
||||
output_dir,
|
||||
node_exec_id,
|
||||
)
|
||||
|
||||
# Return as data URI or path
|
||||
video_out = await self._store_output_video(
|
||||
graph_exec_id,
|
||||
MediaFileType(filename),
|
||||
user_id,
|
||||
input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_file", video_out
|
||||
yield "duration", duration
|
||||
yield "title", title
|
||||
yield "source_url", input_data.url
|
||||
|
||||
except Exception as e:
|
||||
raise BlockExecutionError(
|
||||
message=f"Failed to download video: {e}",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
) from e
|
||||
71
autogpt_platform/backend/backend/blocks/video/duration.py
Normal file
71
autogpt_platform/backend/backend/blocks/video/duration.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""MediaDurationBlock - Get the duration of a media file."""
|
||||
|
||||
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class MediaDurationBlock(Block):
|
||||
"""Get the duration of a media file."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
media_in: MediaFileType = SchemaField(
|
||||
description="Media input (URL, data URI, or local path)."
|
||||
)
|
||||
is_video: bool = SchemaField(
|
||||
description="Whether the media is a video (True) or audio (False).",
|
||||
default=True,
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
duration: float = SchemaField(
|
||||
description="Duration of the media file (in seconds)."
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="d8b91fd4-da26-42d4-8ecb-8b196c6d84b6",
|
||||
description="Block to get the duration of a media file.",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=MediaDurationBlock.Input,
|
||||
output_schema=MediaDurationBlock.Output,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# 1) Store the input media locally
|
||||
local_media_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.media_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
media_abspath = get_exec_file_path(graph_exec_id, local_media_path)
|
||||
|
||||
# 2) Load the clip
|
||||
clip = None
|
||||
try:
|
||||
if input_data.is_video:
|
||||
clip = VideoFileClip(media_abspath)
|
||||
else:
|
||||
clip = AudioFileClip(media_abspath)
|
||||
|
||||
yield "duration", clip.duration
|
||||
finally:
|
||||
if clip:
|
||||
clip.close()
|
||||
114
autogpt_platform/backend/backend/blocks/video/loop.py
Normal file
114
autogpt_platform/backend/backend/blocks/video/loop.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""LoopVideoBlock - Loop a video to a given duration or number of repeats."""
|
||||
|
||||
import os
|
||||
from typing import Literal, Optional
|
||||
|
||||
from moviepy.video.fx.Loop import Loop
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class LoopVideoBlock(Block):
|
||||
"""Loop (repeat) a video clip until a given duration or number of loops."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
video_in: MediaFileType = SchemaField(
|
||||
description="The input video (can be a URL, data URI, or local path)."
|
||||
)
|
||||
duration: Optional[float] = SchemaField(
|
||||
description="Target duration (in seconds) to loop the video to. If omitted, defaults to no looping.",
|
||||
default=None,
|
||||
ge=0.0,
|
||||
)
|
||||
n_loops: Optional[int] = SchemaField(
|
||||
description="Number of times to repeat the video. If omitted, defaults to 1 (no repeat).",
|
||||
default=None,
|
||||
ge=1,
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="How to return the output video. Either a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: str = SchemaField(
|
||||
description="Looped video returned either as a relative path or a data URI."
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="8bf9eef6-5451-4213-b265-25306446e94b",
|
||||
description="Block to loop a video to a given duration or number of repeats.",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=LoopVideoBlock.Input,
|
||||
output_schema=LoopVideoBlock.Output,
|
||||
)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# 1) Store the input video locally
|
||||
local_video_path = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=input_data.video_in,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
input_abspath = get_exec_file_path(graph_exec_id, local_video_path)
|
||||
|
||||
clip: VideoFileClip | None = None
|
||||
looped_clip: VideoFileClip | None = None
|
||||
try:
|
||||
# 2) Load the clip
|
||||
clip = VideoFileClip(input_abspath)
|
||||
|
||||
# 3) Apply the loop effect
|
||||
# Note: Loop effect handles both video and audio looping automatically
|
||||
if input_data.duration:
|
||||
looped_clip = clip.with_effects([Loop(duration=input_data.duration)]) # type: ignore[arg-type] Clip implements shallow copy that loses type info
|
||||
elif input_data.n_loops:
|
||||
looped_clip = clip.with_effects([Loop(n=input_data.n_loops)]) # type: ignore[arg-type] Clip implements shallow copy that loses type info
|
||||
else:
|
||||
raise ValueError("Either 'duration' or 'n_loops' must be provided.")
|
||||
|
||||
# 4) Save the looped output
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_looped_{os.path.basename(local_video_path)}"
|
||||
)
|
||||
output_abspath = get_exec_file_path(graph_exec_id, output_filename)
|
||||
|
||||
assert looped_clip is not None
|
||||
|
||||
looped_clip.write_videofile(
|
||||
output_abspath, codec="libx264", audio_codec="aac"
|
||||
)
|
||||
|
||||
# Return as data URI or path
|
||||
video_out = await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=output_filename,
|
||||
user_id=user_id,
|
||||
return_content=input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
finally:
|
||||
if looped_clip is not None:
|
||||
looped_clip.close()
|
||||
if clip is not None:
|
||||
clip.close()
|
||||
254
autogpt_platform/backend/backend/blocks/video/narration.py
Normal file
254
autogpt_platform/backend/backend/blocks/video/narration.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""VideoNarrationBlock - Generate AI voice narration and add to video."""
|
||||
|
||||
import os
|
||||
from typing import Literal
|
||||
|
||||
from elevenlabs import ElevenLabs
|
||||
from moviepy import CompositeAudioClip
|
||||
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.blocks.elevenlabs._auth import (
|
||||
TEST_CREDENTIALS,
|
||||
TEST_CREDENTIALS_INPUT,
|
||||
ElevenLabsCredentials,
|
||||
ElevenLabsCredentialsInput,
|
||||
)
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import CredentialsField, SchemaField
|
||||
from backend.util.exceptions import BlockExecutionError
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class VideoNarrationBlock(Block):
|
||||
"""Generate AI narration and add to video."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
credentials: ElevenLabsCredentialsInput = CredentialsField(
|
||||
description="ElevenLabs API key for voice synthesis"
|
||||
)
|
||||
video_in: MediaFileType = SchemaField(
|
||||
description="Input video (URL, data URI, or local path)"
|
||||
)
|
||||
script: str = SchemaField(description="Narration script text")
|
||||
voice_id: str = SchemaField(
|
||||
description="ElevenLabs voice ID", default="21m00Tcm4TlvDq8ikWAM" # Rachel
|
||||
)
|
||||
mix_mode: Literal["replace", "mix", "ducking"] = SchemaField(
|
||||
description="How to combine with original audio. 'ducking' applies stronger attenuation than 'mix'.",
|
||||
default="ducking",
|
||||
)
|
||||
narration_volume: float = SchemaField(
|
||||
description="Narration volume (0.0 to 2.0)",
|
||||
default=1.0,
|
||||
ge=0.0,
|
||||
le=2.0,
|
||||
advanced=True,
|
||||
)
|
||||
original_volume: float = SchemaField(
|
||||
description="Original audio volume when mixing (0.0 to 1.0)",
|
||||
default=0.3,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
advanced=True,
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="Return the output as a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: MediaFileType = SchemaField(
|
||||
description="Video with narration (path or data URI)"
|
||||
)
|
||||
audio_file: MediaFileType = SchemaField(
|
||||
description="Generated audio file (path or data URI)"
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="3d036b53-859c-4b17-9826-ca340f736e0e",
|
||||
description="Generate AI narration and add to video",
|
||||
categories={BlockCategory.MULTIMEDIA, BlockCategory.AI},
|
||||
input_schema=self.Input,
|
||||
output_schema=self.Output,
|
||||
test_input={
|
||||
"video_in": "/tmp/test.mp4",
|
||||
"script": "Hello world",
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_credentials=TEST_CREDENTIALS,
|
||||
test_output=[("video_out", str), ("audio_file", str)],
|
||||
test_mock={
|
||||
"_generate_narration_audio": lambda *args: b"mock audio content",
|
||||
"_add_narration_to_video": lambda *args: None,
|
||||
"_store_input_video": lambda *args, **kwargs: "test.mp4",
|
||||
"_store_output_video": lambda *args, **kwargs: "narrated_test.mp4",
|
||||
},
|
||||
)
|
||||
|
||||
async def _store_input_video(
|
||||
self, graph_exec_id: str, file: MediaFileType, user_id: str
|
||||
) -> MediaFileType:
|
||||
"""Store input video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
|
||||
async def _store_output_video(
|
||||
self,
|
||||
graph_exec_id: str,
|
||||
file: MediaFileType,
|
||||
user_id: str,
|
||||
return_content: bool,
|
||||
) -> MediaFileType:
|
||||
"""Store output video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=return_content,
|
||||
)
|
||||
|
||||
def _generate_narration_audio(
|
||||
self, api_key: str, script: str, voice_id: str
|
||||
) -> bytes:
|
||||
"""Generate narration audio via ElevenLabs API."""
|
||||
client = ElevenLabs(api_key=api_key)
|
||||
audio_generator = client.text_to_speech.convert(
|
||||
voice_id=voice_id,
|
||||
text=script,
|
||||
model_id="eleven_monolingual_v1",
|
||||
)
|
||||
# The SDK returns a generator, collect all chunks
|
||||
return b"".join(audio_generator)
|
||||
|
||||
def _add_narration_to_video(
|
||||
self,
|
||||
video_abspath: str,
|
||||
audio_abspath: str,
|
||||
output_abspath: str,
|
||||
mix_mode: str,
|
||||
narration_volume: float,
|
||||
original_volume: float,
|
||||
) -> None:
|
||||
"""Add narration audio to video. Extracted for testability."""
|
||||
video = None
|
||||
final = None
|
||||
narration_original = None
|
||||
narration_scaled = None
|
||||
original = None
|
||||
|
||||
try:
|
||||
video = VideoFileClip(video_abspath)
|
||||
narration_original = AudioFileClip(audio_abspath)
|
||||
narration_scaled = narration_original.with_volume_scaled(narration_volume)
|
||||
narration = narration_scaled
|
||||
|
||||
if mix_mode == "replace":
|
||||
final_audio = narration
|
||||
elif mix_mode == "mix":
|
||||
if video.audio:
|
||||
original = video.audio.with_volume_scaled(original_volume)
|
||||
final_audio = CompositeAudioClip([original, narration])
|
||||
else:
|
||||
final_audio = narration
|
||||
else: # ducking - apply stronger attenuation
|
||||
if video.audio:
|
||||
# Ducking uses a much lower volume for original audio
|
||||
ducking_volume = original_volume * 0.3
|
||||
original = video.audio.with_volume_scaled(ducking_volume)
|
||||
final_audio = CompositeAudioClip([original, narration])
|
||||
else:
|
||||
final_audio = narration
|
||||
|
||||
final = video.with_audio(final_audio)
|
||||
final.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
|
||||
|
||||
finally:
|
||||
if original:
|
||||
original.close()
|
||||
if narration_scaled:
|
||||
narration_scaled.close()
|
||||
if narration_original:
|
||||
narration_original.close()
|
||||
if final:
|
||||
final.close()
|
||||
if video:
|
||||
video.close()
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
credentials: ElevenLabsCredentials,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
try:
|
||||
# Store the input video locally
|
||||
local_video_path = await self._store_input_video(
|
||||
graph_exec_id, input_data.video_in, user_id
|
||||
)
|
||||
video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
|
||||
|
||||
# Generate narration audio via ElevenLabs
|
||||
audio_content = self._generate_narration_audio(
|
||||
credentials.api_key.get_secret_value(),
|
||||
input_data.script,
|
||||
input_data.voice_id,
|
||||
)
|
||||
|
||||
# Save audio to exec file path
|
||||
audio_filename = MediaFileType(f"{node_exec_id}_narration.mp3")
|
||||
audio_abspath = get_exec_file_path(graph_exec_id, audio_filename)
|
||||
os.makedirs(os.path.dirname(audio_abspath), exist_ok=True)
|
||||
with open(audio_abspath, "wb") as f:
|
||||
f.write(audio_content)
|
||||
|
||||
# Add narration to video
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_narrated_{os.path.basename(local_video_path)}"
|
||||
)
|
||||
output_abspath = get_exec_file_path(graph_exec_id, output_filename)
|
||||
|
||||
self._add_narration_to_video(
|
||||
video_abspath,
|
||||
audio_abspath,
|
||||
output_abspath,
|
||||
input_data.mix_mode,
|
||||
input_data.narration_volume,
|
||||
input_data.original_volume,
|
||||
)
|
||||
|
||||
# Return as data URI or path
|
||||
return_as_data_uri = input_data.output_return_type == "data_uri"
|
||||
|
||||
video_out = await self._store_output_video(
|
||||
graph_exec_id, output_filename, user_id, return_as_data_uri
|
||||
)
|
||||
|
||||
audio_out = await self._store_output_video(
|
||||
graph_exec_id, audio_filename, user_id, return_as_data_uri
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
yield "audio_file", audio_out
|
||||
|
||||
except Exception as e:
|
||||
raise BlockExecutionError(
|
||||
message=f"Failed to add narration: {e}",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
) from e
|
||||
230
autogpt_platform/backend/backend/blocks/video/text_overlay.py
Normal file
230
autogpt_platform/backend/backend/blocks/video/text_overlay.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""VideoTextOverlayBlock - Add text overlay to video."""
|
||||
|
||||
import os
|
||||
from typing import Literal
|
||||
|
||||
from moviepy import CompositeVideoClip, TextClip
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.util.exceptions import BlockExecutionError
|
||||
from backend.util.file import MediaFileType, get_exec_file_path, store_media_file
|
||||
|
||||
|
||||
class VideoTextOverlayBlock(Block):
|
||||
"""Add text overlay/caption to video."""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
video_in: MediaFileType = SchemaField(
|
||||
description="Input video (URL, data URI, or local path)"
|
||||
)
|
||||
text: str = SchemaField(description="Text to overlay on video")
|
||||
position: Literal[
|
||||
"top",
|
||||
"center",
|
||||
"bottom",
|
||||
"top-left",
|
||||
"top-right",
|
||||
"bottom-left",
|
||||
"bottom-right",
|
||||
] = SchemaField(description="Position of text on screen", default="bottom")
|
||||
start_time: float | None = SchemaField(
|
||||
description="When to show text (seconds). None = entire video",
|
||||
default=None,
|
||||
advanced=True,
|
||||
)
|
||||
end_time: float | None = SchemaField(
|
||||
description="When to hide text (seconds). None = until end",
|
||||
default=None,
|
||||
advanced=True,
|
||||
)
|
||||
font_size: int = SchemaField(
|
||||
description="Font size", default=48, ge=12, le=200, advanced=True
|
||||
)
|
||||
font_color: str = SchemaField(
|
||||
description="Font color (hex or name)", default="white", advanced=True
|
||||
)
|
||||
bg_color: str | None = SchemaField(
|
||||
description="Background color behind text (None for transparent)",
|
||||
default=None,
|
||||
advanced=True,
|
||||
)
|
||||
output_return_type: Literal["file_path", "data_uri"] = SchemaField(
|
||||
description="Return the output as a relative path or base64 data URI.",
|
||||
default="file_path",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_out: MediaFileType = SchemaField(
|
||||
description="Video with text overlay (path or data URI)"
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="8ef14de6-cc90-430a-8cfa-3a003be92454",
|
||||
description="Add text overlay/caption to video",
|
||||
categories={BlockCategory.MULTIMEDIA},
|
||||
input_schema=self.Input,
|
||||
output_schema=self.Output,
|
||||
test_input={"video_in": "/tmp/test.mp4", "text": "Hello World"},
|
||||
test_output=[("video_out", str)],
|
||||
test_mock={
|
||||
"_add_text_overlay": lambda *args: None,
|
||||
"_store_input_video": lambda *args, **kwargs: "test.mp4",
|
||||
"_store_output_video": lambda *args, **kwargs: "overlay_test.mp4",
|
||||
},
|
||||
)
|
||||
|
||||
async def _store_input_video(
|
||||
self, graph_exec_id: str, file: MediaFileType, user_id: str
|
||||
) -> MediaFileType:
|
||||
"""Store input video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=False,
|
||||
)
|
||||
|
||||
async def _store_output_video(
|
||||
self,
|
||||
graph_exec_id: str,
|
||||
file: MediaFileType,
|
||||
user_id: str,
|
||||
return_content: bool,
|
||||
) -> MediaFileType:
|
||||
"""Store output video. Extracted for testability."""
|
||||
return await store_media_file(
|
||||
graph_exec_id=graph_exec_id,
|
||||
file=file,
|
||||
user_id=user_id,
|
||||
return_content=return_content,
|
||||
)
|
||||
|
||||
def _add_text_overlay(
|
||||
self,
|
||||
video_abspath: str,
|
||||
output_abspath: str,
|
||||
text: str,
|
||||
position: str,
|
||||
start_time: float | None,
|
||||
end_time: float | None,
|
||||
font_size: int,
|
||||
font_color: str,
|
||||
bg_color: str | None,
|
||||
) -> None:
|
||||
"""Add text overlay to video. Extracted for testability."""
|
||||
video = None
|
||||
final = None
|
||||
txt_clip = None
|
||||
try:
|
||||
video = VideoFileClip(video_abspath)
|
||||
|
||||
txt_clip = TextClip(
|
||||
text=text,
|
||||
font_size=font_size,
|
||||
color=font_color,
|
||||
bg_color=bg_color,
|
||||
)
|
||||
|
||||
# Position mapping
|
||||
pos_map = {
|
||||
"top": ("center", "top"),
|
||||
"center": ("center", "center"),
|
||||
"bottom": ("center", "bottom"),
|
||||
"top-left": ("left", "top"),
|
||||
"top-right": ("right", "top"),
|
||||
"bottom-left": ("left", "bottom"),
|
||||
"bottom-right": ("right", "bottom"),
|
||||
}
|
||||
|
||||
txt_clip = txt_clip.with_position(pos_map[position])
|
||||
|
||||
# Set timing
|
||||
start = start_time or 0
|
||||
end = end_time or video.duration
|
||||
duration = max(0, end - start)
|
||||
txt_clip = txt_clip.with_start(start).with_end(end).with_duration(duration)
|
||||
|
||||
final = CompositeVideoClip([video, txt_clip])
|
||||
final.write_videofile(output_abspath, codec="libx264", audio_codec="aac")
|
||||
|
||||
finally:
|
||||
if txt_clip:
|
||||
txt_clip.close()
|
||||
if final:
|
||||
final.close()
|
||||
if video:
|
||||
video.close()
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
node_exec_id: str,
|
||||
graph_exec_id: str,
|
||||
user_id: str,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
# Validate time range if both are provided
|
||||
if (
|
||||
input_data.start_time is not None
|
||||
and input_data.end_time is not None
|
||||
and input_data.end_time <= input_data.start_time
|
||||
):
|
||||
raise BlockExecutionError(
|
||||
message=f"end_time ({input_data.end_time}) must be greater than start_time ({input_data.start_time})",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
)
|
||||
|
||||
try:
|
||||
# Store the input video locally
|
||||
local_video_path = await self._store_input_video(
|
||||
graph_exec_id, input_data.video_in, user_id
|
||||
)
|
||||
video_abspath = get_exec_file_path(graph_exec_id, local_video_path)
|
||||
|
||||
# Build output path
|
||||
output_filename = MediaFileType(
|
||||
f"{node_exec_id}_overlay_{os.path.basename(local_video_path)}"
|
||||
)
|
||||
output_abspath = get_exec_file_path(graph_exec_id, output_filename)
|
||||
|
||||
self._add_text_overlay(
|
||||
video_abspath,
|
||||
output_abspath,
|
||||
input_data.text,
|
||||
input_data.position,
|
||||
input_data.start_time,
|
||||
input_data.end_time,
|
||||
input_data.font_size,
|
||||
input_data.font_color,
|
||||
input_data.bg_color,
|
||||
)
|
||||
|
||||
# Return as data URI or path
|
||||
video_out = await self._store_output_video(
|
||||
graph_exec_id,
|
||||
output_filename,
|
||||
user_id,
|
||||
input_data.output_return_type == "data_uri",
|
||||
)
|
||||
|
||||
yield "video_out", video_out
|
||||
|
||||
except BlockExecutionError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise BlockExecutionError(
|
||||
message=f"Failed to add text overlay: {e}",
|
||||
block_name=self.name,
|
||||
block_id=str(self.id),
|
||||
) from e
|
||||
@@ -36,12 +36,14 @@ from backend.blocks.replicate.replicate_block import ReplicateModelBlock
|
||||
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
|
||||
from backend.blocks.talking_head import CreateTalkingAvatarVideoBlock
|
||||
from backend.blocks.text_to_speech_block import UnrealTextToSpeechBlock
|
||||
from backend.blocks.video.narration import VideoNarrationBlock
|
||||
from backend.data.block import Block, BlockCost, BlockCostType
|
||||
from backend.integrations.credentials_store import (
|
||||
aiml_api_credentials,
|
||||
anthropic_credentials,
|
||||
apollo_credentials,
|
||||
did_credentials,
|
||||
elevenlabs_credentials,
|
||||
enrichlayer_credentials,
|
||||
groq_credentials,
|
||||
ideogram_credentials,
|
||||
@@ -640,4 +642,16 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
},
|
||||
),
|
||||
],
|
||||
VideoNarrationBlock: [
|
||||
BlockCost(
|
||||
cost_amount=5, # ElevenLabs TTS cost
|
||||
cost_filter={
|
||||
"credentials": {
|
||||
"id": elevenlabs_credentials.id,
|
||||
"provider": elevenlabs_credentials.provider,
|
||||
"type": elevenlabs_credentials.type,
|
||||
}
|
||||
},
|
||||
)
|
||||
],
|
||||
}
|
||||
|
||||
@@ -224,6 +224,14 @@ openweathermap_credentials = APIKeyCredentials(
|
||||
expires_at=None,
|
||||
)
|
||||
|
||||
elevenlabs_credentials = APIKeyCredentials(
|
||||
id="f4a8b6c2-3d1e-4f5a-9b8c-7d6e5f4a3b2c",
|
||||
provider="elevenlabs",
|
||||
api_key=SecretStr(settings.secrets.elevenlabs_api_key),
|
||||
title="Use Credits for ElevenLabs",
|
||||
expires_at=None,
|
||||
)
|
||||
|
||||
DEFAULT_CREDENTIALS = [
|
||||
ollama_credentials,
|
||||
revid_credentials,
|
||||
@@ -252,6 +260,7 @@ DEFAULT_CREDENTIALS = [
|
||||
v0_credentials,
|
||||
webshare_proxy_credentials,
|
||||
openweathermap_credentials,
|
||||
elevenlabs_credentials,
|
||||
]
|
||||
|
||||
SYSTEM_CREDENTIAL_IDS = {cred.id for cred in DEFAULT_CREDENTIALS}
|
||||
@@ -366,6 +375,8 @@ class IntegrationCredentialsStore:
|
||||
all_credentials.append(webshare_proxy_credentials)
|
||||
if settings.secrets.openweathermap_api_key:
|
||||
all_credentials.append(openweathermap_credentials)
|
||||
if settings.secrets.elevenlabs_api_key:
|
||||
all_credentials.append(elevenlabs_credentials)
|
||||
return all_credentials
|
||||
|
||||
async def get_creds_by_id(
|
||||
|
||||
@@ -18,6 +18,7 @@ class ProviderName(str, Enum):
|
||||
DISCORD = "discord"
|
||||
D_ID = "d_id"
|
||||
E2B = "e2b"
|
||||
ELEVENLABS = "elevenlabs"
|
||||
FAL = "fal"
|
||||
GITHUB = "github"
|
||||
GOOGLE = "google"
|
||||
|
||||
@@ -630,6 +630,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
|
||||
e2b_api_key: str = Field(default="", description="E2B API key")
|
||||
nvidia_api_key: str = Field(default="", description="Nvidia API key")
|
||||
mem0_api_key: str = Field(default="", description="Mem0 API key")
|
||||
elevenlabs_api_key: str = Field(default="", description="ElevenLabs API key")
|
||||
|
||||
linear_client_id: str = Field(default="", description="Linear client ID")
|
||||
linear_client_secret: str = Field(default="", description="Linear client secret")
|
||||
|
||||
47
autogpt_platform/backend/poetry.lock
generated
47
autogpt_platform/backend/poetry.lock
generated
@@ -1169,6 +1169,29 @@ attrs = ">=21.3.0"
|
||||
e2b = ">=1.5.4,<2.0.0"
|
||||
httpx = ">=0.20.0,<1.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "elevenlabs"
|
||||
version = "1.59.0"
|
||||
description = ""
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "elevenlabs-1.59.0-py3-none-any.whl", hash = "sha256:468145db81a0bc867708b4a8619699f75583e9481b395ec1339d0b443da771ed"},
|
||||
{file = "elevenlabs-1.59.0.tar.gz", hash = "sha256:16e735bd594e86d415dd445d249c8cc28b09996cfd627fbc10102c0a84698859"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
httpx = ">=0.21.2"
|
||||
pydantic = ">=1.9.2"
|
||||
pydantic-core = ">=2.18.2,<3.0.0"
|
||||
requests = ">=2.20"
|
||||
typing_extensions = ">=4.0.0"
|
||||
websockets = ">=11.0"
|
||||
|
||||
[package.extras]
|
||||
pyaudio = ["pyaudio (>=0.2.14)"]
|
||||
|
||||
[[package]]
|
||||
name = "email-validator"
|
||||
version = "2.2.0"
|
||||
@@ -7361,6 +7384,28 @@ files = [
|
||||
defusedxml = ">=0.7.1,<0.8.0"
|
||||
requests = "*"
|
||||
|
||||
[[package]]
|
||||
name = "yt-dlp"
|
||||
version = "2024.12.23"
|
||||
description = "A feature-rich command-line audio/video downloader"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "yt_dlp-2024.12.23-py3-none-any.whl", hash = "sha256:2fc08a5221a0379628ac4e7324c6c69a95b9fdfa7a7ca3187444b3b7451e38be"},
|
||||
{file = "yt_dlp-2024.12.23.tar.gz", hash = "sha256:ac0e72b5a9017ba104b4258546201a7cedc38e8bd20727e0c63b77c829b425e9"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
build = ["build", "hatchling", "pip", "setuptools (>=71.0.2)", "wheel"]
|
||||
curl-cffi = ["curl-cffi (==0.5.10) ; os_name == \"nt\" and implementation_name == \"cpython\"", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2) ; os_name != \"nt\" and implementation_name == \"cpython\""]
|
||||
default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"]
|
||||
dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.8.0,<0.9.0)"]
|
||||
pyinstaller = ["pyinstaller (>=6.11.1)"]
|
||||
secretstorage = ["cffi", "secretstorage"]
|
||||
static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.8.0,<0.9.0)"]
|
||||
test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "zerobouncesdk"
|
||||
version = "1.1.2"
|
||||
@@ -7512,4 +7557,4 @@ cffi = ["cffi (>=1.11)"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.14"
|
||||
content-hash = "18b92e09596298c82432e4d0a85cb6d80a40b4229bee0a0c15f0529fd6cb21a4"
|
||||
content-hash = "ee24b0e885ea951eecbda5e76314d711ed5ae02f63c69fd79c11ad2e3fe5fb0f"
|
||||
|
||||
@@ -20,6 +20,7 @@ click = "^8.2.0"
|
||||
cryptography = "^45.0"
|
||||
discord-py = "^2.5.2"
|
||||
e2b-code-interpreter = "^1.5.2"
|
||||
elevenlabs = "^1.50.0"
|
||||
fastapi = "^0.116.1"
|
||||
feedparser = "^6.0.11"
|
||||
flake8 = "^7.3.0"
|
||||
@@ -71,6 +72,7 @@ tweepy = "^4.16.0"
|
||||
uvicorn = { extras = ["standard"], version = "^0.35.0" }
|
||||
websockets = "^15.0"
|
||||
youtube-transcript-api = "^1.2.1"
|
||||
yt-dlp = "^2024.12.13"
|
||||
zerobouncesdk = "^1.1.2"
|
||||
# NOTE: please insert new dependencies in their alphabetical location
|
||||
pytest-snapshot = "^0.9.0"
|
||||
|
||||
@@ -26,6 +26,7 @@ export const providerIcons: Partial<
|
||||
nvidia: fallbackIcon,
|
||||
discord: FaDiscord,
|
||||
d_id: fallbackIcon,
|
||||
elevenlabs: fallbackIcon,
|
||||
google_maps: FaGoogle,
|
||||
jina: fallbackIcon,
|
||||
ideogram: fallbackIcon,
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
# Video editing blocks
|
||||
Reference in New Issue
Block a user