Compare commits


1 Commit

Author   SHA1         Message                                        Date
Swifty   50f39e55f7   added ability to disable cors at the backend   2025-09-11 09:59:47 +02:00
659 changed files with 6596 additions and 29649 deletions

View File

@@ -3,7 +3,6 @@ name: AutoGPT Platform - Deploy Prod Environment
on:
release:
types: [published]
workflow_dispatch:
permissions:
contents: 'read'
@@ -18,8 +17,6 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ github.ref_name || 'master' }}
- name: Set up Python
uses: actions/setup-python@v5
@@ -39,7 +36,7 @@ jobs:
DATABASE_URL: ${{ secrets.BACKEND_DATABASE_URL }}
DIRECT_URL: ${{ secrets.BACKEND_DATABASE_URL }}
trigger:
needs: migrate
runs-on: ubuntu-latest
@@ -50,5 +47,4 @@ jobs:
token: ${{ secrets.DEPLOY_TOKEN }}
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
event-type: build_deploy_prod
client-payload: |
{"ref": "${{ github.ref_name || 'master' }}", "repository": "${{ github.repository }}"}
client-payload: '{"ref": "${{ github.ref }}", "sha": "${{ github.sha }}", "repository": "${{ github.repository }}"}'

View File

@@ -5,13 +5,6 @@ on:
branches: [ dev ]
paths:
- 'autogpt_platform/**'
workflow_dispatch:
inputs:
git_ref:
description: 'Git ref (branch/tag) of AutoGPT to deploy'
required: true
default: 'master'
type: string
permissions:
contents: 'read'
@@ -26,8 +19,6 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.git_ref || github.ref_name }}
- name: Set up Python
uses: actions/setup-python@v5
@@ -57,4 +48,4 @@ jobs:
token: ${{ secrets.DEPLOY_TOKEN }}
repository: Significant-Gravitas/AutoGPT_cloud_infrastructure
event-type: build_deploy_dev
client-payload: '{"ref": "${{ github.event.inputs.git_ref || github.ref }}", "repository": "${{ github.repository }}"}'
client-payload: '{"ref": "${{ github.ref }}", "sha": "${{ github.sha }}", "repository": "${{ github.repository }}"}'

View File

@@ -37,7 +37,9 @@ jobs:
services:
redis:
image: redis:latest
image: bitnami/redis:6.2
env:
REDIS_PASSWORD: testpassword
ports:
- 6379:6379
rabbitmq:
@@ -202,6 +204,7 @@ jobs:
JWT_VERIFY_KEY: ${{ steps.supabase.outputs.JWT_SECRET }}
REDIS_HOST: "localhost"
REDIS_PORT: "6379"
REDIS_PASSWORD: "testpassword"
ENCRYPTION_KEY: "dvziYgz0KSK8FENhju0ZYi8-fRTfAdlz6YLhdB_jhNw=" # DO NOT USE IN PRODUCTION!!
env:
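
The switch to `bitnami/redis:6.2` means the CI Redis service now requires authentication, which is why `REDIS_PASSWORD` is threaded through both the service container and the test environment. A minimal sketch of a client honoring that, assuming the redis-py package (identifiers here are illustrative, not taken from this diff):

```
import os

from redis import Redis

client = Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    password=os.environ.get("REDIS_PASSWORD"),  # "testpassword" in this CI job
)
client.ping()  # raises redis.exceptions.AuthenticationError on a bad password
```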

View File

@@ -1,3 +1,6 @@
[pr_reviewer]
num_code_suggestions=0
[pr_code_suggestions]
commitable_code_suggestions=false
num_code_suggestions=0

View File

@@ -1,47 +0,0 @@
.PHONY: start-core stop-core logs-core format lint migrate run-backend run-frontend
# Run just Supabase + Redis + RabbitMQ
start-core:
docker compose up -d deps
# Stop core services
stop-core:
docker compose stop deps
# View logs for core services
logs-core:
docker compose logs -f deps
# Run formatting and linting for backend and frontend
format:
cd backend && poetry run format
cd frontend && pnpm format
cd frontend && pnpm lint
init-env:
cp -n .env.default .env || true
cd backend && cp -n .env.default .env || true
cd frontend && cp -n .env.default .env || true
# Run migrations for backend
migrate:
cd backend && poetry run prisma migrate deploy
cd backend && poetry run prisma generate
run-backend:
cd backend && poetry run app
run-frontend:
cd frontend && pnpm dev
help:
@echo "Usage: make <target>"
@echo "Targets:"
@echo " start-core - Start just the core services (Supabase, Redis, RabbitMQ) in background"
@echo " stop-core - Stop the core services"
@echo " logs-core - Tail the logs for core services"
@echo " format - Format & lint backend (Python) and frontend (TypeScript) code"
@echo " migrate - Run backend database migrations"
@echo " run-backend - Run the backend FastAPI server"
@echo " run-frontend - Run the frontend Next.js development server"

View File

@@ -38,37 +38,6 @@ To run the AutoGPT Platform, follow these steps:
4. After all the services are in ready state, open your browser and navigate to `http://localhost:3000` to access the AutoGPT Platform frontend.
### Running Just Core services
You can now run the following to enable just the core services.
```
# For help
make help
# Run just Supabase + Redis + RabbitMQ
make start-core
# Stop core services
make stop-core
# View logs from core services
make logs-core
# Run formatting and linting for backend and frontend
make format
# Run migrations for backend database
make migrate
# Run backend server
make run-backend
# Run frontend development server
make run-frontend
```
### Docker Compose Commands
Here are some useful Docker Compose commands for managing your AutoGPT Platform:

View File

@@ -10,7 +10,7 @@ from .jwt_utils import get_jwt_payload, verify_user
from .models import User
async def requires_user(jwt_payload: dict = fastapi.Security(get_jwt_payload)) -> User:
def requires_user(jwt_payload: dict = fastapi.Security(get_jwt_payload)) -> User:
"""
FastAPI dependency that requires a valid authenticated user.
@@ -20,9 +20,7 @@ async def requires_user(jwt_payload: dict = fastapi.Security(get_jwt_payload)) -
return verify_user(jwt_payload, admin_only=False)
async def requires_admin_user(
jwt_payload: dict = fastapi.Security(get_jwt_payload),
) -> User:
def requires_admin_user(jwt_payload: dict = fastapi.Security(get_jwt_payload)) -> User:
"""
FastAPI dependency that requires a valid admin user.
@@ -32,7 +30,7 @@ async def requires_admin_user(
return verify_user(jwt_payload, admin_only=True)
async def get_user_id(jwt_payload: dict = fastapi.Security(get_jwt_payload)) -> str:
def get_user_id(jwt_payload: dict = fastapi.Security(get_jwt_payload)) -> str:
"""
FastAPI dependency that returns the ID of the authenticated user.
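
These dependencies drop `async` because `verify_user` is a plain synchronous check; FastAPI resolves sync and async dependencies the same way, so call sites are unaffected. A minimal usage sketch, with an assumed route and app (neither appears in this diff):

```
import fastapi

from autogpt_libs.auth.dependencies import requires_user
from autogpt_libs.auth.models import User

app = fastapi.FastAPI()

@app.get("/me")
def read_me(user: User = fastapi.Depends(requires_user)) -> dict:
    # Works identically whether requires_user is sync or async:
    # FastAPI awaits coroutines and runs plain functions in a threadpool.
    return {"user_id": user.user_id, "role": user.role}
```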

View File

@@ -45,7 +45,7 @@ class TestAuthDependencies:
"""Create a test client."""
return TestClient(app)
async def test_requires_user_with_valid_jwt_payload(self, mocker: MockerFixture):
def test_requires_user_with_valid_jwt_payload(self, mocker: MockerFixture):
"""Test requires_user with valid JWT payload."""
jwt_payload = {"sub": "user-123", "role": "user", "email": "user@example.com"}
@@ -53,12 +53,12 @@ class TestAuthDependencies:
mocker.patch(
"autogpt_libs.auth.dependencies.get_jwt_payload", return_value=jwt_payload
)
user = await requires_user(jwt_payload)
user = requires_user(jwt_payload)
assert isinstance(user, User)
assert user.user_id == "user-123"
assert user.role == "user"
async def test_requires_user_with_admin_jwt_payload(self, mocker: MockerFixture):
def test_requires_user_with_admin_jwt_payload(self, mocker: MockerFixture):
"""Test requires_user accepts admin users."""
jwt_payload = {
"sub": "admin-456",
@@ -69,28 +69,28 @@ class TestAuthDependencies:
mocker.patch(
"autogpt_libs.auth.dependencies.get_jwt_payload", return_value=jwt_payload
)
user = await requires_user(jwt_payload)
user = requires_user(jwt_payload)
assert user.user_id == "admin-456"
assert user.role == "admin"
async def test_requires_user_missing_sub(self):
def test_requires_user_missing_sub(self):
"""Test requires_user with missing user ID."""
jwt_payload = {"role": "user", "email": "user@example.com"}
with pytest.raises(HTTPException) as exc_info:
await requires_user(jwt_payload)
requires_user(jwt_payload)
assert exc_info.value.status_code == 401
assert "User ID not found" in exc_info.value.detail
async def test_requires_user_empty_sub(self):
def test_requires_user_empty_sub(self):
"""Test requires_user with empty user ID."""
jwt_payload = {"sub": "", "role": "user"}
with pytest.raises(HTTPException) as exc_info:
await requires_user(jwt_payload)
requires_user(jwt_payload)
assert exc_info.value.status_code == 401
async def test_requires_admin_user_with_admin(self, mocker: MockerFixture):
def test_requires_admin_user_with_admin(self, mocker: MockerFixture):
"""Test requires_admin_user with admin role."""
jwt_payload = {
"sub": "admin-789",
@@ -101,51 +101,51 @@ class TestAuthDependencies:
mocker.patch(
"autogpt_libs.auth.dependencies.get_jwt_payload", return_value=jwt_payload
)
user = await requires_admin_user(jwt_payload)
user = requires_admin_user(jwt_payload)
assert user.user_id == "admin-789"
assert user.role == "admin"
async def test_requires_admin_user_with_regular_user(self):
def test_requires_admin_user_with_regular_user(self):
"""Test requires_admin_user rejects regular users."""
jwt_payload = {"sub": "user-123", "role": "user", "email": "user@example.com"}
with pytest.raises(HTTPException) as exc_info:
await requires_admin_user(jwt_payload)
requires_admin_user(jwt_payload)
assert exc_info.value.status_code == 403
assert "Admin access required" in exc_info.value.detail
async def test_requires_admin_user_missing_role(self):
def test_requires_admin_user_missing_role(self):
"""Test requires_admin_user with missing role."""
jwt_payload = {"sub": "user-123", "email": "user@example.com"}
with pytest.raises(KeyError):
await requires_admin_user(jwt_payload)
requires_admin_user(jwt_payload)
async def test_get_user_id_with_valid_payload(self, mocker: MockerFixture):
def test_get_user_id_with_valid_payload(self, mocker: MockerFixture):
"""Test get_user_id extracts user ID correctly."""
jwt_payload = {"sub": "user-id-xyz", "role": "user"}
mocker.patch(
"autogpt_libs.auth.dependencies.get_jwt_payload", return_value=jwt_payload
)
user_id = await get_user_id(jwt_payload)
user_id = get_user_id(jwt_payload)
assert user_id == "user-id-xyz"
async def test_get_user_id_missing_sub(self):
def test_get_user_id_missing_sub(self):
"""Test get_user_id with missing user ID."""
jwt_payload = {"role": "user"}
with pytest.raises(HTTPException) as exc_info:
await get_user_id(jwt_payload)
get_user_id(jwt_payload)
assert exc_info.value.status_code == 401
assert "User ID not found" in exc_info.value.detail
async def test_get_user_id_none_sub(self):
def test_get_user_id_none_sub(self):
"""Test get_user_id with None user ID."""
jwt_payload = {"sub": None, "role": "user"}
with pytest.raises(HTTPException) as exc_info:
await get_user_id(jwt_payload)
get_user_id(jwt_payload)
assert exc_info.value.status_code == 401
@@ -170,7 +170,7 @@ class TestAuthDependenciesIntegration:
return _create_token
async def test_endpoint_auth_enabled_no_token(self):
def test_endpoint_auth_enabled_no_token(self):
"""Test endpoints require token when auth is enabled."""
app = FastAPI()
@@ -184,7 +184,7 @@ class TestAuthDependenciesIntegration:
response = client.get("/test")
assert response.status_code == 401
async def test_endpoint_with_valid_token(self, create_token):
def test_endpoint_with_valid_token(self, create_token):
"""Test endpoint with valid JWT token."""
app = FastAPI()
@@ -203,7 +203,7 @@ class TestAuthDependenciesIntegration:
assert response.status_code == 200
assert response.json()["user_id"] == "test-user"
async def test_admin_endpoint_requires_admin_role(self, create_token):
def test_admin_endpoint_requires_admin_role(self, create_token):
"""Test admin endpoint rejects non-admin users."""
app = FastAPI()
@@ -240,7 +240,7 @@ class TestAuthDependenciesIntegration:
class TestAuthDependenciesEdgeCases:
"""Edge case tests for authentication dependencies."""
async def test_dependency_with_complex_payload(self):
def test_dependency_with_complex_payload(self):
"""Test dependencies handle complex JWT payloads."""
complex_payload = {
"sub": "user-123",
@@ -256,14 +256,14 @@ class TestAuthDependenciesEdgeCases:
"exp": 9999999999,
}
user = await requires_user(complex_payload)
user = requires_user(complex_payload)
assert user.user_id == "user-123"
assert user.email == "test@example.com"
admin = await requires_admin_user(complex_payload)
admin = requires_admin_user(complex_payload)
assert admin.role == "admin"
async def test_dependency_with_unicode_in_payload(self):
def test_dependency_with_unicode_in_payload(self):
"""Test dependencies handle unicode in JWT payloads."""
unicode_payload = {
"sub": "user-😀-123",
@@ -272,11 +272,11 @@ class TestAuthDependenciesEdgeCases:
"name": "日本語",
}
user = await requires_user(unicode_payload)
user = requires_user(unicode_payload)
assert "😀" in user.user_id
assert user.email == "测试@example.com"
async def test_dependency_with_null_values(self):
def test_dependency_with_null_values(self):
"""Test dependencies handle null values in payload."""
null_payload = {
"sub": "user-123",
@@ -286,18 +286,18 @@ class TestAuthDependenciesEdgeCases:
"metadata": None,
}
user = await requires_user(null_payload)
user = requires_user(null_payload)
assert user.user_id == "user-123"
assert user.email is None
async def test_concurrent_requests_isolation(self):
def test_concurrent_requests_isolation(self):
"""Test that concurrent requests don't interfere with each other."""
payload1 = {"sub": "user-1", "role": "user"}
payload2 = {"sub": "user-2", "role": "admin"}
# Simulate concurrent processing
user1 = await requires_user(payload1)
user2 = await requires_admin_user(payload2)
user1 = requires_user(payload1)
user2 = requires_admin_user(payload2)
assert user1.user_id == "user-1"
assert user2.user_id == "user-2"
@@ -314,7 +314,7 @@ class TestAuthDependenciesEdgeCases:
({"sub": "user", "role": "user"}, "Admin access required", True),
],
)
async def test_dependency_error_cases(
def test_dependency_error_cases(
self, payload, expected_error: str, admin_only: bool
):
"""Test that errors propagate correctly through dependencies."""
@@ -325,7 +325,7 @@ class TestAuthDependenciesEdgeCases:
verify_user(payload, admin_only=admin_only)
assert expected_error in exc_info.value.detail
async def test_dependency_valid_user(self):
def test_dependency_valid_user(self):
"""Test valid user case for dependency."""
# Import verify_user to test it directly since dependencies use FastAPI Security
from autogpt_libs.auth.jwt_utils import verify_user

View File

@@ -16,7 +16,7 @@ bearer_jwt_auth = HTTPBearer(
)
async def get_jwt_payload(
def get_jwt_payload(
credentials: HTTPAuthorizationCredentials | None = Security(bearer_jwt_auth),
) -> dict[str, Any]:
"""

View File

@@ -116,32 +116,32 @@ def test_parse_jwt_token_missing_audience():
assert "Invalid token" in str(exc_info.value)
async def test_get_jwt_payload_with_valid_token():
def test_get_jwt_payload_with_valid_token():
"""Test extracting JWT payload with valid bearer token."""
token = create_token(TEST_USER_PAYLOAD)
credentials = HTTPAuthorizationCredentials(scheme="Bearer", credentials=token)
result = await jwt_utils.get_jwt_payload(credentials)
result = jwt_utils.get_jwt_payload(credentials)
assert result["sub"] == "test-user-id"
assert result["role"] == "user"
async def test_get_jwt_payload_no_credentials():
def test_get_jwt_payload_no_credentials():
"""Test JWT payload when no credentials provided."""
with pytest.raises(HTTPException) as exc_info:
await jwt_utils.get_jwt_payload(None)
jwt_utils.get_jwt_payload(None)
assert exc_info.value.status_code == 401
assert "Authorization header is missing" in exc_info.value.detail
async def test_get_jwt_payload_invalid_token():
def test_get_jwt_payload_invalid_token():
"""Test JWT payload extraction with invalid token."""
credentials = HTTPAuthorizationCredentials(
scheme="Bearer", credentials="invalid.token.here"
)
with pytest.raises(HTTPException) as exc_info:
await jwt_utils.get_jwt_payload(credentials)
jwt_utils.get_jwt_payload(credentials)
assert exc_info.value.status_code == 401
assert "Invalid token" in exc_info.value.detail

View File

@@ -4,7 +4,6 @@ import logging
import os
import socket
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path
from pydantic import Field, field_validator
@@ -140,13 +139,8 @@ def configure_logging(force_cloud_logging: bool = False) -> None:
print(f"Log directory: {config.log_dir}")
# Activity log handler (INFO and above)
# Security fix: Use RotatingFileHandler with size limits to prevent disk exhaustion
activity_log_handler = RotatingFileHandler(
config.log_dir / LOG_FILE,
mode="a",
encoding="utf-8",
maxBytes=10 * 1024 * 1024, # 10MB per file
backupCount=3, # Keep 3 backup files (40MB total)
activity_log_handler = logging.FileHandler(
config.log_dir / LOG_FILE, "a", "utf-8"
)
activity_log_handler.setLevel(config.level)
activity_log_handler.setFormatter(
@@ -156,13 +150,8 @@ def configure_logging(force_cloud_logging: bool = False) -> None:
if config.level == logging.DEBUG:
# Debug log handler (all levels)
# Security fix: Use RotatingFileHandler with size limits
debug_log_handler = RotatingFileHandler(
config.log_dir / DEBUG_LOG_FILE,
mode="a",
encoding="utf-8",
maxBytes=10 * 1024 * 1024, # 10MB per file
backupCount=3, # Keep 3 backup files (40MB total)
debug_log_handler = logging.FileHandler(
config.log_dir / DEBUG_LOG_FILE, "a", "utf-8"
)
debug_log_handler.setLevel(logging.DEBUG)
debug_log_handler.setFormatter(
@@ -171,13 +160,8 @@ def configure_logging(force_cloud_logging: bool = False) -> None:
log_handlers.append(debug_log_handler)
# Error log handler (ERROR and above)
# Security fix: Use RotatingFileHandler with size limits
error_log_handler = RotatingFileHandler(
config.log_dir / ERROR_LOG_FILE,
mode="a",
encoding="utf-8",
maxBytes=10 * 1024 * 1024, # 10MB per file
backupCount=3, # Keep 3 backup files (40MB total)
error_log_handler = logging.FileHandler(
config.log_dir / ERROR_LOG_FILE, "a", "utf-8"
)
error_log_handler.setLevel(logging.ERROR)
error_log_handler.setFormatter(AGPTFormatter(DEBUG_LOG_FORMAT, no_color=True))
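
Reverting to `logging.FileHandler` removes the size cap: the rotating variant bounded each log to 10 MB with three backups, while a plain handler appends indefinitely. For reference, a sketch of the two configurations side by side (paths are placeholders):

```
import logging
from logging.handlers import RotatingFileHandler

plain = logging.FileHandler("activity.log", "a", "utf-8")  # grows without bound

rotating = RotatingFileHandler(
    "activity.log",
    mode="a",
    encoding="utf-8",
    maxBytes=10 * 1024 * 1024,  # roll over at 10 MB
    backupCount=3,              # keep activity.log.1 through .3, then overwrite
)
```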

View File

@@ -1,5 +1,3 @@
from typing import Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -15,8 +13,8 @@ class RateLimitSettings(BaseSettings):
default="6379", description="Redis port", validation_alias="REDIS_PORT"
)
redis_password: Optional[str] = Field(
default=None,
redis_password: str = Field(
default="password",
description="Redis password",
validation_alias="REDIS_PASSWORD",
)

View File

@@ -11,7 +11,7 @@ class RateLimiter:
self,
redis_host: str = RATE_LIMIT_SETTINGS.redis_host,
redis_port: str = RATE_LIMIT_SETTINGS.redis_port,
redis_password: str | None = RATE_LIMIT_SETTINGS.redis_password,
redis_password: str = RATE_LIMIT_SETTINGS.redis_password,
requests_per_minute: int = RATE_LIMIT_SETTINGS.requests_per_minute,
):
self.redis = Redis(
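
With `Optional` removed, `redis_password` is always a string and callers no longer need a `None` branch. A sketch of how these settings resolve, assuming pydantic-settings reads the `REDIS_*` environment variables through the `validation_alias` fields shown above (note that `RATE_LIMIT_SETTINGS` is bound at import time, so the variables must be set before the module loads):

```
import os

os.environ["REDIS_PASSWORD"] = "password"

settings = RateLimitSettings()  # reads REDIS_HOST/PORT/PASSWORD from the env
assert settings.redis_password == "password"

limiter = RateLimiter()         # defaults drawn from RATE_LIMIT_SETTINGS
```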

View File

@@ -1,68 +1,90 @@
import asyncio
import inspect
import logging
import threading
import time
from functools import wraps
from typing import (
Any,
Awaitable,
Callable,
ParamSpec,
Protocol,
Tuple,
TypeVar,
cast,
overload,
runtime_checkable,
)
P = ParamSpec("P")
R = TypeVar("R")
R_co = TypeVar("R_co", covariant=True)
logger = logging.getLogger(__name__)
def _make_hashable_key(
args: tuple[Any, ...], kwargs: dict[str, Any]
) -> tuple[Any, ...]:
"""
Convert args and kwargs into a hashable cache key.
@overload
def thread_cached(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[R]]:
pass
Handles unhashable types like dict, list, set by converting them to
their sorted string representations.
"""
def make_hashable(obj: Any) -> Any:
"""Recursively convert an object to a hashable representation."""
if isinstance(obj, dict):
# Sort dict items to ensure consistent ordering
return (
"__dict__",
tuple(sorted((k, make_hashable(v)) for k, v in obj.items())),
)
elif isinstance(obj, (list, tuple)):
return ("__list__", tuple(make_hashable(item) for item in obj))
elif isinstance(obj, set):
return ("__set__", tuple(sorted(make_hashable(item) for item in obj)))
elif hasattr(obj, "__dict__"):
# Handle objects with __dict__ attribute
return ("__obj__", obj.__class__.__name__, make_hashable(obj.__dict__))
else:
# For basic hashable types (str, int, bool, None, etc.)
try:
hash(obj)
return obj
except TypeError:
# Fallback: convert to string representation
return ("__str__", str(obj))
@overload
def thread_cached(func: Callable[P, R]) -> Callable[P, R]:
pass
hashable_args = tuple(make_hashable(arg) for arg in args)
hashable_kwargs = tuple(sorted((k, make_hashable(v)) for k, v in kwargs.items()))
return (hashable_args, hashable_kwargs)
def thread_cached(
func: Callable[P, R] | Callable[P, Awaitable[R]],
) -> Callable[P, R] | Callable[P, Awaitable[R]]:
thread_local = threading.local()
def _clear():
if hasattr(thread_local, "cache"):
del thread_local.cache
if inspect.iscoroutinefunction(func):
async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
cache = getattr(thread_local, "cache", None)
if cache is None:
cache = thread_local.cache = {}
key = (args, tuple(sorted(kwargs.items())))
if key not in cache:
cache[key] = await cast(Callable[P, Awaitable[R]], func)(
*args, **kwargs
)
return cache[key]
setattr(async_wrapper, "clear_cache", _clear)
return async_wrapper
else:
def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
cache = getattr(thread_local, "cache", None)
if cache is None:
cache = thread_local.cache = {}
key = (args, tuple(sorted(kwargs.items())))
if key not in cache:
cache[key] = func(*args, **kwargs)
return cache[key]
setattr(sync_wrapper, "clear_cache", _clear)
return sync_wrapper
def clear_thread_cache(func: Callable) -> None:
if clear := getattr(func, "clear_cache", None):
clear()
FuncT = TypeVar("FuncT")
R_co = TypeVar("R_co", covariant=True)
@runtime_checkable
class CachedFunction(Protocol[P, R_co]):
"""Protocol for cached functions with cache management methods."""
class AsyncCachedFunction(Protocol[P, R_co]):
"""Protocol for async functions with cache management methods."""
def cache_clear(self) -> None:
"""Clear all cached entries."""
@@ -72,180 +94,101 @@ class CachedFunction(Protocol[P, R_co]):
"""Get cache statistics."""
return {}
def cache_delete(self, *args: P.args, **kwargs: P.kwargs) -> bool:
"""Delete a specific cache entry by its arguments. Returns True if entry existed."""
return False
def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R_co:
async def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R_co:
"""Call the cached function."""
return None # type: ignore
def cached(
*,
maxsize: int = 128,
ttl_seconds: int | None = None,
) -> Callable[[Callable], CachedFunction]:
def async_ttl_cache(
maxsize: int = 128, ttl_seconds: int | None = None
) -> Callable[[Callable[P, Awaitable[R]]], AsyncCachedFunction[P, R]]:
"""
Thundering herd safe cache decorator for both sync and async functions.
TTL (Time To Live) cache decorator for async functions.
Uses double-checked locking to prevent multiple threads/coroutines from
executing the expensive operation simultaneously during cache misses.
Similar to functools.lru_cache but works with async functions and includes optional TTL.
Args:
func: The function to cache (when used without parentheses)
maxsize: Maximum number of cached entries
ttl_seconds: Time to live in seconds. If None, entries never expire
ttl_seconds: Time to live in seconds. If None, entries never expire (like lru_cache)
Returns:
Decorated function or decorator
Decorator function
Example:
@cache() # Default: maxsize=128, no TTL
def expensive_sync_operation(param: str) -> dict:
# With TTL
@async_ttl_cache(maxsize=1000, ttl_seconds=300)
async def api_call(param: str) -> dict:
return {"result": param}
@cache() # Works with async too
async def expensive_async_operation(param: str) -> dict:
return {"result": param}
@cache(maxsize=1000, ttl_seconds=300) # Custom maxsize and TTL
def another_operation(param: str) -> dict:
# Without TTL (permanent cache like lru_cache)
@async_ttl_cache(maxsize=1000)
async def expensive_computation(param: str) -> dict:
return {"result": param}
"""
def decorator(target_func):
# Cache storage and per-event-loop locks
cache_storage = {}
_event_loop_locks = {} # Maps event loop to its asyncio.Lock
def decorator(
async_func: Callable[P, Awaitable[R]],
) -> AsyncCachedFunction[P, R]:
# Cache storage - use union type to handle both cases
cache_storage: dict[tuple, R | Tuple[R, float]] = {}
if inspect.iscoroutinefunction(target_func):
@wraps(async_func)
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
# Create cache key from arguments
key = (args, tuple(sorted(kwargs.items())))
current_time = time.time()
def _get_cache_lock():
"""Get or create an asyncio.Lock for the current event loop."""
try:
loop = asyncio.get_running_loop()
except RuntimeError:
# No event loop, use None as default key
loop = None
if loop not in _event_loop_locks:
return _event_loop_locks.setdefault(loop, asyncio.Lock())
return _event_loop_locks[loop]
@wraps(target_func)
async def async_wrapper(*args: P.args, **kwargs: P.kwargs):
key = _make_hashable_key(args, kwargs)
current_time = time.time()
# Fast path: check cache without lock
if key in cache_storage:
if ttl_seconds is None:
logger.debug(f"Cache hit for {target_func.__name__}")
return cache_storage[key]
else:
cached_data = cache_storage[key]
if isinstance(cached_data, tuple):
result, timestamp = cached_data
if current_time - timestamp < ttl_seconds:
logger.debug(f"Cache hit for {target_func.__name__}")
return result
# Slow path: acquire lock for cache miss/expiry
async with _get_cache_lock():
# Double-check: another coroutine might have populated cache
if key in cache_storage:
if ttl_seconds is None:
return cache_storage[key]
# Check if we have a valid cached entry
if key in cache_storage:
if ttl_seconds is None:
# No TTL - return cached result directly
logger.debug(
f"Cache hit for {async_func.__name__} with key: {str(key)[:50]}"
)
return cast(R, cache_storage[key])
else:
# With TTL - check expiration
cached_data = cache_storage[key]
if isinstance(cached_data, tuple):
result, timestamp = cached_data
if current_time - timestamp < ttl_seconds:
logger.debug(
f"Cache hit for {async_func.__name__} with key: {str(key)[:50]}"
)
return cast(R, result)
else:
cached_data = cache_storage[key]
if isinstance(cached_data, tuple):
result, timestamp = cached_data
if current_time - timestamp < ttl_seconds:
return result
# Expired entry
del cache_storage[key]
logger.debug(
f"Cache entry expired for {async_func.__name__}"
)
# Cache miss - execute function
logger.debug(f"Cache miss for {target_func.__name__}")
result = await target_func(*args, **kwargs)
# Cache miss or expired - fetch fresh data
logger.debug(
f"Cache miss for {async_func.__name__} with key: {str(key)[:50]}"
)
result = await async_func(*args, **kwargs)
# Store result
if ttl_seconds is None:
cache_storage[key] = result
else:
cache_storage[key] = (result, current_time)
# Store in cache
if ttl_seconds is None:
cache_storage[key] = result
else:
cache_storage[key] = (result, current_time)
# Cleanup if needed
if len(cache_storage) > maxsize:
cutoff = maxsize // 2
oldest_keys = (
list(cache_storage.keys())[:-cutoff] if cutoff > 0 else []
)
for old_key in oldest_keys:
cache_storage.pop(old_key, None)
# Simple cleanup when cache gets too large
if len(cache_storage) > maxsize:
# Remove oldest entries (simple FIFO cleanup)
cutoff = maxsize // 2
oldest_keys = list(cache_storage.keys())[:-cutoff] if cutoff > 0 else []
for old_key in oldest_keys:
cache_storage.pop(old_key, None)
logger.debug(
f"Cache cleanup: removed {len(oldest_keys)} entries for {async_func.__name__}"
)
return result
return result
wrapper = async_wrapper
else:
# Sync function with threading.Lock
cache_lock = threading.Lock()
@wraps(target_func)
def sync_wrapper(*args: P.args, **kwargs: P.kwargs):
key = _make_hashable_key(args, kwargs)
current_time = time.time()
# Fast path: check cache without lock
if key in cache_storage:
if ttl_seconds is None:
logger.debug(f"Cache hit for {target_func.__name__}")
return cache_storage[key]
else:
cached_data = cache_storage[key]
if isinstance(cached_data, tuple):
result, timestamp = cached_data
if current_time - timestamp < ttl_seconds:
logger.debug(f"Cache hit for {target_func.__name__}")
return result
# Slow path: acquire lock for cache miss/expiry
with cache_lock:
# Double-check: another thread might have populated cache
if key in cache_storage:
if ttl_seconds is None:
return cache_storage[key]
else:
cached_data = cache_storage[key]
if isinstance(cached_data, tuple):
result, timestamp = cached_data
if current_time - timestamp < ttl_seconds:
return result
# Cache miss - execute function
logger.debug(f"Cache miss for {target_func.__name__}")
result = target_func(*args, **kwargs)
# Store result
if ttl_seconds is None:
cache_storage[key] = result
else:
cache_storage[key] = (result, current_time)
# Cleanup if needed
if len(cache_storage) > maxsize:
cutoff = maxsize // 2
oldest_keys = (
list(cache_storage.keys())[:-cutoff] if cutoff > 0 else []
)
for old_key in oldest_keys:
cache_storage.pop(old_key, None)
return result
wrapper = sync_wrapper
# Add cache management methods
# Add cache management methods (similar to functools.lru_cache)
def cache_clear() -> None:
cache_storage.clear()
@@ -256,84 +199,68 @@ def cached(
"ttl_seconds": ttl_seconds,
}
def cache_delete(*args, **kwargs) -> bool:
"""Delete a specific cache entry. Returns True if entry existed."""
key = _make_hashable_key(args, kwargs)
if key in cache_storage:
del cache_storage[key]
return True
return False
# Attach methods to wrapper
setattr(wrapper, "cache_clear", cache_clear)
setattr(wrapper, "cache_info", cache_info)
setattr(wrapper, "cache_delete", cache_delete)
return cast(CachedFunction, wrapper)
return cast(AsyncCachedFunction[P, R], wrapper)
return decorator
def thread_cached(func):
"""
Thread-local cache decorator for both sync and async functions.
@overload
def async_cache(
func: Callable[P, Awaitable[R]],
) -> AsyncCachedFunction[P, R]:
pass
Each thread gets its own cache, which is useful for request-scoped caching
in web applications where you want to cache within a single request but
not across requests.
@overload
def async_cache(
func: None = None,
*,
maxsize: int = 128,
) -> Callable[[Callable[P, Awaitable[R]]], AsyncCachedFunction[P, R]]:
pass
def async_cache(
func: Callable[P, Awaitable[R]] | None = None,
*,
maxsize: int = 128,
) -> (
AsyncCachedFunction[P, R]
| Callable[[Callable[P, Awaitable[R]]], AsyncCachedFunction[P, R]]
):
"""
Process-level cache decorator for async functions (no TTL).
Similar to functools.lru_cache but works with async functions.
This is a convenience wrapper around async_ttl_cache with ttl_seconds=None.
Args:
func: The function to cache
func: The async function to cache (when used without parentheses)
maxsize: Maximum number of cached entries
Returns:
Decorated function with thread-local caching
Decorated function or decorator
Example:
@thread_cached
def expensive_operation(param: str) -> dict:
# Without parentheses (uses default maxsize=128)
@async_cache
async def get_data(param: str) -> dict:
return {"result": param}
@thread_cached # Works with async too
async def expensive_async_operation(param: str) -> dict:
# With parentheses and custom maxsize
@async_cache(maxsize=1000)
async def expensive_computation(param: str) -> dict:
# Expensive computation here
return {"result": param}
"""
thread_local = threading.local()
def _clear():
if hasattr(thread_local, "cache"):
del thread_local.cache
if inspect.iscoroutinefunction(func):
@wraps(func)
async def async_wrapper(*args, **kwargs):
cache = getattr(thread_local, "cache", None)
if cache is None:
cache = thread_local.cache = {}
key = _make_hashable_key(args, kwargs)
if key not in cache:
cache[key] = await func(*args, **kwargs)
return cache[key]
setattr(async_wrapper, "clear_cache", _clear)
return async_wrapper
if func is None:
# Called with parentheses @async_cache() or @async_cache(maxsize=...)
return async_ttl_cache(maxsize=maxsize, ttl_seconds=None)
else:
@wraps(func)
def sync_wrapper(*args, **kwargs):
cache = getattr(thread_local, "cache", None)
if cache is None:
cache = thread_local.cache = {}
key = _make_hashable_key(args, kwargs)
if key not in cache:
cache[key] = func(*args, **kwargs)
return cache[key]
setattr(sync_wrapper, "clear_cache", _clear)
return sync_wrapper
def clear_thread_cache(func: Callable) -> None:
"""Clear thread-local cache for a function."""
if clear := getattr(func, "clear_cache", None):
clear()
# Called without parentheses @async_cache
decorator = async_ttl_cache(maxsize=maxsize, ttl_seconds=None)
return decorator(func)
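
Taken together, this file now exposes three caches: `thread_cached` (one cache per thread, useful for request-scoped memoization), `async_ttl_cache` (process-wide with optional expiry), and `async_cache` (process-wide, permanent). A usage sketch with illustrative function bodies:

```
import asyncio

from autogpt_libs.utils.cache import async_cache, async_ttl_cache, thread_cached

@thread_cached                                  # per-thread cache
def load_config(name: str) -> dict:
    return {"name": name}

@async_ttl_cache(maxsize=256, ttl_seconds=300)  # expires after 5 minutes
async def fetch_profile(user_id: str) -> dict:
    return {"id": user_id}

@async_cache                                    # never expires
async def get_schema(kind: str) -> dict:
    return {"kind": kind}

async def main() -> None:
    await fetch_profile("user-1")  # miss: executes the function
    await fetch_profile("user-1")  # hit: served from the cache

asyncio.run(main())
```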

View File

@@ -16,7 +16,12 @@ from unittest.mock import Mock
import pytest
from autogpt_libs.utils.cache import cached, clear_thread_cache, thread_cached
from autogpt_libs.utils.cache import (
async_cache,
async_ttl_cache,
clear_thread_cache,
thread_cached,
)
class TestThreadCached:
@@ -325,202 +330,102 @@ class TestThreadCached:
assert mock.call_count == 2
class TestCache:
"""Tests for the unified @cache decorator (works for both sync and async)."""
def test_basic_sync_caching(self):
"""Test basic sync caching functionality."""
call_count = 0
@cached()
def expensive_sync_function(x: int, y: int = 0) -> int:
nonlocal call_count
call_count += 1
return x + y
# First call
result1 = expensive_sync_function(1, 2)
assert result1 == 3
assert call_count == 1
# Second call with same args - should use cache
result2 = expensive_sync_function(1, 2)
assert result2 == 3
assert call_count == 1
# Different args - should call function again
result3 = expensive_sync_function(2, 3)
assert result3 == 5
assert call_count == 2
class TestAsyncTTLCache:
"""Tests for the @async_ttl_cache decorator."""
@pytest.mark.asyncio
async def test_basic_async_caching(self):
"""Test basic async caching functionality."""
async def test_basic_caching(self):
"""Test basic caching functionality."""
call_count = 0
@cached()
async def expensive_async_function(x: int, y: int = 0) -> int:
@async_ttl_cache(maxsize=10, ttl_seconds=60)
async def cached_function(x: int, y: int = 0) -> int:
nonlocal call_count
call_count += 1
await asyncio.sleep(0.01) # Simulate async work
return x + y
# First call
result1 = await expensive_async_function(1, 2)
result1 = await cached_function(1, 2)
assert result1 == 3
assert call_count == 1
# Second call with same args - should use cache
result2 = await expensive_async_function(1, 2)
result2 = await cached_function(1, 2)
assert result2 == 3
assert call_count == 1
assert call_count == 1 # No additional call
# Different args - should call function again
result3 = await expensive_async_function(2, 3)
result3 = await cached_function(2, 3)
assert result3 == 5
assert call_count == 2
def test_sync_thundering_herd_protection(self):
"""Test that concurrent sync calls don't cause thundering herd."""
call_count = 0
results = []
@cached()
def slow_function(x: int) -> int:
nonlocal call_count
call_count += 1
time.sleep(0.1) # Simulate expensive operation
return x * x
def worker():
result = slow_function(5)
results.append(result)
# Launch multiple concurrent threads
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [executor.submit(worker) for _ in range(5)]
for future in futures:
future.result()
# All results should be the same
assert all(result == 25 for result in results)
# Only one thread should have executed the expensive operation
assert call_count == 1
@pytest.mark.asyncio
async def test_async_thundering_herd_protection(self):
"""Test that concurrent async calls don't cause thundering herd."""
async def test_ttl_expiration(self):
"""Test that cache entries expire after TTL."""
call_count = 0
@cached()
async def slow_async_function(x: int) -> int:
@async_ttl_cache(maxsize=10, ttl_seconds=1) # Short TTL
async def short_lived_cache(x: int) -> int:
nonlocal call_count
call_count += 1
await asyncio.sleep(0.1) # Simulate expensive operation
return x * x
# Launch concurrent coroutines
tasks = [slow_async_function(7) for _ in range(5)]
results = await asyncio.gather(*tasks)
# All results should be the same
assert all(result == 49 for result in results)
# Only one coroutine should have executed the expensive operation
assert call_count == 1
def test_ttl_functionality(self):
"""Test TTL functionality with sync function."""
call_count = 0
@cached(maxsize=10, ttl_seconds=1) # Short TTL
def ttl_function(x: int) -> int:
nonlocal call_count
call_count += 1
return x * 3
return x * 2
# First call
result1 = ttl_function(3)
assert result1 == 9
result1 = await short_lived_cache(5)
assert result1 == 10
assert call_count == 1
# Second call immediately - should use cache
result2 = ttl_function(3)
assert result2 == 9
assert call_count == 1
# Wait for TTL to expire
time.sleep(1.1)
# Third call after expiration - should call function again
result3 = ttl_function(3)
assert result3 == 9
assert call_count == 2
@pytest.mark.asyncio
async def test_async_ttl_functionality(self):
"""Test TTL functionality with async function."""
call_count = 0
@cached(maxsize=10, ttl_seconds=1) # Short TTL
async def async_ttl_function(x: int) -> int:
nonlocal call_count
call_count += 1
await asyncio.sleep(0.01)
return x * 4
# First call
result1 = await async_ttl_function(3)
assert result1 == 12
assert call_count == 1
# Second call immediately - should use cache
result2 = await async_ttl_function(3)
assert result2 == 12
result2 = await short_lived_cache(5)
assert result2 == 10
assert call_count == 1
# Wait for TTL to expire
await asyncio.sleep(1.1)
# Third call after expiration - should call function again
result3 = await async_ttl_function(3)
assert result3 == 12
result3 = await short_lived_cache(5)
assert result3 == 10
assert call_count == 2
def test_cache_info(self):
@pytest.mark.asyncio
async def test_cache_info(self):
"""Test cache info functionality."""
@cached(maxsize=10, ttl_seconds=60)
def info_test_function(x: int) -> int:
@async_ttl_cache(maxsize=5, ttl_seconds=300)
async def info_test_function(x: int) -> int:
return x * 3
# Check initial cache info
info = info_test_function.cache_info()
assert info["size"] == 0
assert info["maxsize"] == 10
assert info["ttl_seconds"] == 60
assert info["maxsize"] == 5
assert info["ttl_seconds"] == 300
# Add an entry
info_test_function(1)
await info_test_function(1)
info = info_test_function.cache_info()
assert info["size"] == 1
def test_cache_clear(self):
@pytest.mark.asyncio
async def test_cache_clear(self):
"""Test cache clearing functionality."""
call_count = 0
@cached()
def clearable_function(x: int) -> int:
@async_ttl_cache(maxsize=10, ttl_seconds=60)
async def clearable_function(x: int) -> int:
nonlocal call_count
call_count += 1
return x * 4
# First call
result1 = clearable_function(2)
result1 = await clearable_function(2)
assert result1 == 8
assert call_count == 1
# Second call - should use cache
result2 = clearable_function(2)
result2 = await clearable_function(2)
assert result2 == 8
assert call_count == 1
@@ -528,149 +433,273 @@ class TestCache:
clearable_function.cache_clear()
# Third call after clear - should call function again
result3 = clearable_function(2)
result3 = await clearable_function(2)
assert result3 == 8
assert call_count == 2
@pytest.mark.asyncio
async def test_async_cache_clear(self):
"""Test cache clearing functionality with async function."""
async def test_maxsize_cleanup(self):
"""Test that cache cleans up when maxsize is exceeded."""
call_count = 0
@cached()
async def async_clearable_function(x: int) -> int:
@async_ttl_cache(maxsize=3, ttl_seconds=60)
async def size_limited_function(x: int) -> int:
nonlocal call_count
call_count += 1
await asyncio.sleep(0.01)
return x * 5
return x**2
# First call
result1 = await async_clearable_function(2)
# Fill cache to maxsize
await size_limited_function(1) # call_count: 1
await size_limited_function(2) # call_count: 2
await size_limited_function(3) # call_count: 3
info = size_limited_function.cache_info()
assert info["size"] == 3
# Add one more entry - should trigger cleanup
await size_limited_function(4) # call_count: 4
# Cache size should be reduced (cleanup removes oldest entries)
info = size_limited_function.cache_info()
assert info["size"] is not None and info["size"] <= 3 # Should be cleaned up
@pytest.mark.asyncio
async def test_argument_variations(self):
"""Test caching with different argument patterns."""
call_count = 0
@async_ttl_cache(maxsize=10, ttl_seconds=60)
async def arg_test_function(a: int, b: str = "default", *, c: int = 100) -> str:
nonlocal call_count
call_count += 1
return f"{a}-{b}-{c}"
# Different ways to call with same logical arguments
result1 = await arg_test_function(1, "test", c=200)
assert call_count == 1
# Same arguments, same order - should use cache
result2 = await arg_test_function(1, "test", c=200)
assert call_count == 1
assert result1 == result2
# Different arguments - should call function
result3 = await arg_test_function(2, "test", c=200)
assert call_count == 2
assert result1 != result3
@pytest.mark.asyncio
async def test_exception_handling(self):
"""Test that exceptions are not cached."""
call_count = 0
@async_ttl_cache(maxsize=10, ttl_seconds=60)
async def exception_function(x: int) -> int:
nonlocal call_count
call_count += 1
if x < 0:
raise ValueError("Negative value not allowed")
return x * 2
# Successful call - should be cached
result1 = await exception_function(5)
assert result1 == 10
assert call_count == 1
# Second call - should use cache
result2 = await async_clearable_function(2)
# Same successful call - should use cache
result2 = await exception_function(5)
assert result2 == 10
assert call_count == 1
# Clear cache
async_clearable_function.cache_clear()
# Third call after clear - should call function again
result3 = await async_clearable_function(2)
assert result3 == 10
# Exception call - should not be cached
with pytest.raises(ValueError):
await exception_function(-1)
assert call_count == 2
# Same exception call - should call again (not cached)
with pytest.raises(ValueError):
await exception_function(-1)
assert call_count == 3
@pytest.mark.asyncio
async def test_async_function_returns_results_not_coroutines(self):
"""Test that cached async functions return actual results, not coroutines."""
async def test_concurrent_calls(self):
"""Test caching behavior with concurrent calls."""
call_count = 0
@cached()
async def async_result_function(x: int) -> str:
@async_ttl_cache(maxsize=10, ttl_seconds=60)
async def concurrent_function(x: int) -> int:
nonlocal call_count
call_count += 1
await asyncio.sleep(0.01)
return f"result_{x}"
await asyncio.sleep(0.05) # Simulate work
return x * x
# Launch concurrent calls with same arguments
tasks = [concurrent_function(3) for _ in range(5)]
results = await asyncio.gather(*tasks)
# All results should be the same
assert all(result == 9 for result in results)
# Note: Due to race conditions, call_count might be up to 5 for concurrent calls
# This tests that the cache doesn't break under concurrent access
assert 1 <= call_count <= 5
class TestAsyncCache:
"""Tests for the @async_cache decorator (no TTL)."""
@pytest.mark.asyncio
async def test_basic_caching_no_ttl(self):
"""Test basic caching functionality without TTL."""
call_count = 0
@async_cache(maxsize=10)
async def cached_function(x: int, y: int = 0) -> int:
nonlocal call_count
call_count += 1
await asyncio.sleep(0.01) # Simulate async work
return x + y
# First call
result1 = await async_result_function(1)
assert result1 == "result_1"
assert isinstance(result1, str) # Should be string, not coroutine
result1 = await cached_function(1, 2)
assert result1 == 3
assert call_count == 1
# Second call - should return cached result (string), not coroutine
result2 = await async_result_function(1)
assert result2 == "result_1"
assert isinstance(result2, str) # Should be string, not coroutine
assert call_count == 1 # Function should not be called again
# Second call with same args - should use cache
result2 = await cached_function(1, 2)
assert result2 == 3
assert call_count == 1 # No additional call
# Verify results are identical
assert result1 is result2 # Should be same cached object
# Third call after some time - should still use cache (no TTL)
await asyncio.sleep(0.05)
result3 = await cached_function(1, 2)
assert result3 == 3
assert call_count == 1 # Still no additional call
def test_cache_delete(self):
"""Test selective cache deletion functionality."""
call_count = 0
@cached()
def deletable_function(x: int) -> int:
nonlocal call_count
call_count += 1
return x * 6
# First call for x=1
result1 = deletable_function(1)
assert result1 == 6
assert call_count == 1
# First call for x=2
result2 = deletable_function(2)
assert result2 == 12
# Different args - should call function again
result4 = await cached_function(2, 3)
assert result4 == 5
assert call_count == 2
# Second calls - should use cache
assert deletable_function(1) == 6
assert deletable_function(2) == 12
assert call_count == 2
# Delete specific entry for x=1
was_deleted = deletable_function.cache_delete(1)
assert was_deleted is True
# Call with x=1 should execute function again
result3 = deletable_function(1)
assert result3 == 6
assert call_count == 3
# Call with x=2 should still use cache
assert deletable_function(2) == 12
assert call_count == 3
# Try to delete non-existent entry
was_deleted = deletable_function.cache_delete(99)
assert was_deleted is False
@pytest.mark.asyncio
async def test_async_cache_delete(self):
"""Test selective cache deletion functionality with async function."""
async def test_no_ttl_vs_ttl_behavior(self):
"""Test the difference between TTL and no-TTL caching."""
ttl_call_count = 0
no_ttl_call_count = 0
@async_ttl_cache(maxsize=10, ttl_seconds=1) # Short TTL
async def ttl_function(x: int) -> int:
nonlocal ttl_call_count
ttl_call_count += 1
return x * 2
@async_cache(maxsize=10) # No TTL
async def no_ttl_function(x: int) -> int:
nonlocal no_ttl_call_count
no_ttl_call_count += 1
return x * 2
# First calls
await ttl_function(5)
await no_ttl_function(5)
assert ttl_call_count == 1
assert no_ttl_call_count == 1
# Wait for TTL to expire
await asyncio.sleep(1.1)
# Second calls after TTL expiry
await ttl_function(5) # Should call function again (TTL expired)
await no_ttl_function(5) # Should use cache (no TTL)
assert ttl_call_count == 2 # TTL function called again
assert no_ttl_call_count == 1 # No-TTL function still cached
@pytest.mark.asyncio
async def test_async_cache_info(self):
"""Test cache info for no-TTL cache."""
@async_cache(maxsize=5)
async def info_test_function(x: int) -> int:
return x * 3
# Check initial cache info
info = info_test_function.cache_info()
assert info["size"] == 0
assert info["maxsize"] == 5
assert info["ttl_seconds"] is None # No TTL
# Add an entry
await info_test_function(1)
info = info_test_function.cache_info()
assert info["size"] == 1
class TestTTLOptional:
"""Tests for optional TTL functionality."""
@pytest.mark.asyncio
async def test_ttl_none_behavior(self):
"""Test that ttl_seconds=None works like no TTL."""
call_count = 0
@cached()
async def async_deletable_function(x: int) -> int:
@async_ttl_cache(maxsize=10, ttl_seconds=None)
async def no_ttl_via_none(x: int) -> int:
nonlocal call_count
call_count += 1
await asyncio.sleep(0.01)
return x * 7
return x**2
# First call for x=1
result1 = await async_deletable_function(1)
assert result1 == 7
# First call
result1 = await no_ttl_via_none(3)
assert result1 == 9
assert call_count == 1
# First call for x=2
result2 = await async_deletable_function(2)
assert result2 == 14
assert call_count == 2
# Wait (would expire if there was TTL)
await asyncio.sleep(0.1)
# Second calls - should use cache
assert await async_deletable_function(1) == 7
assert await async_deletable_function(2) == 14
assert call_count == 2
# Second call - should still use cache
result2 = await no_ttl_via_none(3)
assert result2 == 9
assert call_count == 1 # No additional call
# Delete specific entry for x=1
was_deleted = async_deletable_function.cache_delete(1)
assert was_deleted is True
# Check cache info
info = no_ttl_via_none.cache_info()
assert info["ttl_seconds"] is None
# Call with x=1 should execute function again
result3 = await async_deletable_function(1)
assert result3 == 7
assert call_count == 3
@pytest.mark.asyncio
async def test_cache_options_comparison(self):
"""Test different cache options work as expected."""
ttl_calls = 0
no_ttl_calls = 0
# Call with x=2 should still use cache
assert await async_deletable_function(2) == 14
assert call_count == 3
@async_ttl_cache(maxsize=10, ttl_seconds=1) # With TTL
async def ttl_function(x: int) -> int:
nonlocal ttl_calls
ttl_calls += 1
return x * 10
# Try to delete non-existent entry
was_deleted = async_deletable_function.cache_delete(99)
assert was_deleted is False
@async_cache(maxsize=10) # Process-level cache (no TTL)
async def process_function(x: int) -> int:
nonlocal no_ttl_calls
no_ttl_calls += 1
return x * 10
# Both should cache initially
await ttl_function(3)
await process_function(3)
assert ttl_calls == 1
assert no_ttl_calls == 1
# Immediate second calls - both should use cache
await ttl_function(3)
await process_function(3)
assert ttl_calls == 1
assert no_ttl_calls == 1
# Wait for TTL to expire
await asyncio.sleep(1.1)
# After TTL expiry
await ttl_function(3) # Should call function again
await process_function(3) # Should still use cache
assert ttl_calls == 2 # TTL cache expired, called again
assert no_ttl_calls == 1 # Process cache never expires

View File

@@ -21,7 +21,7 @@ PRISMA_SCHEMA="postgres/schema.prisma"
# Redis Configuration
REDIS_HOST=localhost
REDIS_PORT=6379
# REDIS_PASSWORD=
REDIS_PASSWORD=password
# RabbitMQ Credentials
RABBITMQ_DEFAULT_USER=rabbitmq_user_default
@@ -66,11 +66,6 @@ NVIDIA_API_KEY=
GITHUB_CLIENT_ID=
GITHUB_CLIENT_SECRET=
# Notion OAuth App server credentials - https://developers.notion.com/docs/authorization
# Configure a public integration
NOTION_CLIENT_ID=
NOTION_CLIENT_SECRET=
# Google OAuth App server credentials - https://console.cloud.google.com/apis/credentials, and enable gmail api and set scopes
# https://console.cloud.google.com/apis/credentials/consent ?project=<your_project_id>
# You'll need to add/enable the following scopes (minimum):

View File

@@ -9,12 +9,4 @@ secrets/*
!secrets/.gitkeep
*.ignore.*
*.ign.*
# Load test results and reports
load-tests/*_RESULTS.md
load-tests/*_REPORT.md
load-tests/results/
load-tests/*.json
load-tests/*.log
load-tests/node_modules/*
*.ign.*

View File

@@ -9,15 +9,8 @@ WORKDIR /app
RUN echo 'Acquire::http::Pipeline-Depth 0;\nAcquire::http::No-Cache true;\nAcquire::BrokenProxy true;\n' > /etc/apt/apt.conf.d/99fixbadproxy
# Install Node.js repository key and setup
# Update package list and install Python and build dependencies
RUN apt-get update --allow-releaseinfo-change --fix-missing \
&& apt-get install -y curl ca-certificates gnupg \
&& mkdir -p /etc/apt/keyrings \
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
# Update package list and install Python, Node.js, and build dependencies
RUN apt-get update \
&& apt-get install -y \
python3.13 \
python3.13-dev \
@@ -27,9 +20,7 @@ RUN apt-get update \
libpq5 \
libz-dev \
libssl-dev \
postgresql-client \
nodejs \
&& rm -rf /var/lib/apt/lists/*
postgresql-client
ENV POETRY_HOME=/opt/poetry
ENV POETRY_NO_INTERACTION=1
@@ -63,18 +54,13 @@ ENV PATH=/opt/poetry/bin:$PATH
# Install Python without upgrading system-managed packages
RUN apt-get update && apt-get install -y \
python3.13 \
python3-pip \
&& rm -rf /var/lib/apt/lists/*
python3-pip
# Copy only necessary files from builder
COPY --from=builder /app /app
COPY --from=builder /usr/local/lib/python3* /usr/local/lib/python3*
COPY --from=builder /usr/local/bin/poetry /usr/local/bin/poetry
# Copy Node.js installation for Prisma
COPY --from=builder /usr/bin/node /usr/bin/node
COPY --from=builder /usr/lib/node_modules /usr/lib/node_modules
COPY --from=builder /usr/bin/npm /usr/bin/npm
COPY --from=builder /usr/bin/npx /usr/bin/npx
# Copy Prisma binaries
COPY --from=builder /root/.cache/prisma-python/binaries /root/.cache/prisma-python/binaries
ENV PATH="/app/autogpt_platform/backend/.venv/bin:$PATH"

View File

@@ -1,3 +1,4 @@
import functools
import importlib
import logging
import os
@@ -5,8 +6,6 @@ import re
from pathlib import Path
from typing import TYPE_CHECKING, TypeVar
from autogpt_libs.utils.cache import cached
logger = logging.getLogger(__name__)
@@ -16,7 +15,7 @@ if TYPE_CHECKING:
T = TypeVar("T")
@cached()
@functools.cache
def load_all_blocks() -> dict[str, type["Block"]]:
from backend.data.block import Block
from backend.util.settings import Config
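
`functools.cache` fits here because `load_all_blocks` takes no arguments: the registry is computed once and memoized for the life of the process, with no TTL or key hashing needed. A minimal illustration of the pattern (not the real block registry):

```
import functools

@functools.cache  # zero-argument function: computed once per process
def load_blocks_demo() -> dict[str, type]:
    print("scanning block modules...")  # printed only on the first call
    return {"example": object}

load_blocks_demo()  # runs the body
load_blocks_demo()  # cached: no print, same dict returned
```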

View File

@@ -1,214 +0,0 @@
from typing import Any
from backend.blocks.llm import (
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
AIBlockBase,
AICredentials,
AICredentialsField,
LlmModel,
LLMResponse,
llm_call,
)
from backend.data.block import BlockCategory, BlockOutput, BlockSchema
from backend.data.model import APIKeyCredentials, NodeExecutionStats, SchemaField
class AIConditionBlock(AIBlockBase):
"""
An AI-powered condition block that uses natural language to evaluate conditions.
This block allows users to define conditions in plain English (e.g., "the input is an email address",
"the input is a city in the USA") and uses AI to determine if the input satisfies the condition.
It provides the same yes/no data pass-through functionality as the standard ConditionBlock.
"""
class Input(BlockSchema):
input_value: Any = SchemaField(
description="The input value to evaluate with the AI condition",
placeholder="Enter the value to be evaluated (text, number, or any data)",
)
condition: str = SchemaField(
description="A plaintext English description of the condition to evaluate",
placeholder="E.g., 'the input is the body of an email', 'the input is a City in the USA', 'the input is an error or a refusal'",
)
yes_value: Any = SchemaField(
description="(Optional) Value to output if the condition is true. If not provided, input_value will be used.",
placeholder="Leave empty to use input_value, or enter a specific value",
default=None,
)
no_value: Any = SchemaField(
description="(Optional) Value to output if the condition is false. If not provided, input_value will be used.",
placeholder="Leave empty to use input_value, or enter a specific value",
default=None,
)
model: LlmModel = SchemaField(
title="LLM Model",
default=LlmModel.GPT4O,
description="The language model to use for evaluating the condition.",
advanced=False,
)
credentials: AICredentials = AICredentialsField()
class Output(BlockSchema):
result: bool = SchemaField(
description="The result of the AI condition evaluation (True or False)"
)
yes_output: Any = SchemaField(
description="The output value if the condition is true"
)
no_output: Any = SchemaField(
description="The output value if the condition is false"
)
error: str = SchemaField(
description="Error message if the AI evaluation is uncertain or fails"
)
def __init__(self):
super().__init__(
id="553ec5b8-6c45-4299-8d75-b394d05f72ff",
input_schema=AIConditionBlock.Input,
output_schema=AIConditionBlock.Output,
description="Uses AI to evaluate natural language conditions and provide conditional outputs",
categories={BlockCategory.AI, BlockCategory.LOGIC},
test_input={
"input_value": "john@example.com",
"condition": "the input is an email address",
"yes_value": "Valid email",
"no_value": "Not an email",
"model": LlmModel.GPT4O,
"credentials": TEST_CREDENTIALS_INPUT,
},
test_credentials=TEST_CREDENTIALS,
test_output=[
("result", True),
("yes_output", "Valid email"),
],
test_mock={
"llm_call": lambda *args, **kwargs: LLMResponse(
raw_response="",
prompt=[],
response="true",
tool_calls=None,
prompt_tokens=50,
completion_tokens=10,
reasoning=None,
)
},
)
async def llm_call(
self,
credentials: APIKeyCredentials,
llm_model: LlmModel,
prompt: list,
max_tokens: int,
) -> LLMResponse:
"""Wrapper method for llm_call to enable mocking in tests."""
return await llm_call(
credentials=credentials,
llm_model=llm_model,
prompt=prompt,
force_json_output=False,
max_tokens=max_tokens,
)
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
"""
Evaluate the AI condition and return appropriate outputs.
"""
# Prepare the yes and no values, using input_value as default
yes_value = (
input_data.yes_value
if input_data.yes_value is not None
else input_data.input_value
)
no_value = (
input_data.no_value
if input_data.no_value is not None
else input_data.input_value
)
# Convert input_value to string for AI evaluation
input_str = str(input_data.input_value)
# Create the prompt for AI evaluation
prompt = [
{
"role": "system",
"content": (
"You are an AI assistant that evaluates conditions based on input data. "
"You must respond with only 'true' or 'false' (lowercase) to indicate whether "
"the given condition is met by the input value. Be accurate and consider the "
"context and meaning of both the input and the condition."
),
},
{
"role": "user",
"content": (
f"Input value: {input_str}\n"
f"Condition to evaluate: {input_data.condition}\n\n"
f"Does the input value satisfy the condition? Respond with only 'true' or 'false'."
),
},
]
# Call the LLM
try:
response = await self.llm_call(
credentials=credentials,
llm_model=input_data.model,
prompt=prompt,
max_tokens=10, # We only expect a true/false response
)
# Extract the boolean result from the response
response_text = response.response.strip().lower()
if response_text == "true":
result = True
elif response_text == "false":
result = False
else:
# If the response is not clear, try to interpret it using word boundaries
import re
# Use word boundaries to avoid false positives like 'untrue' or '10'
tokens = set(re.findall(r"\b(true|false|yes|no|1|0)\b", response_text))
if tokens == {"true"} or tokens == {"yes"} or tokens == {"1"}:
result = True
elif tokens == {"false"} or tokens == {"no"} or tokens == {"0"}:
result = False
else:
# Unclear or conflicting response - default to False and yield error
result = False
yield "error", f"Unclear AI response: '{response.response}'"
# Update internal stats
self.merge_stats(
NodeExecutionStats(
input_token_count=response.prompt_tokens,
output_token_count=response.completion_tokens,
)
)
self.prompt = response.prompt
except Exception as e:
# In case of any error, default to False to be safe
result = False
# Log the error but don't fail the block execution
import logging
logger = logging.getLogger(__name__)
logger.error(f"AI condition evaluation failed: {str(e)}")
yield "error", f"AI evaluation failed: {str(e)}"
# Yield results
yield "result", result
if result:
yield "yes_output", yes_value
else:
yield "no_output", no_value

View File

@@ -241,7 +241,6 @@ class AirtableCreateRecordsBlock(Block):
class Output(BlockSchema):
records: list[dict] = SchemaField(description="Array of created record objects")
details: dict = SchemaField(description="Details of the created records")
def __init__(self):
super().__init__(
@@ -280,9 +279,6 @@ class AirtableCreateRecordsBlock(Block):
result_records = normalized_data["records"]
yield "records", result_records
details = data.get("details", None)
if details:
yield "details", details
class AirtableUpdateRecordsBlock(Block):

View File

@@ -1,10 +1,8 @@
from enum import Enum
from typing import Any, Literal, Optional
from typing import Literal
from e2b_code_interpreter import AsyncSandbox
from e2b_code_interpreter import Result as E2BExecutionResult
from e2b_code_interpreter.charts import Chart as E2BExecutionResultChart
from pydantic import BaseModel, JsonValue, SecretStr
from pydantic import SecretStr
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import (
@@ -38,135 +36,14 @@ class ProgrammingLanguage(Enum):
JAVA = "java"
class MainCodeExecutionResult(BaseModel):
"""
*Pydantic model mirroring `e2b_code_interpreter.Result`*
Represents the data to be displayed as a result of executing a cell in a Jupyter notebook.
The result is similar to the structure returned by ipython kernel: https://ipython.readthedocs.io/en/stable/development/execution.html#execution-semantics
The result can contain multiple types of data, such as text, images, plots, etc. Each type of data is represented
as a string. Display calls don't have to have a text representation; for the actual result, the text representation
is always present, while the other representations are optional.
""" # noqa
class Chart(BaseModel, E2BExecutionResultChart):
pass
text: Optional[str] = None
html: Optional[str] = None
markdown: Optional[str] = None
svg: Optional[str] = None
png: Optional[str] = None
jpeg: Optional[str] = None
pdf: Optional[str] = None
latex: Optional[str] = None
json: Optional[JsonValue] = None # type: ignore (reportIncompatibleMethodOverride)
javascript: Optional[str] = None
data: Optional[dict] = None
chart: Optional[Chart] = None
extra: Optional[dict] = None
"""Extra data that can be included. Not part of the standard types."""
class CodeExecutionResult(MainCodeExecutionResult):
__doc__ = MainCodeExecutionResult.__doc__
is_main_result: bool = False
"""Whether this data is the main result of the cell. Data can be produced by display calls of which can be multiple in a cell.""" # noqa
class BaseE2BExecutorMixin:
"""Shared implementation methods for E2B executor blocks."""
async def execute_code(
self,
api_key: str,
code: str,
language: ProgrammingLanguage,
template_id: str = "",
setup_commands: Optional[list[str]] = None,
timeout: Optional[int] = None,
sandbox_id: Optional[str] = None,
dispose_sandbox: bool = False,
):
"""
Unified code execution method that handles all three use cases:
1. Create new sandbox and execute (ExecuteCodeBlock)
2. Create new sandbox, execute, and return sandbox_id (InstantiateCodeSandboxBlock)
3. Connect to existing sandbox and execute (ExecuteCodeStepBlock)
""" # noqa
sandbox = None
try:
if sandbox_id:
# Connect to existing sandbox (ExecuteCodeStepBlock case)
sandbox = await AsyncSandbox.connect(
sandbox_id=sandbox_id, api_key=api_key
)
else:
# Create new sandbox (ExecuteCodeBlock/InstantiateCodeSandboxBlock case)
sandbox = await AsyncSandbox.create(
api_key=api_key, template=template_id, timeout=timeout
)
if setup_commands:
for cmd in setup_commands:
await sandbox.commands.run(cmd)
# Execute the code
execution = await sandbox.run_code(
code,
language=language.value,
on_error=lambda e: sandbox.kill(), # Kill the sandbox on error
)
if execution.error:
raise Exception(execution.error)
results = execution.results
text_output = execution.text
stdout_logs = "".join(execution.logs.stdout)
stderr_logs = "".join(execution.logs.stderr)
return results, text_output, stdout_logs, stderr_logs, sandbox.sandbox_id
finally:
# Dispose of sandbox if requested to reduce usage costs
if dispose_sandbox and sandbox:
await sandbox.kill()
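    # Hedged usage sketch of the unified helper above (illustrative only;
    # `API_KEY` and the code strings are placeholders):
    #
    #   # 1. One-shot run; sandbox disposed immediately (ExecuteCodeBlock)
    #   results, text, out, err, _ = await self.execute_code(
    #       API_KEY, "print(1)", ProgrammingLanguage.PYTHON, dispose_sandbox=True
    #   )
    #   # 2. Run setup code, keep the sandbox alive, remember its id
    #   *_, sandbox_id = await self.execute_code(API_KEY, "x = 41", ProgrammingLanguage.PYTHON)
    #   # 3. Attach to that sandbox for a later step (ExecuteCodeStepBlock)
    #   await self.execute_code(API_KEY, "print(x + 1)", ProgrammingLanguage.PYTHON,
    #                           sandbox_id=sandbox_id, dispose_sandbox=True)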
def process_execution_results(
self, results: list[E2BExecutionResult]
) -> tuple[dict[str, Any] | None, list[dict[str, Any]]]:
"""Process and filter execution results."""
# Filter out empty formats and convert to dicts
processed_results = [
{
f: value
for f in [*r.formats(), "extra", "is_main_result"]
if (value := getattr(r, f, None)) is not None
}
for r in results
]
if main_result := next(
(r for r in processed_results if r.get("is_main_result")), None
):
# Make main_result a copy we can modify & remove is_main_result
(main_result := {**main_result}).pop("is_main_result")
return main_result, processed_results
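# Worked example of the extraction above, with plain dicts standing in for
# processed E2B results:
#   processed = [{"text": "42", "is_main_result": True},
#                {"png": "<base64>", "is_main_result": False}]
#   -> main_result == {"text": "42"}   (flag removed from a shallow copy)
#   -> the full processed list is returned unchanged, flags intact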
class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
class CodeExecutionBlock(Block):
# TODO : Add support to upload and download files
# NOTE: Currently, you can only customize the CPU and Memory
# by creating a pre customized sandbox template
# Currently, you can customize the CPU and Memory only by creating a pre-customized sandbox template
class Input(BlockSchema):
credentials: CredentialsMetaInput[
Literal[ProviderName.E2B], Literal["api_key"]
] = CredentialsField(
description=(
"Enter your API key for the E2B platform. "
"You can get it in here - https://e2b.dev/docs"
),
description="Enter your api key for the E2B Sandbox. You can get it in here - https://e2b.dev/docs",
)
# TODO: Option to run command in the background
@@ -199,14 +76,6 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
description="Execution timeout in seconds", default=300
)
dispose_sandbox: bool = SchemaField(
description=(
"Whether to dispose of the sandbox immediately after execution. "
"If disabled, the sandbox will run until its timeout expires."
),
default=True,
)
template_id: str = SchemaField(
description=(
"You can use an E2B sandbox template by entering its ID here. "
@@ -218,16 +87,7 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
)
class Output(BlockSchema):
main_result: MainCodeExecutionResult = SchemaField(
title="Main Result", description="The main result from the code execution"
)
results: list[CodeExecutionResult] = SchemaField(
description="List of results from the code execution"
)
response: str = SchemaField(
title="Main Text Output",
description="Text output (if any) of the main execution result",
)
response: str = SchemaField(description="Response from code execution")
stdout_logs: str = SchemaField(
description="Standard output logs from execution"
)
@@ -237,10 +97,10 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
def __init__(self):
super().__init__(
id="0b02b072-abe7-11ef-8372-fb5d162dd712",
description="Executes code in a sandbox environment with internet access.",
description="Executes code in an isolated sandbox environment with internet access.",
categories={BlockCategory.DEVELOPER_TOOLS},
input_schema=ExecuteCodeBlock.Input,
output_schema=ExecuteCodeBlock.Output,
input_schema=CodeExecutionBlock.Input,
output_schema=CodeExecutionBlock.Output,
test_credentials=TEST_CREDENTIALS,
test_input={
"credentials": TEST_CREDENTIALS_INPUT,
@@ -251,59 +111,91 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
"template_id": "",
},
test_output=[
("results", []),
("response", "Hello World"),
("stdout_logs", "Hello World\n"),
],
test_mock={
"execute_code": lambda api_key, code, language, template_id, setup_commands, timeout, dispose_sandbox: ( # noqa
[], # results
"Hello World", # text_output
"Hello World\n", # stdout_logs
"", # stderr_logs
"sandbox_id", # sandbox_id
"execute_code": lambda code, language, setup_commands, timeout, api_key, template_id: (
"Hello World",
"Hello World\n",
"",
),
},
)
async def execute_code(
self,
code: str,
language: ProgrammingLanguage,
setup_commands: list[str],
timeout: int,
api_key: str,
template_id: str,
):
try:
sandbox = None
if template_id:
sandbox = await AsyncSandbox.create(
template=template_id, api_key=api_key, timeout=timeout
)
else:
sandbox = await AsyncSandbox.create(api_key=api_key, timeout=timeout)
if not sandbox:
raise Exception("Sandbox not created")
# Running setup commands
for cmd in setup_commands:
await sandbox.commands.run(cmd)
# Executing the code
execution = await sandbox.run_code(
code,
language=language.value,
on_error=lambda e: sandbox.kill(), # Kill the sandbox if there is an error
)
if execution.error:
raise Exception(execution.error)
response = execution.text
stdout_logs = "".join(execution.logs.stdout)
stderr_logs = "".join(execution.logs.stderr)
return response, stdout_logs, stderr_logs
except Exception as e:
raise e
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
try:
results, text_output, stdout, stderr, _ = await self.execute_code(
api_key=credentials.api_key.get_secret_value(),
code=input_data.code,
language=input_data.language,
template_id=input_data.template_id,
setup_commands=input_data.setup_commands,
timeout=input_data.timeout,
dispose_sandbox=input_data.dispose_sandbox,
response, stdout_logs, stderr_logs = await self.execute_code(
input_data.code,
input_data.language,
input_data.setup_commands,
input_data.timeout,
credentials.api_key.get_secret_value(),
input_data.template_id,
)
# Determine result object shape & filter out empty formats
main_result, results = self.process_execution_results(results)
if main_result:
yield "main_result", main_result
yield "results", results
if text_output:
yield "response", text_output
if stdout:
yield "stdout_logs", stdout
if stderr:
yield "stderr_logs", stderr
if response:
yield "response", response
if stdout_logs:
yield "stdout_logs", stdout_logs
if stderr_logs:
yield "stderr_logs", stderr_logs
except Exception as e:
yield "error", str(e)
class InstantiateCodeSandboxBlock(Block, BaseE2BExecutorMixin):
class InstantiationBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput[
Literal[ProviderName.E2B], Literal["api_key"]
] = CredentialsField(
description=(
"Enter your API key for the E2B platform. "
"You can get it in here - https://e2b.dev/docs"
)
description="Enter your api key for the E2B Sandbox. You can get it in here - https://e2b.dev/docs",
)
# TODO: Option to run command in the background
@@ -348,10 +240,7 @@ class InstantiateCodeSandboxBlock(Block, BaseE2BExecutorMixin):
class Output(BlockSchema):
sandbox_id: str = SchemaField(description="ID of the sandbox instance")
response: str = SchemaField(
title="Text Result",
description="Text result (if any) of the setup code execution",
)
response: str = SchemaField(description="Response from code execution")
stdout_logs: str = SchemaField(
description="Standard output logs from execution"
)
@@ -361,13 +250,10 @@ class InstantiateCodeSandboxBlock(Block, BaseE2BExecutorMixin):
def __init__(self):
super().__init__(
id="ff0861c9-1726-4aec-9e5b-bf53f3622112",
description=(
"Instantiate a sandbox environment with internet access "
"in which you can execute code with the Execute Code Step block."
),
description="Instantiate an isolated sandbox environment with internet access where to execute code in.",
categories={BlockCategory.DEVELOPER_TOOLS},
input_schema=InstantiateCodeSandboxBlock.Input,
output_schema=InstantiateCodeSandboxBlock.Output,
input_schema=InstantiationBlock.Input,
output_schema=InstantiationBlock.Output,
test_credentials=TEST_CREDENTIALS,
test_input={
"credentials": TEST_CREDENTIALS_INPUT,
@@ -383,12 +269,11 @@ class InstantiateCodeSandboxBlock(Block, BaseE2BExecutorMixin):
("stdout_logs", "Hello World\n"),
],
test_mock={
"execute_code": lambda api_key, code, language, template_id, setup_commands, timeout: ( # noqa
[], # results
"Hello World", # text_output
"Hello World\n", # stdout_logs
"", # stderr_logs
"sandbox_id", # sandbox_id
"execute_code": lambda setup_code, language, setup_commands, timeout, api_key, template_id: (
"sandbox_id",
"Hello World",
"Hello World\n",
"",
),
},
)
@@ -397,38 +282,78 @@ class InstantiateCodeSandboxBlock(Block, BaseE2BExecutorMixin):
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
try:
_, text_output, stdout, stderr, sandbox_id = await self.execute_code(
api_key=credentials.api_key.get_secret_value(),
code=input_data.setup_code,
language=input_data.language,
template_id=input_data.template_id,
setup_commands=input_data.setup_commands,
timeout=input_data.timeout,
sandbox_id, response, stdout_logs, stderr_logs = await self.execute_code(
input_data.setup_code,
input_data.language,
input_data.setup_commands,
input_data.timeout,
credentials.api_key.get_secret_value(),
input_data.template_id,
)
if sandbox_id:
yield "sandbox_id", sandbox_id
else:
yield "error", "Sandbox ID not found"
if text_output:
yield "response", text_output
if stdout:
yield "stdout_logs", stdout
if stderr:
yield "stderr_logs", stderr
if response:
yield "response", response
if stdout_logs:
yield "stdout_logs", stdout_logs
if stderr_logs:
yield "stderr_logs", stderr_logs
except Exception as e:
yield "error", str(e)
async def execute_code(
self,
code: str,
language: ProgrammingLanguage,
setup_commands: list[str],
timeout: int,
api_key: str,
template_id: str,
):
try:
sandbox = None
if template_id:
sandbox = await AsyncSandbox.create(
template=template_id, api_key=api_key, timeout=timeout
)
else:
sandbox = await AsyncSandbox.create(api_key=api_key, timeout=timeout)
class ExecuteCodeStepBlock(Block, BaseE2BExecutorMixin):
if not sandbox:
raise Exception("Sandbox not created")
# Running setup commands
for cmd in setup_commands:
await sandbox.commands.run(cmd)
# Executing the code
execution = await sandbox.run_code(
code,
language=language.value,
on_error=lambda e: sandbox.kill(), # Kill the sandbox if there is an error
)
if execution.error:
raise Exception(execution.error)
response = execution.text
stdout_logs = "".join(execution.logs.stdout)
stderr_logs = "".join(execution.logs.stderr)
return sandbox.sandbox_id, response, stdout_logs, stderr_logs
except Exception as e:
raise e
class StepExecutionBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput[
Literal[ProviderName.E2B], Literal["api_key"]
] = CredentialsField(
description=(
"Enter your API key for the E2B platform. "
"You can get it in here - https://e2b.dev/docs"
),
description="Enter your api key for the E2B Sandbox. You can get it in here - https://e2b.dev/docs",
)
sandbox_id: str = SchemaField(
@@ -449,22 +374,8 @@ class ExecuteCodeStepBlock(Block, BaseE2BExecutorMixin):
advanced=False,
)
dispose_sandbox: bool = SchemaField(
description="Whether to dispose of the sandbox after executing this code.",
default=False,
)
class Output(BlockSchema):
main_result: MainCodeExecutionResult = SchemaField(
title="Main Result", description="The main result from the code execution"
)
results: list[CodeExecutionResult] = SchemaField(
description="List of results from the code execution"
)
response: str = SchemaField(
title="Main Text Output",
description="Text output (if any) of the main execution result",
)
response: str = SchemaField(description="Response from code execution")
stdout_logs: str = SchemaField(
description="Standard output logs from execution"
)
@@ -474,10 +385,10 @@ class ExecuteCodeStepBlock(Block, BaseE2BExecutorMixin):
def __init__(self):
super().__init__(
id="82b59b8e-ea10-4d57-9161-8b169b0adba6",
description="Execute code in a previously instantiated sandbox.",
description="Execute code in a previously instantiated sandbox environment.",
categories={BlockCategory.DEVELOPER_TOOLS},
input_schema=ExecuteCodeStepBlock.Input,
output_schema=ExecuteCodeStepBlock.Output,
input_schema=StepExecutionBlock.Input,
output_schema=StepExecutionBlock.Output,
test_credentials=TEST_CREDENTIALS,
test_input={
"credentials": TEST_CREDENTIALS_INPUT,
@@ -486,43 +397,61 @@ class ExecuteCodeStepBlock(Block, BaseE2BExecutorMixin):
"language": ProgrammingLanguage.PYTHON.value,
},
test_output=[
("results", []),
("response", "Hello World"),
("stdout_logs", "Hello World\n"),
],
test_mock={
"execute_code": lambda api_key, code, language, sandbox_id, dispose_sandbox: ( # noqa
[], # results
"Hello World", # text_output
"Hello World\n", # stdout_logs
"", # stderr_logs
sandbox_id, # sandbox_id
"execute_step_code": lambda sandbox_id, step_code, language, api_key: (
"Hello World",
"Hello World\n",
"",
),
},
)
async def execute_step_code(
self,
sandbox_id: str,
code: str,
language: ProgrammingLanguage,
api_key: str,
):
try:
sandbox = await AsyncSandbox.connect(sandbox_id=sandbox_id, api_key=api_key)
if not sandbox:
raise Exception("Sandbox not found")
# Executing the code
execution = await sandbox.run_code(code, language=language.value)
if execution.error:
raise Exception(execution.error)
response = execution.text
stdout_logs = "".join(execution.logs.stdout)
stderr_logs = "".join(execution.logs.stderr)
return response, stdout_logs, stderr_logs
except Exception as e:
raise e
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
try:
results, text_output, stdout, stderr, _ = await self.execute_code(
api_key=credentials.api_key.get_secret_value(),
code=input_data.step_code,
language=input_data.language,
sandbox_id=input_data.sandbox_id,
dispose_sandbox=input_data.dispose_sandbox,
response, stdout_logs, stderr_logs = await self.execute_step_code(
input_data.sandbox_id,
input_data.step_code,
input_data.language,
credentials.api_key.get_secret_value(),
)
# Determine result object shape & filter out empty formats
main_result, results = self.process_execution_results(results)
if main_result:
yield "main_result", main_result
yield "results", results
if text_output:
yield "response", text_output
if stdout:
yield "stdout_logs", stdout
if stderr:
yield "stderr_logs", stderr
if response:
yield "response", response
if stdout_logs:
yield "stdout_logs", stdout_logs
if stderr_logs:
yield "stderr_logs", stderr_logs
except Exception as e:
yield "error", str(e)

View File

@@ -90,7 +90,7 @@ class CodeExtractionBlock(Block):
for aliases in language_aliases.values()
for alias in aliases
)
+ r")[ \t]*\n[\s\S]*?```"
+ r")\s+[\s\S]*?```"
)
remaining_text = re.sub(pattern, "", input_data.text).strip()
@@ -103,9 +103,7 @@ class CodeExtractionBlock(Block):
# Escape special regex characters in the language string
language = re.escape(language)
# Extract all code blocks enclosed in ```language``` blocks
pattern = re.compile(
rf"```{language}[ \t]*\n(.*?)\n```", re.DOTALL | re.IGNORECASE
)
pattern = re.compile(rf"```{language}\s+(.*?)```", re.DOTALL | re.IGNORECASE)
matches = pattern.finditer(text)
# Combine all code blocks for this language with newlines between them
code_blocks = [match.group(1).strip() for match in matches]

View File

@@ -66,7 +66,6 @@ class AddToDictionaryBlock(Block):
dictionary: dict[Any, Any] = SchemaField(
default_factory=dict,
description="The dictionary to add the entry to. If not provided, a new dictionary will be created.",
advanced=False,
)
key: str = SchemaField(
default="",

View File

@@ -113,7 +113,6 @@ class DataForSeoClient:
include_serp_info: bool = False,
include_clickstream_data: bool = False,
limit: int = 100,
depth: Optional[int] = None,
) -> List[Dict[str, Any]]:
"""
Get related keywords from DataForSEO Labs.
@@ -126,7 +125,6 @@ class DataForSeoClient:
include_serp_info: Include SERP data
include_clickstream_data: Include clickstream metrics
limit: Maximum number of results (up to 3000)
depth: Keyword search depth (0-4), controls number of returned keywords
Returns:
API response with related keywords
@@ -150,8 +148,6 @@ class DataForSeoClient:
task_data["include_clickstream_data"] = include_clickstream_data
if limit is not None:
task_data["limit"] = limit
if depth is not None:
task_data["depth"] = depth
payload = [task_data]
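# Illustration of the optional-parameter assembly above (sample values are
# made up; field names follow this hunk):
#   task_data = {"keyword": "coffee", "language_code": "en"}
#   if limit is not None:        # only send fields the caller provided
#       task_data["limit"] = limit
#   payload = [task_data]        # the API expects a list of task objects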

View File

@@ -90,7 +90,6 @@ class DataForSeoKeywordSuggestionsBlock(Block):
seed_keyword: str = SchemaField(
description="The seed keyword used for the query"
)
error: str = SchemaField(description="Error message if the API call failed")
def __init__(self):
super().__init__(
@@ -162,52 +161,43 @@ class DataForSeoKeywordSuggestionsBlock(Block):
**kwargs,
) -> BlockOutput:
"""Execute the keyword suggestions query."""
try:
client = DataForSeoClient(credentials)
client = DataForSeoClient(credentials)
results = await self._fetch_keyword_suggestions(client, input_data)
results = await self._fetch_keyword_suggestions(client, input_data)
# Process and format the results
suggestions = []
if results and len(results) > 0:
# results is a list, get the first element
first_result = results[0] if isinstance(results, list) else results
items = (
first_result.get("items", [])
if isinstance(first_result, dict)
else []
# Process and format the results
suggestions = []
if results and len(results) > 0:
# results is a list, get the first element
first_result = results[0] if isinstance(results, list) else results
items = (
first_result.get("items", []) if isinstance(first_result, dict) else []
)
for item in items:
# Create the KeywordSuggestion object
suggestion = KeywordSuggestion(
keyword=item.get("keyword", ""),
search_volume=item.get("keyword_info", {}).get("search_volume"),
competition=item.get("keyword_info", {}).get("competition"),
cpc=item.get("keyword_info", {}).get("cpc"),
keyword_difficulty=item.get("keyword_properties", {}).get(
"keyword_difficulty"
),
serp_info=(
item.get("serp_info") if input_data.include_serp_info else None
),
clickstream_data=(
item.get("clickstream_keyword_info")
if input_data.include_clickstream_data
else None
),
)
if items is None:
items = []
for item in items:
# Create the KeywordSuggestion object
suggestion = KeywordSuggestion(
keyword=item.get("keyword", ""),
search_volume=item.get("keyword_info", {}).get("search_volume"),
competition=item.get("keyword_info", {}).get("competition"),
cpc=item.get("keyword_info", {}).get("cpc"),
keyword_difficulty=item.get("keyword_properties", {}).get(
"keyword_difficulty"
),
serp_info=(
item.get("serp_info")
if input_data.include_serp_info
else None
),
clickstream_data=(
item.get("clickstream_keyword_info")
if input_data.include_clickstream_data
else None
),
)
yield "suggestion", suggestion
suggestions.append(suggestion)
yield "suggestion", suggestion
suggestions.append(suggestion)
yield "suggestions", suggestions
yield "total_count", len(suggestions)
yield "seed_keyword", input_data.keyword
except Exception as e:
yield "error", f"Failed to fetch keyword suggestions: {str(e)}"
yield "suggestions", suggestions
yield "total_count", len(suggestions)
yield "seed_keyword", input_data.keyword
class KeywordSuggestionExtractorBlock(Block):

View File

@@ -78,12 +78,6 @@ class DataForSeoRelatedKeywordsBlock(Block):
ge=1,
le=3000,
)
depth: int = SchemaField(
description="Keyword search depth (0-4). Controls the number of returned keywords: 0=1 keyword, 1=~8 keywords, 2=~72 keywords, 3=~584 keywords, 4=~4680 keywords",
default=1,
ge=0,
le=4,
)
class Output(BlockSchema):
related_keywords: List[RelatedKeyword] = SchemaField(
@@ -98,7 +92,6 @@ class DataForSeoRelatedKeywordsBlock(Block):
seed_keyword: str = SchemaField(
description="The seed keyword used for the query"
)
error: str = SchemaField(description="Error message if the API call failed")
def __init__(self):
super().__init__(
@@ -161,7 +154,6 @@ class DataForSeoRelatedKeywordsBlock(Block):
include_serp_info=input_data.include_serp_info,
include_clickstream_data=input_data.include_clickstream_data,
limit=input_data.limit,
depth=input_data.depth,
)
async def run(
@@ -172,60 +164,50 @@ class DataForSeoRelatedKeywordsBlock(Block):
**kwargs,
) -> BlockOutput:
"""Execute the related keywords query."""
try:
client = DataForSeoClient(credentials)
client = DataForSeoClient(credentials)
results = await self._fetch_related_keywords(client, input_data)
results = await self._fetch_related_keywords(client, input_data)
# Process and format the results
related_keywords = []
if results and len(results) > 0:
# results is a list, get the first element
first_result = results[0] if isinstance(results, list) else results
items = (
first_result.get("items", [])
if isinstance(first_result, dict)
else []
# Process and format the results
related_keywords = []
if results and len(results) > 0:
# results is a list, get the first element
first_result = results[0] if isinstance(results, list) else results
items = (
first_result.get("items", []) if isinstance(first_result, dict) else []
)
for item in items:
# Extract keyword_data from the item
keyword_data = item.get("keyword_data", {})
# Create the RelatedKeyword object
keyword = RelatedKeyword(
keyword=keyword_data.get("keyword", ""),
search_volume=keyword_data.get("keyword_info", {}).get(
"search_volume"
),
competition=keyword_data.get("keyword_info", {}).get("competition"),
cpc=keyword_data.get("keyword_info", {}).get("cpc"),
keyword_difficulty=keyword_data.get("keyword_properties", {}).get(
"keyword_difficulty"
),
serp_info=(
keyword_data.get("serp_info")
if input_data.include_serp_info
else None
),
clickstream_data=(
keyword_data.get("clickstream_keyword_info")
if input_data.include_clickstream_data
else None
),
)
# Ensure items is never None
if items is None:
items = []
for item in items:
# Extract keyword_data from the item
keyword_data = item.get("keyword_data", {})
yield "related_keyword", keyword
related_keywords.append(keyword)
# Create the RelatedKeyword object
keyword = RelatedKeyword(
keyword=keyword_data.get("keyword", ""),
search_volume=keyword_data.get("keyword_info", {}).get(
"search_volume"
),
competition=keyword_data.get("keyword_info", {}).get(
"competition"
),
cpc=keyword_data.get("keyword_info", {}).get("cpc"),
keyword_difficulty=keyword_data.get(
"keyword_properties", {}
).get("keyword_difficulty"),
serp_info=(
keyword_data.get("serp_info")
if input_data.include_serp_info
else None
),
clickstream_data=(
keyword_data.get("clickstream_keyword_info")
if input_data.include_clickstream_data
else None
),
)
yield "related_keyword", keyword
related_keywords.append(keyword)
yield "related_keywords", related_keywords
yield "total_count", len(related_keywords)
yield "seed_keyword", input_data.keyword
except Exception as e:
yield "error", f"Failed to fetch related keywords: {str(e)}"
yield "related_keywords", related_keywords
yield "total_count", len(related_keywords)
yield "seed_keyword", input_data.keyword
class RelatedKeywordExtractorBlock(Block):

View File

@@ -171,11 +171,11 @@ class SendDiscordMessageBlock(Block):
description="The content of the message to send"
)
channel_name: str = SchemaField(
description="Channel ID or channel name to send the message to"
description="The name of the channel the message will be sent to"
)
server_name: str = SchemaField(
description="Server name (only needed if using channel name)",
advanced=True,
description="The name of the server where the channel is located",
advanced=True, # Optional field for server name
default="",
)
@@ -231,49 +231,25 @@ class SendDiscordMessageBlock(Block):
@client.event
async def on_ready():
print(f"Logged in as {client.user}")
channel = None
for guild in client.guilds:
if server_name and guild.name != server_name:
continue
for channel in guild.text_channels:
if channel.name == channel_name:
# Split message into chunks if it exceeds 2000 characters
chunks = self.chunk_message(message_content)
last_message = None
for chunk in chunks:
last_message = await channel.send(chunk)
result["status"] = "Message sent"
result["message_id"] = (
str(last_message.id) if last_message else ""
)
result["channel_id"] = str(channel.id)
await client.close()
return
# Try to parse as channel ID first
try:
channel_id = int(channel_name)
channel = client.get_channel(channel_id)
except ValueError:
# Not a valid ID, will try name lookup
pass
# If not found by ID (or not an ID), try name lookup
if not channel:
for guild in client.guilds:
if server_name and guild.name != server_name:
continue
for ch in guild.text_channels:
if ch.name == channel_name:
channel = ch
break
if channel:
break
if not channel:
result["status"] = f"Channel not found: {channel_name}"
await client.close()
return
# Type check - ensure it's a text channel that can send messages
if not hasattr(channel, "send"):
result["status"] = (
f"Channel {channel_name} cannot receive messages (not a text channel)"
)
await client.close()
return
# Split message into chunks if it exceeds 2000 characters
chunks = self.chunk_message(message_content)
last_message = None
for chunk in chunks:
last_message = await channel.send(chunk) # type: ignore
result["status"] = "Message sent"
result["message_id"] = str(last_message.id) if last_message else ""
result["channel_id"] = str(channel.id)
result["status"] = "Channel not found"
await client.close()
await client.start(token)
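# A hedged sketch of the resolution order implemented above, extracted into
# a hypothetical helper (not part of this file): numeric input is tried as a
# channel ID first; anything else falls back to a name search across guilds.
def resolve_channel(client, channel_name: str, server_name: str = ""):
    try:
        if channel := client.get_channel(int(channel_name)):
            return channel
    except ValueError:
        pass  # not a numeric ID; fall through to name lookup
    for guild in client.guilds:
        if server_name and guild.name != server_name:
            continue
        for ch in guild.text_channels:
            if ch.name == channel_name:
                return ch
    return None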

View File

@@ -1,12 +0,0 @@
from enum import Enum
class ScrapeFormat(Enum):
MARKDOWN = "markdown"
HTML = "html"
RAW_HTML = "rawHtml"
LINKS = "links"
SCREENSHOT = "screenshot"
SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
JSON = "json"
CHANGE_TRACKING = "changeTracking"

View File

@@ -1,28 +0,0 @@
"""Utility functions for converting between our ScrapeFormat enum and firecrawl FormatOption types."""
from typing import List
from firecrawl.v2.types import FormatOption, ScreenshotFormat
from backend.blocks.firecrawl._api import ScrapeFormat
def convert_to_format_options(
formats: List[ScrapeFormat],
) -> List[FormatOption]:
"""Convert our ScrapeFormat enum values to firecrawl FormatOption types.
Handles special cases like screenshot@fullPage which needs to be converted
to a ScreenshotFormat object.
"""
result: List[FormatOption] = []
for format_enum in formats:
if format_enum.value == "screenshot@fullPage":
# Special case: convert to ScreenshotFormat with full_page=True
result.append(ScreenshotFormat(type="screenshot", full_page=True))
else:
# Regular string literals
result.append(format_enum.value)
return result
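# Example usage of the converter above:
#   convert_to_format_options([ScrapeFormat.MARKDOWN,
#                              ScrapeFormat.SCREENSHOT_FULL_PAGE])
#   -> ["markdown", ScreenshotFormat(type="screenshot", full_page=True)]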

View File

@@ -1,9 +1,8 @@
from enum import Enum
from typing import Any
from firecrawl import FirecrawlApp
from firecrawl.v2.types import ScrapeOptions
from firecrawl import FirecrawlApp, ScrapeOptions
from backend.blocks.firecrawl._api import ScrapeFormat
from backend.sdk import (
APIKeyCredentials,
Block,
@@ -15,10 +14,21 @@ from backend.sdk import (
)
from ._config import firecrawl
from ._format_utils import convert_to_format_options
class ScrapeFormat(Enum):
MARKDOWN = "markdown"
HTML = "html"
RAW_HTML = "rawHtml"
LINKS = "links"
SCREENSHOT = "screenshot"
SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
JSON = "json"
CHANGE_TRACKING = "changeTracking"
class FirecrawlCrawlBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
url: str = SchemaField(description="The URL to crawl")
@@ -68,17 +78,18 @@ class FirecrawlCrawlBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
# Sync call
crawl_result = app.crawl(
crawl_result = app.crawl_url(
input_data.url,
limit=input_data.limit,
scrape_options=ScrapeOptions(
formats=convert_to_format_options(input_data.formats),
only_main_content=input_data.only_main_content,
max_age=input_data.max_age,
wait_for=input_data.wait_for,
formats=[format.value for format in input_data.formats],
onlyMainContent=input_data.only_main_content,
maxAge=input_data.max_age,
waitFor=input_data.wait_for,
),
)
yield "data", crawl_result.data
@@ -90,7 +101,7 @@ class FirecrawlCrawlBlock(Block):
elif f == ScrapeFormat.HTML:
yield "html", data.html
elif f == ScrapeFormat.RAW_HTML:
yield "raw_html", data.raw_html
yield "raw_html", data.rawHtml
elif f == ScrapeFormat.LINKS:
yield "links", data.links
elif f == ScrapeFormat.SCREENSHOT:
@@ -98,6 +109,6 @@ class FirecrawlCrawlBlock(Block):
elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
yield "screenshot_full_page", data.screenshot
elif f == ScrapeFormat.CHANGE_TRACKING:
yield "change_tracking", data.change_tracking
yield "change_tracking", data.changeTracking
elif f == ScrapeFormat.JSON:
yield "json", data.json

View File

@@ -20,6 +20,7 @@ from ._config import firecrawl
@cost(BlockCost(2, BlockCostType.RUN))
class FirecrawlExtractBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
urls: list[str] = SchemaField(
@@ -52,6 +53,7 @@ class FirecrawlExtractBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
extract_result = app.extract(

View File

@@ -1,5 +1,3 @@
from typing import Any
from firecrawl import FirecrawlApp
from backend.sdk import (
@@ -16,16 +14,14 @@ from ._config import firecrawl
class FirecrawlMapWebsiteBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
url: str = SchemaField(description="The website url to map")
class Output(BlockSchema):
links: list[str] = SchemaField(description="List of URLs found on the website")
results: list[dict[str, Any]] = SchemaField(
description="List of search results with url, title, and description"
)
links: list[str] = SchemaField(description="The links of the website")
def __init__(self):
super().__init__(
@@ -39,22 +35,12 @@ class FirecrawlMapWebsiteBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
# Sync call
map_result = app.map(
map_result = app.map_url(
url=input_data.url,
)
# Convert SearchResult objects to dicts
results_data = [
{
"url": link.url,
"title": link.title,
"description": link.description,
}
for link in map_result.links
]
yield "links", [link.url for link in map_result.links]
yield "results", results_data
yield "links", map_result.links

View File

@@ -1,8 +1,8 @@
from enum import Enum
from typing import Any
from firecrawl import FirecrawlApp
from backend.blocks.firecrawl._api import ScrapeFormat
from backend.sdk import (
APIKeyCredentials,
Block,
@@ -14,10 +14,21 @@ from backend.sdk import (
)
from ._config import firecrawl
from ._format_utils import convert_to_format_options
class ScrapeFormat(Enum):
MARKDOWN = "markdown"
HTML = "html"
RAW_HTML = "rawHtml"
LINKS = "links"
SCREENSHOT = "screenshot"
SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
JSON = "json"
CHANGE_TRACKING = "changeTracking"
class FirecrawlScrapeBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
url: str = SchemaField(description="The URL to crawl")
@@ -67,11 +78,12 @@ class FirecrawlScrapeBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
scrape_result = app.scrape(
scrape_result = app.scrape_url(
input_data.url,
formats=convert_to_format_options(input_data.formats),
formats=[format.value for format in input_data.formats],
only_main_content=input_data.only_main_content,
max_age=input_data.max_age,
wait_for=input_data.wait_for,
@@ -84,7 +96,7 @@ class FirecrawlScrapeBlock(Block):
elif f == ScrapeFormat.HTML:
yield "html", scrape_result.html
elif f == ScrapeFormat.RAW_HTML:
yield "raw_html", scrape_result.raw_html
yield "raw_html", scrape_result.rawHtml
elif f == ScrapeFormat.LINKS:
yield "links", scrape_result.links
elif f == ScrapeFormat.SCREENSHOT:
@@ -92,6 +104,6 @@ class FirecrawlScrapeBlock(Block):
elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
yield "screenshot_full_page", scrape_result.screenshot
elif f == ScrapeFormat.CHANGE_TRACKING:
yield "change_tracking", scrape_result.change_tracking
yield "change_tracking", scrape_result.changeTracking
elif f == ScrapeFormat.JSON:
yield "json", scrape_result.json

View File

@@ -1,9 +1,8 @@
from enum import Enum
from typing import Any
from firecrawl import FirecrawlApp
from firecrawl.v2.types import ScrapeOptions
from firecrawl import FirecrawlApp, ScrapeOptions
from backend.blocks.firecrawl._api import ScrapeFormat
from backend.sdk import (
APIKeyCredentials,
Block,
@@ -15,10 +14,21 @@ from backend.sdk import (
)
from ._config import firecrawl
from ._format_utils import convert_to_format_options
class ScrapeFormat(Enum):
MARKDOWN = "markdown"
HTML = "html"
RAW_HTML = "rawHtml"
LINKS = "links"
SCREENSHOT = "screenshot"
SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
JSON = "json"
CHANGE_TRACKING = "changeTracking"
class FirecrawlSearchBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
query: str = SchemaField(description="The query to search for")
@@ -51,6 +61,7 @@ class FirecrawlSearchBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
# Sync call
@@ -58,12 +69,11 @@ class FirecrawlSearchBlock(Block):
input_data.query,
limit=input_data.limit,
scrape_options=ScrapeOptions(
formats=convert_to_format_options(input_data.formats) or None,
max_age=input_data.max_age,
wait_for=input_data.wait_for,
formats=[format.value for format in input_data.formats],
maxAge=input_data.max_age,
waitFor=input_data.wait_for,
),
)
yield "data", scrape_result
if hasattr(scrape_result, "web") and scrape_result.web:
for site in scrape_result.web:
yield "site", site
for site in scrape_result.data:
yield "site", site

View File

@@ -10,6 +10,7 @@ from backend.util.settings import Config
from backend.util.text import TextFormatter
from backend.util.type import LongTextType, MediaFileType, ShortTextType
formatter = TextFormatter()
config = Config()
@@ -131,11 +132,6 @@ class AgentOutputBlock(Block):
default="",
advanced=True,
)
escape_html: bool = SchemaField(
default=False,
advanced=True,
description="Whether to escape special characters in the inserted values to be HTML-safe. Enable for HTML output, disable for plain text.",
)
advanced: bool = SchemaField(
description="Whether to treat the output as advanced.",
default=False,
@@ -197,7 +193,6 @@ class AgentOutputBlock(Block):
"""
if input_data.format:
try:
formatter = TextFormatter(autoescape=input_data.escape_html)
yield "output", formatter.format_string(
input_data.format, {input_data.name: input_data.value}
)
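# Hedged sketch of the escape_html toggle above (the exact escaping behavior
# is assumed from the autoescape flag; template syntax elided):
#   with autoescape=True,  an inserted value "<b>" renders as "&lt;b&gt;"
#   with autoescape=False, it is inserted verbatim as "<b>"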
@@ -554,89 +549,6 @@ class AgentToggleInputBlock(AgentInputBlock):
)
class AgentTableInputBlock(AgentInputBlock):
"""
This block allows users to input data in a table format.
Configure the table columns at build time, then users can input
rows of data at runtime. Each row is output as a dictionary
with column names as keys.
"""
class Input(AgentInputBlock.Input):
value: Optional[list[dict[str, Any]]] = SchemaField(
description="The table data as a list of dictionaries.",
default=None,
advanced=False,
title="Default Value",
)
column_headers: list[str] = SchemaField(
description="Column headers for the table.",
default_factory=lambda: ["Column 1", "Column 2", "Column 3"],
advanced=False,
title="Column Headers",
)
def generate_schema(self):
"""Generate schema for the value field with table format."""
schema = super().generate_schema()
schema["type"] = "array"
schema["format"] = "table"
schema["items"] = {
"type": "object",
"properties": {
header: {"type": "string"}
for header in (
self.column_headers or ["Column 1", "Column 2", "Column 3"]
)
},
}
if self.value is not None:
schema["default"] = self.value
return schema
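# Example of the schema generated above for column_headers=["Name", "Age"]:
#   {
#     "type": "array",
#     "format": "table",
#     "items": {
#       "type": "object",
#       "properties": {"Name": {"type": "string"}, "Age": {"type": "string"}}
#     }
#   }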
class Output(AgentInputBlock.Output):
result: list[dict[str, Any]] = SchemaField(
description="The table data as a list of dictionaries with headers as keys."
)
def __init__(self):
super().__init__(
id="5603b273-f41e-4020-af7d-fbc9c6a8d928",
description="Block for table data input with customizable headers.",
disabled=not config.enable_agent_input_subtype_blocks,
input_schema=AgentTableInputBlock.Input,
output_schema=AgentTableInputBlock.Output,
test_input=[
{
"name": "test_table",
"column_headers": ["Name", "Age", "City"],
"value": [
{"Name": "John", "Age": "30", "City": "New York"},
{"Name": "Jane", "Age": "25", "City": "London"},
],
"description": "Example table input",
}
],
test_output=[
(
"result",
[
{"Name": "John", "Age": "30", "City": "New York"},
{"Name": "Jane", "Age": "25", "City": "London"},
],
)
],
)
async def run(self, input_data: Input, *args, **kwargs) -> BlockOutput:
"""
Yields the table data as a list of dictionaries.
"""
# Pass through the value, defaulting to empty list if None
yield "result", input_data.value if input_data.value is not None else []
IO_BLOCK_IDs = [
AgentInputBlock().id,
AgentOutputBlock().id,
@@ -648,5 +560,4 @@ IO_BLOCK_IDs = [
AgentFileInputBlock().id,
AgentDropdownInputBlock().id,
AgentToggleInputBlock().id,
AgentTableInputBlock().id,
]

View File

@@ -54,43 +54,20 @@ class StepThroughItemsBlock(Block):
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
# Security fix: Add limits to prevent DoS from large iterations
MAX_ITEMS = 10000 # Maximum items to iterate
MAX_ITEM_SIZE = 1024 * 1024 # 1MB per item
for data in [input_data.items, input_data.items_object, input_data.items_str]:
if not data:
continue
# Limit string size before parsing
if isinstance(data, str):
if len(data) > MAX_ITEM_SIZE:
raise ValueError(
f"Input too large: {len(data)} bytes > {MAX_ITEM_SIZE} bytes"
)
items = json.loads(data)
else:
items = data
# Check total item count
if isinstance(items, (list, dict)):
if len(items) > MAX_ITEMS:
raise ValueError(f"Too many items: {len(items)} > {MAX_ITEMS}")
iteration_count = 0
if isinstance(items, dict):
# If items is a dictionary, iterate over its values
for key, value in items.items():
if iteration_count >= MAX_ITEMS:
break
yield "item", value
yield "key", key # Fixed: should yield key, not item
iteration_count += 1
for item in items.values():
yield "item", item
yield "key", item
else:
# If items is a list, iterate over the list
for index, item in enumerate(items):
if iteration_count >= MAX_ITEMS:
break
yield "item", item
yield "key", index
iteration_count += 1
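# Illustration of the iteration contract above (MAX_ITEMS cap elided):
#   {"a": 1, "b": 2} -> ("item", 1), ("key", "a"), ("item", 2), ("key", "b")
#   ["x", "y"]       -> ("item", "x"), ("key", 0), ("item", "y"), ("key", 1)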

View File

@@ -1,8 +1,5 @@
from typing import List
from urllib.parse import quote
from typing_extensions import TypedDict
from backend.blocks.jina._auth import (
JinaCredentials,
JinaCredentialsField,
@@ -13,12 +10,6 @@ from backend.data.model import SchemaField
from backend.util.request import Requests
class Reference(TypedDict):
url: str
keyQuote: str
isSupportive: bool
class FactCheckerBlock(Block):
class Input(BlockSchema):
statement: str = SchemaField(
@@ -32,10 +23,6 @@ class FactCheckerBlock(Block):
)
result: bool = SchemaField(description="The result of the factuality check")
reason: str = SchemaField(description="The reason for the factuality result")
references: List[Reference] = SchemaField(
description="List of references supporting or contradicting the statement",
default=[],
)
error: str = SchemaField(description="Error message if the check fails")
def __init__(self):
@@ -66,11 +53,5 @@ class FactCheckerBlock(Block):
yield "factuality", data["factuality"]
yield "result", data["result"]
yield "reason", data["reason"]
# Yield references if present in the response
if "references" in data:
yield "references", data["references"]
else:
yield "references", []
else:
raise RuntimeError(f"Expected 'data' key not found in response: {data}")

View File

@@ -37,5 +37,5 @@ class Project(BaseModel):
name: str
description: str
priority: int
progress: float
content: str | None
progress: int
content: str

View File

@@ -1,9 +1,5 @@
# This file contains a lot of prompt block strings that would trigger "line too long"
# flake8: noqa: E501
import ast
import logging
import re
import secrets
from abc import ABC
from enum import Enum, EnumMeta
from json import JSONDecodeError
@@ -31,7 +27,7 @@ from backend.util.prompt import compress_prompt, estimate_token_count
from backend.util.text import TextFormatter
logger = TruncatedLogger(logging.getLogger(__name__), "[LLM-Block]")
fmt = TextFormatter(autoescape=False)
fmt = TextFormatter()
LLMProviderName = Literal[
ProviderName.AIML_API,
@@ -101,8 +97,6 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
CLAUDE_4_1_OPUS = "claude-opus-4-1-20250805"
CLAUDE_4_OPUS = "claude-opus-4-20250514"
CLAUDE_4_SONNET = "claude-sonnet-4-20250514"
CLAUDE_4_5_SONNET = "claude-sonnet-4-5-20250929"
CLAUDE_4_5_HAIKU = "claude-haiku-4-5-20251001"
CLAUDE_3_7_SONNET = "claude-3-7-sonnet-20250219"
CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
CLAUDE_3_5_HAIKU = "claude-3-5-haiku-latest"
@@ -210,19 +204,13 @@ MODEL_METADATA = {
"anthropic", 200000, 32000
), # claude-opus-4-1-20250805
LlmModel.CLAUDE_4_OPUS: ModelMetadata(
"anthropic", 200000, 32000
"anthropic", 200000, 8192
), # claude-4-opus-20250514
LlmModel.CLAUDE_4_SONNET: ModelMetadata(
"anthropic", 200000, 64000
"anthropic", 200000, 8192
), # claude-4-sonnet-20250514
LlmModel.CLAUDE_4_5_SONNET: ModelMetadata(
"anthropic", 200000, 64000
), # claude-sonnet-4-5-20250929
LlmModel.CLAUDE_4_5_HAIKU: ModelMetadata(
"anthropic", 200000, 64000
), # claude-haiku-4-5-20251001
LlmModel.CLAUDE_3_7_SONNET: ModelMetadata(
"anthropic", 200000, 64000
"anthropic", 200000, 8192
), # claude-3-7-sonnet-20250219
LlmModel.CLAUDE_3_5_SONNET: ModelMetadata(
"anthropic", 200000, 8192
@@ -394,9 +382,7 @@ def extract_openai_tool_calls(response) -> list[ToolContentBlock] | None:
return None
def get_parallel_tool_calls_param(
llm_model: LlmModel, parallel_tool_calls: bool | None
):
def get_parallel_tool_calls_param(llm_model: LlmModel, parallel_tool_calls):
"""Get the appropriate parallel_tool_calls parameter for OpenAI-compatible APIs."""
if llm_model.startswith("o") or parallel_tool_calls is None:
return openai.NOT_GIVEN
@@ -407,8 +393,8 @@ async def llm_call(
credentials: APIKeyCredentials,
llm_model: LlmModel,
prompt: list[dict],
json_format: bool,
max_tokens: int | None,
force_json_output: bool = False,
tools: list[dict] | None = None,
ollama_host: str = "localhost:11434",
parallel_tool_calls=None,
@@ -421,7 +407,7 @@ async def llm_call(
credentials: The API key credentials to use.
llm_model: The LLM model to use.
prompt: The prompt to send to the LLM.
force_json_output: Whether the response should be in JSON format.
json_format: Whether the response should be in JSON format.
max_tokens: The maximum number of tokens to generate in the chat completion.
tools: The tools to use in the chat completion.
ollama_host: The host for ollama to use.
@@ -460,7 +446,7 @@ async def llm_call(
llm_model, parallel_tool_calls
)
if force_json_output:
if json_format:
response_format = {"type": "json_object"}
response = await oai_client.chat.completions.create(
@@ -573,7 +559,7 @@ async def llm_call(
raise ValueError("Groq does not support tools.")
client = AsyncGroq(api_key=credentials.api_key.get_secret_value())
response_format = {"type": "json_object"} if force_json_output else None
response_format = {"type": "json_object"} if json_format else None
response = await client.chat.completions.create(
model=llm_model.value,
messages=prompt, # type: ignore
@@ -731,7 +717,7 @@ async def llm_call(
)
response_format = None
if force_json_output:
if json_format:
response_format = {"type": "json_object"}
parallel_tool_calls_param = get_parallel_tool_calls_param(
@@ -794,17 +780,6 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
description="The language model to use for answering the prompt.",
advanced=False,
)
force_json_output: bool = SchemaField(
title="Restrict LLM to pure JSON output",
default=False,
description=(
"Whether to force the LLM to produce a JSON-only response. "
"This can increase the block's reliability, "
"but may also reduce the quality of the response "
"because it prohibits the LLM from reasoning "
"before providing its JSON response."
),
)
credentials: AICredentials = AICredentialsField()
sys_prompt: str = SchemaField(
title="System Prompt",
@@ -873,18 +848,17 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
"llm_call": lambda *args, **kwargs: LLMResponse(
raw_response="",
prompt=[""],
response=(
'<json_output id="test123456">{\n'
' "key1": "key1Value",\n'
' "key2": "key2Value"\n'
"}</json_output>"
response=json.dumps(
{
"key1": "key1Value",
"key2": "key2Value",
}
),
tool_calls=None,
prompt_tokens=0,
completion_tokens=0,
reasoning=None,
),
"get_collision_proof_output_tag_id": lambda *args: "test123456",
)
},
)
@@ -893,9 +867,9 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
credentials: APIKeyCredentials,
llm_model: LlmModel,
prompt: list[dict],
json_format: bool,
compress_prompt_to_fit: bool,
max_tokens: int | None,
force_json_output: bool = False,
compress_prompt_to_fit: bool = True,
tools: list[dict] | None = None,
ollama_host: str = "localhost:11434",
) -> LLMResponse:
@@ -908,8 +882,8 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
credentials=credentials,
llm_model=llm_model,
prompt=prompt,
json_format=json_format,
max_tokens=max_tokens,
force_json_output=force_json_output,
tools=tools,
ollama_host=ollama_host,
compress_prompt_to_fit=compress_prompt_to_fit,
@@ -921,6 +895,10 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
logger.debug(f"Calling LLM with input data: {input_data}")
prompt = [json.to_dict(p) for p in input_data.conversation_history]
def trim_prompt(s: str) -> str:
lines = s.strip().split("\n")
return "\n".join([line.strip().lstrip("|") for line in lines])
values = input_data.prompt_values
if values:
input_data.prompt = fmt.format_string(input_data.prompt, values)
@@ -929,15 +907,27 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
if input_data.sys_prompt:
prompt.append({"role": "system", "content": input_data.sys_prompt})
# Use a one-time unique tag to prevent collisions with user/LLM content
output_tag_id = self.get_collision_proof_output_tag_id()
output_tag_start = f'<json_output id="{output_tag_id}">'
if input_data.expected_format:
sys_prompt = self.response_format_instructions(
input_data.expected_format,
list_mode=input_data.list_result,
pure_json_mode=input_data.force_json_output,
output_tag_start=output_tag_start,
expected_format = [
f'"{k}": "{v}"' for k, v in input_data.expected_format.items()
]
if input_data.list_result:
format_prompt = (
f'"results": [\n {{\n {", ".join(expected_format)}\n }}\n]'
)
else:
format_prompt = "\n ".join(expected_format)
sys_prompt = trim_prompt(
f"""
|Reply strictly only in the following JSON format:
|{{
| {format_prompt}
|}}
|
|Ensure the response is valid JSON. Do not include any additional text outside of the JSON.
|If you cannot provide all the keys, provide an empty string for the values you cannot answer.
"""
)
prompt.append({"role": "system", "content": sys_prompt})
@@ -955,21 +945,18 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
except JSONDecodeError as e:
return f"JSON decode error: {e}"
error_feedback_message = ""
logger.debug(f"LLM request: {prompt}")
retry_prompt = ""
llm_model = input_data.model
for retry_count in range(input_data.retry):
logger.debug(f"LLM request: {prompt}")
try:
llm_response = await self.llm_call(
credentials=credentials,
llm_model=llm_model,
prompt=prompt,
compress_prompt_to_fit=input_data.compress_prompt_to_fit,
force_json_output=(
input_data.force_json_output
and bool(input_data.expected_format)
),
json_format=bool(input_data.expected_format),
ollama_host=input_data.ollama_host,
max_tokens=input_data.max_tokens,
)
@@ -983,55 +970,16 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
logger.debug(f"LLM attempt-{retry_count} response: {response_text}")
if input_data.expected_format:
try:
response_obj = self.get_json_from_response(
response_text,
pure_json_mode=input_data.force_json_output,
output_tag_start=output_tag_start,
)
except (ValueError, JSONDecodeError) as parse_error:
censored_response = re.sub(r"[A-Za-z0-9]", "*", response_text)
response_snippet = (
f"{censored_response[:50]}...{censored_response[-30:]}"
)
logger.warning(
f"Error getting JSON from LLM response: {parse_error}\n\n"
f"Response start+end: `{response_snippet}`"
)
prompt.append({"role": "assistant", "content": response_text})
error_feedback_message = self.invalid_response_feedback(
parse_error,
was_parseable=False,
list_mode=input_data.list_result,
pure_json_mode=input_data.force_json_output,
output_tag_start=output_tag_start,
)
prompt.append(
{"role": "user", "content": error_feedback_message}
)
continue
response_obj = json.loads(response_text)
# Handle object response for `force_json_output`+`list_result`
if input_data.list_result and isinstance(response_obj, dict):
if "results" in response_obj and isinstance(
response_obj["results"], list
):
response_obj = response_obj["results"]
else:
error_feedback_message = (
"Expected an array of objects in the 'results' key, "
f"but got: {response_obj}"
)
prompt.append(
{"role": "assistant", "content": response_text}
)
prompt.append(
{"role": "user", "content": error_feedback_message}
)
continue
if "results" in response_obj:
response_obj = response_obj.get("results", [])
elif len(response_obj) == 1:
response_obj = list(response_obj.values())
validation_errors = "\n".join(
response_error = "\n".join(
[
validation_error
for response_item in (
@@ -1043,7 +991,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
]
)
if not validation_errors:
if not response_error:
self.merge_stats(
NodeExecutionStats(
llm_call_count=retry_count + 1,
@@ -1053,16 +1001,6 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
yield "response", response_obj
yield "prompt", self.prompt
return
prompt.append({"role": "assistant", "content": response_text})
error_feedback_message = self.invalid_response_feedback(
validation_errors,
was_parseable=True,
list_mode=input_data.list_result,
pure_json_mode=input_data.force_json_output,
output_tag_start=output_tag_start,
)
prompt.append({"role": "user", "content": error_feedback_message})
else:
self.merge_stats(
NodeExecutionStats(
@@ -1073,6 +1011,21 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
yield "response", {"response": response_text}
yield "prompt", self.prompt
return
retry_prompt = trim_prompt(
f"""
|This is your previous error response:
|--
|{response_text}
|--
|
|And this is the error:
|--
|{response_error}
|--
"""
)
prompt.append({"role": "user", "content": retry_prompt})
except Exception as e:
logger.exception(f"Error calling LLM: {e}")
if (
@@ -1085,133 +1038,9 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
logger.debug(
f"Reducing max_tokens to {input_data.max_tokens} for next attempt"
)
# Don't add retry prompt for token limit errors,
# just retry with lower maximum output tokens
retry_prompt = f"Error calling LLM: {e}"
error_feedback_message = f"Error calling LLM: {e}"
raise RuntimeError(error_feedback_message)
def response_format_instructions(
self,
expected_object_format: dict[str, str],
*,
list_mode: bool,
pure_json_mode: bool,
output_tag_start: str,
) -> str:
expected_output_format = json.dumps(expected_object_format, indent=2)
output_type = "object" if not list_mode else "array"
outer_output_type = "object" if pure_json_mode else output_type
if output_type == "array":
indented_obj_format = expected_output_format.replace("\n", "\n ")
expected_output_format = f"[\n {indented_obj_format},\n ...\n]"
if pure_json_mode:
indented_list_format = expected_output_format.replace("\n", "\n ")
expected_output_format = (
"{\n"
' "reasoning": "... (optional)",\n' # for better performance
f' "results": {indented_list_format}\n'
"}"
)
# Preserve indentation in prompt
expected_output_format = expected_output_format.replace("\n", "\n|")
# Prepare prompt
if not pure_json_mode:
expected_output_format = (
f"{output_tag_start}\n{expected_output_format}\n</json_output>"
)
instructions = f"""
|In your response you MUST include a valid JSON {outer_output_type} strictly following this format:
|{expected_output_format}
|
|If you cannot provide all the keys, you MUST provide an empty string for the values you cannot answer.
""".strip()
if not pure_json_mode:
instructions += f"""
|
|You MUST enclose your final JSON answer in {output_tag_start}...</json_output> tags, even if the user specifies a different tag.
|There MUST be exactly ONE {output_tag_start}...</json_output> block in your response, which MUST ONLY contain the JSON {outer_output_type} and nothing else. Other text outside this block is allowed.
""".strip()
return trim_prompt(instructions)
def invalid_response_feedback(
self,
error,
*,
was_parseable: bool,
list_mode: bool,
pure_json_mode: bool,
output_tag_start: str,
) -> str:
outer_output_type = "object" if not list_mode or pure_json_mode else "array"
if was_parseable:
complaint = f"Your previous response did not match the expected {outer_output_type} format."
else:
complaint = f"Your previous response did not contain a parseable JSON {outer_output_type}."
indented_parse_error = str(error).replace("\n", "\n|")
instruction = (
f"Please provide a {output_tag_start}...</json_output> block containing a"
if not pure_json_mode
else "Please provide a"
) + f" valid JSON {outer_output_type} that matches the expected format."
return trim_prompt(
f"""
|{complaint}
|
|{indented_parse_error}
|
|{instruction}
"""
)
def get_json_from_response(
self, response_text: str, *, pure_json_mode: bool, output_tag_start: str
) -> dict[str, Any] | list[dict[str, Any]]:
if pure_json_mode:
# Handle pure JSON responses
try:
return json.loads(response_text)
except JSONDecodeError as first_parse_error:
# If that didn't work, try finding the { and } to deal with possible ```json fences etc.
json_start = response_text.find("{")
json_end = response_text.rfind("}")
try:
return json.loads(response_text[json_start : json_end + 1])
except JSONDecodeError:
# Raise the original error, as it's more likely to be relevant
raise first_parse_error from None
if output_tag_start not in response_text:
raise ValueError(
"Response does not contain the expected "
f"{output_tag_start}...</json_output> block."
)
json_output = (
response_text.split(output_tag_start, 1)[1]
.rsplit("</json_output>", 1)[0]
.strip()
)
return json.loads(json_output)
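A minimal sketch of the tag-mode extraction path, assuming output_tag_start is passed as the literal <json_output> tag (in practice the tag may carry the random id from get_collision_proof_output_tag_id below):

text = 'Reasoning first... <json_output>\n{"name": "x"}\n</json_output>'
# block.get_json_from_response(
#     text, pure_json_mode=False, output_tag_start="<json_output>"
# ) == {"name": "x"}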
def get_collision_proof_output_tag_id(self) -> str:
return secrets.token_hex(8)
def trim_prompt(s: str) -> str:
"""Removes indentation up to and including `|` from a multi-line prompt."""
lines = s.strip().split("\n")
return "\n".join([line.strip().lstrip("|") for line in lines])
raise RuntimeError(retry_prompt)
class AITextGeneratorBlock(AIBlockBase):
@@ -1408,27 +1237,11 @@ class AITextSummarizerBlock(AIBlockBase):
@staticmethod
def _split_text(text: str, max_tokens: int, overlap: int) -> list[str]:
# Security fix: Add validation to prevent DoS attacks
# Limit text size to prevent memory exhaustion
MAX_TEXT_LENGTH = 1_000_000 # 1MB character limit
MAX_CHUNKS = 100 # Maximum number of chunks to prevent excessive memory use
if len(text) > MAX_TEXT_LENGTH:
text = text[:MAX_TEXT_LENGTH]
# Ensure chunk_size is at least 1 to prevent infinite loops
chunk_size = max(1, max_tokens - overlap)
# Ensure overlap is less than max_tokens to prevent invalid configurations
if overlap >= max_tokens:
overlap = max(0, max_tokens - 1)
words = text.split()
chunks = []
chunk_size = max_tokens - overlap
for i in range(0, len(words), chunk_size):
if len(chunks) >= MAX_CHUNKS:
break # Limit the number of chunks to prevent memory exhaustion
chunk = " ".join(words[i : i + max_tokens])
chunks.append(chunk)
return chunks
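A worked example of the sliding window, with illustrative numbers: max_tokens=5 and overlap=2 give a step of chunk_size=3, so consecutive chunks share two words.

words = "w0 w1 w2 w3 w4 w5 w6 w7".split()
chunks = [" ".join(words[i : i + 5]) for i in range(0, len(words), 3)]
assert chunks == ["w0 w1 w2 w3 w4", "w3 w4 w5 w6 w7", "w6 w7"]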


@@ -1,536 +0,0 @@
"""
Notion API helper functions and client for making authenticated requests.
"""
from typing import Any, Dict, List, Optional
from backend.data.model import OAuth2Credentials
from backend.util.request import Requests
NOTION_VERSION = "2022-06-28"
class NotionAPIException(Exception):
"""Exception raised for Notion API errors."""
def __init__(self, message: str, status_code: int):
super().__init__(message)
self.status_code = status_code
class NotionClient:
"""Client for interacting with the Notion API."""
def __init__(self, credentials: OAuth2Credentials):
self.credentials = credentials
self.headers = {
"Authorization": credentials.auth_header(),
"Notion-Version": NOTION_VERSION,
"Content-Type": "application/json",
}
self.requests = Requests()
async def get_page(self, page_id: str) -> dict:
"""
Fetch a page by ID.
Args:
page_id: The ID of the page to fetch.
Returns:
The page object from Notion API.
"""
url = f"https://api.notion.com/v1/pages/{page_id}"
response = await self.requests.get(url, headers=self.headers)
if not response.ok:
raise NotionAPIException(
f"Failed to fetch page: {response.status} - {response.text()}",
response.status,
)
return response.json()
async def get_blocks(self, block_id: str, recursive: bool = True) -> List[dict]:
"""
Fetch all blocks from a page or block.
Args:
block_id: The ID of the page or block to fetch children from.
recursive: Whether to fetch nested blocks recursively.
Returns:
List of block objects.
"""
blocks = []
cursor = None
while True:
url = f"https://api.notion.com/v1/blocks/{block_id}/children"
params = {"page_size": 100}
if cursor:
params["start_cursor"] = cursor
response = await self.requests.get(url, headers=self.headers, params=params)
if not response.ok:
raise NotionAPIException(
f"Failed to fetch blocks: {response.status} - {response.text()}",
response.status,
)
data = response.json()
current_blocks = data.get("results", [])
# If recursive, fetch children for blocks that have them
if recursive:
for block in current_blocks:
if block.get("has_children"):
block["children"] = await self.get_blocks(
block["id"], recursive=True
)
blocks.extend(current_blocks)
if not data.get("has_more"):
break
cursor = data.get("next_cursor")
return blocks
async def query_database(
self,
database_id: str,
filter_obj: Optional[dict] = None,
sorts: Optional[List[dict]] = None,
page_size: int = 100,
) -> dict:
"""
Query a database with optional filters and sorts.
Args:
database_id: The ID of the database to query.
filter_obj: Optional filter object for the query.
sorts: Optional list of sort objects.
page_size: Number of results per page.
Returns:
Query results including pages and pagination info.
"""
url = f"https://api.notion.com/v1/databases/{database_id}/query"
payload: Dict[str, Any] = {"page_size": page_size}
if filter_obj:
payload["filter"] = filter_obj
if sorts:
payload["sorts"] = sorts
response = await self.requests.post(url, headers=self.headers, json=payload)
if not response.ok:
raise NotionAPIException(
f"Failed to query database: {response.status} - {response.text()}",
response.status,
)
return response.json()
async def create_page(
self,
parent: dict,
properties: dict,
children: Optional[List[dict]] = None,
icon: Optional[dict] = None,
cover: Optional[dict] = None,
) -> dict:
"""
Create a new page.
Args:
parent: Parent object (page_id or database_id).
properties: Page properties.
children: Optional list of block children.
icon: Optional icon object.
cover: Optional cover object.
Returns:
The created page object.
"""
url = "https://api.notion.com/v1/pages"
payload: Dict[str, Any] = {"parent": parent, "properties": properties}
if children:
payload["children"] = children
if icon:
payload["icon"] = icon
if cover:
payload["cover"] = cover
response = await self.requests.post(url, headers=self.headers, json=payload)
if not response.ok:
raise NotionAPIException(
f"Failed to create page: {response.status} - {response.text()}",
response.status,
)
return response.json()
async def update_page(self, page_id: str, properties: dict) -> dict:
"""
Update a page's properties.
Args:
page_id: The ID of the page to update.
properties: Properties to update.
Returns:
The updated page object.
"""
url = f"https://api.notion.com/v1/pages/{page_id}"
response = await self.requests.patch(
url, headers=self.headers, json={"properties": properties}
)
if not response.ok:
raise NotionAPIException(
f"Failed to update page: {response.status} - {response.text()}",
response.status,
)
return response.json()
async def append_blocks(self, block_id: str, children: List[dict]) -> dict:
"""
Append blocks to a page or block.
Args:
block_id: The ID of the page or block to append to.
children: List of block objects to append.
Returns:
Response with the created blocks.
"""
url = f"https://api.notion.com/v1/blocks/{block_id}/children"
response = await self.requests.patch(
url, headers=self.headers, json={"children": children}
)
if not response.ok:
raise NotionAPIException(
f"Failed to append blocks: {response.status} - {response.text()}",
response.status,
)
return response.json()
async def search(
self,
query: str = "",
filter_obj: Optional[dict] = None,
sort: Optional[dict] = None,
page_size: int = 100,
) -> dict:
"""
Search for pages and databases.
Args:
query: Search query text.
filter_obj: Optional filter object.
sort: Optional sort object.
page_size: Number of results per page.
Returns:
Search results.
"""
url = "https://api.notion.com/v1/search"
payload: Dict[str, Any] = {"page_size": page_size}
if query:
payload["query"] = query
if filter_obj:
payload["filter"] = filter_obj
if sort:
payload["sort"] = sort
response = await self.requests.post(url, headers=self.headers, json=payload)
if not response.ok:
raise NotionAPIException(
f"Search failed: {response.status} - {response.text()}", response.status
)
return response.json()
# Conversion helper functions
def parse_rich_text(rich_text_array: List[dict]) -> str:
"""
Extract plain text from a Notion rich text array.
Args:
rich_text_array: Array of rich text objects from Notion.
Returns:
Plain text string.
"""
if not rich_text_array:
return ""
text_parts = []
for text_obj in rich_text_array:
if "plain_text" in text_obj:
text_parts.append(text_obj["plain_text"])
return "".join(text_parts)
def rich_text_to_markdown(rich_text_array: List[dict]) -> str:
"""
Convert Notion rich text array to markdown with formatting.
Args:
rich_text_array: Array of rich text objects from Notion.
Returns:
Markdown formatted string.
"""
if not rich_text_array:
return ""
markdown_parts = []
for text_obj in rich_text_array:
text = text_obj.get("plain_text", "")
annotations = text_obj.get("annotations", {})
# Apply formatting based on annotations
if annotations.get("code"):
text = f"`{text}`"
else:
if annotations.get("bold"):
text = f"**{text}**"
if annotations.get("italic"):
text = f"*{text}*"
if annotations.get("strikethrough"):
text = f"~~{text}~~"
if annotations.get("underline"):
text = f"<u>{text}</u>"
# Handle links
if text_obj.get("href"):
text = f"[{text}]({text_obj['href']})"
markdown_parts.append(text)
return "".join(markdown_parts)
def block_to_markdown(block: dict, indent_level: int = 0) -> str:
"""
Convert a single Notion block to markdown.
Args:
block: Block object from Notion API.
indent_level: Current indentation level for nested blocks.
Returns:
Markdown string representation of the block.
"""
block_type = block.get("type")
indent = " " * indent_level
markdown_lines = []
# Handle different block types
if block_type == "paragraph":
text = rich_text_to_markdown(block["paragraph"].get("rich_text", []))
if text:
markdown_lines.append(f"{indent}{text}")
elif block_type == "heading_1":
text = parse_rich_text(block["heading_1"].get("rich_text", []))
markdown_lines.append(f"{indent}# {text}")
elif block_type == "heading_2":
text = parse_rich_text(block["heading_2"].get("rich_text", []))
markdown_lines.append(f"{indent}## {text}")
elif block_type == "heading_3":
text = parse_rich_text(block["heading_3"].get("rich_text", []))
markdown_lines.append(f"{indent}### {text}")
elif block_type == "bulleted_list_item":
text = rich_text_to_markdown(block["bulleted_list_item"].get("rich_text", []))
markdown_lines.append(f"{indent}- {text}")
elif block_type == "numbered_list_item":
text = rich_text_to_markdown(block["numbered_list_item"].get("rich_text", []))
# Note: This is simplified - proper numbering would need context
markdown_lines.append(f"{indent}1. {text}")
elif block_type == "to_do":
text = rich_text_to_markdown(block["to_do"].get("rich_text", []))
checked = "x" if block["to_do"].get("checked") else " "
markdown_lines.append(f"{indent}- [{checked}] {text}")
elif block_type == "toggle":
text = rich_text_to_markdown(block["toggle"].get("rich_text", []))
markdown_lines.append(f"{indent}<details>")
markdown_lines.append(f"{indent}<summary>{text}</summary>")
markdown_lines.append(f"{indent}")
# Process children if they exist
if block.get("children"):
for child in block["children"]:
child_markdown = block_to_markdown(child, indent_level + 1)
if child_markdown:
markdown_lines.append(child_markdown)
markdown_lines.append(f"{indent}</details>")
elif block_type == "code":
code = parse_rich_text(block["code"].get("rich_text", []))
language = block["code"].get("language", "")
markdown_lines.append(f"{indent}```{language}")
markdown_lines.append(f"{indent}{code}")
markdown_lines.append(f"{indent}```")
elif block_type == "quote":
text = rich_text_to_markdown(block["quote"].get("rich_text", []))
markdown_lines.append(f"{indent}> {text}")
elif block_type == "divider":
markdown_lines.append(f"{indent}---")
elif block_type == "image":
image = block["image"]
url = image.get("external", {}).get("url") or image.get("file", {}).get(
"url", ""
)
caption = parse_rich_text(image.get("caption", []))
alt_text = caption if caption else "Image"
markdown_lines.append(f"{indent}![{alt_text}]({url})")
if caption:
markdown_lines.append(f"{indent}*{caption}*")
elif block_type == "video":
video = block["video"]
url = video.get("external", {}).get("url") or video.get("file", {}).get(
"url", ""
)
caption = parse_rich_text(video.get("caption", []))
markdown_lines.append(f"{indent}[Video]({url})")
if caption:
markdown_lines.append(f"{indent}*{caption}*")
elif block_type == "file":
file = block["file"]
url = file.get("external", {}).get("url") or file.get("file", {}).get("url", "")
caption = parse_rich_text(file.get("caption", []))
name = caption if caption else "File"
markdown_lines.append(f"{indent}[{name}]({url})")
elif block_type == "bookmark":
url = block["bookmark"].get("url", "")
caption = parse_rich_text(block["bookmark"].get("caption", []))
markdown_lines.append(f"{indent}[{caption if caption else url}]({url})")
elif block_type == "equation":
expression = block["equation"].get("expression", "")
markdown_lines.append(f"{indent}$${expression}$$")
elif block_type == "callout":
text = rich_text_to_markdown(block["callout"].get("rich_text", []))
icon = block["callout"].get("icon", {})
if icon.get("emoji"):
markdown_lines.append(f"{indent}> {icon['emoji']} {text}")
else:
markdown_lines.append(f"{indent}> {text}")
elif block_type == "child_page":
title = block["child_page"].get("title", "Untitled")
markdown_lines.append(f"{indent}📄 [{title}](notion://page/{block['id']})")
elif block_type == "child_database":
title = block["child_database"].get("title", "Untitled Database")
markdown_lines.append(f"{indent}🗂️ [{title}](notion://database/{block['id']})")
elif block_type == "table":
# Tables are complex - for now just indicate there's a table
markdown_lines.append(
f"{indent}[Table with {block['table'].get('table_width', 0)} columns]"
)
elif block_type == "column_list":
# Process columns
if block.get("children"):
markdown_lines.append(f"{indent}<div style='display: flex'>")
for column in block["children"]:
markdown_lines.append(f"{indent}<div style='flex: 1'>")
if column.get("children"):
for child in column["children"]:
child_markdown = block_to_markdown(child, indent_level + 1)
if child_markdown:
markdown_lines.append(child_markdown)
markdown_lines.append(f"{indent}</div>")
markdown_lines.append(f"{indent}</div>")
# Handle children for blocks that haven't been processed yet
elif block.get("children") and block_type not in ["toggle", "column_list"]:
for child in block["children"]:
child_markdown = block_to_markdown(child, indent_level)
if child_markdown:
markdown_lines.append(child_markdown)
return "\n".join(markdown_lines) if markdown_lines else ""
def blocks_to_markdown(blocks: List[dict]) -> str:
"""
Convert a list of Notion blocks to a markdown document.
Args:
blocks: List of block objects from Notion API.
Returns:
Complete markdown document as a string.
"""
markdown_parts = []
for i, block in enumerate(blocks):
markdown = block_to_markdown(block)
if markdown:
markdown_parts.append(markdown)
# Add spacing between top-level blocks (except lists)
if i < len(blocks) - 1:
next_type = blocks[i + 1].get("type", "")
current_type = block.get("type", "")
# Don't add extra spacing between list items
list_types = {"bulleted_list_item", "numbered_list_item", "to_do"}
if not (current_type in list_types and next_type in list_types):
markdown_parts.append("")
return "\n".join(markdown_parts)
def extract_page_title(page: dict) -> str:
"""
Extract the title from a Notion page object.
Args:
page: Page object from Notion API.
Returns:
Page title as a string.
"""
properties = page.get("properties", {})
# Find the title property (it has type "title")
for prop_name, prop_value in properties.items():
if prop_value.get("type") == "title":
return parse_rich_text(prop_value.get("title", []))
return "Untitled"


@@ -1,42 +0,0 @@
from typing import Literal
from pydantic import SecretStr
from backend.data.model import CredentialsField, CredentialsMetaInput, OAuth2Credentials
from backend.integrations.providers import ProviderName
from backend.util.settings import Secrets
secrets = Secrets()
NOTION_OAUTH_IS_CONFIGURED = bool(
secrets.notion_client_id and secrets.notion_client_secret
)
NotionCredentials = OAuth2Credentials
NotionCredentialsInput = CredentialsMetaInput[
Literal[ProviderName.NOTION], Literal["oauth2"]
]
def NotionCredentialsField() -> NotionCredentialsInput:
"""Creates a Notion OAuth2 credentials field."""
return CredentialsField(
description="Connect your Notion account. Ensure the pages/databases are shared with the integration."
)
# Test credentials for Notion OAuth2
TEST_CREDENTIALS = OAuth2Credentials(
id="01234567-89ab-cdef-0123-456789abcdef",
provider="notion",
access_token=SecretStr("test_access_token"),
title="Mock Notion OAuth",
scopes=["read_content", "insert_content", "update_content"],
username="testuser",
)
TEST_CREDENTIALS_INPUT = {
"provider": TEST_CREDENTIALS.provider,
"id": TEST_CREDENTIALS.id,
"type": TEST_CREDENTIALS.type,
"title": TEST_CREDENTIALS.title,
}


@@ -1,360 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import model_validator
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import OAuth2Credentials, SchemaField
from ._api import NotionClient
from ._auth import (
NOTION_OAUTH_IS_CONFIGURED,
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
NotionCredentialsField,
NotionCredentialsInput,
)
class NotionCreatePageBlock(Block):
"""Create a new page in Notion with content."""
class Input(BlockSchema):
credentials: NotionCredentialsInput = NotionCredentialsField()
parent_page_id: Optional[str] = SchemaField(
description="Parent page ID to create the page under. Either this OR parent_database_id is required.",
default=None,
)
parent_database_id: Optional[str] = SchemaField(
description="Parent database ID to create the page in. Either this OR parent_page_id is required.",
default=None,
)
title: str = SchemaField(
description="Title of the new page",
)
content: Optional[str] = SchemaField(
description="Content for the page. Can be plain text or markdown - will be converted to Notion blocks.",
default=None,
)
properties: Optional[Dict[str, Any]] = SchemaField(
description="Additional properties for database pages (e.g., {'Status': 'In Progress', 'Priority': 'High'})",
default=None,
)
icon_emoji: Optional[str] = SchemaField(
description="Emoji to use as the page icon (e.g., '📄', '🚀')", default=None
)
@model_validator(mode="after")
def validate_parent(self):
"""Ensure either parent_page_id or parent_database_id is provided."""
if not self.parent_page_id and not self.parent_database_id:
raise ValueError(
"Either parent_page_id or parent_database_id must be provided"
)
if self.parent_page_id and self.parent_database_id:
raise ValueError(
"Only one of parent_page_id or parent_database_id should be provided, not both"
)
return self
class Output(BlockSchema):
page_id: str = SchemaField(description="ID of the created page.")
page_url: str = SchemaField(description="URL of the created page.")
error: str = SchemaField(description="Error message if the operation failed.")
def __init__(self):
super().__init__(
id="c15febe0-66ce-4c6f-aebd-5ab351653804",
description="Create a new page in Notion. Requires EITHER a parent_page_id OR parent_database_id. Supports markdown content.",
categories={BlockCategory.PRODUCTIVITY},
input_schema=NotionCreatePageBlock.Input,
output_schema=NotionCreatePageBlock.Output,
disabled=not NOTION_OAUTH_IS_CONFIGURED,
test_input={
"parent_page_id": "00000000-0000-0000-0000-000000000000",
"title": "Test Page",
"content": "This is test content.",
"credentials": TEST_CREDENTIALS_INPUT,
},
test_output=[
("page_id", "12345678-1234-1234-1234-123456789012"),
(
"page_url",
"https://notion.so/Test-Page-12345678123412341234123456789012",
),
],
test_credentials=TEST_CREDENTIALS,
test_mock={
"create_page": lambda *args, **kwargs: (
"12345678-1234-1234-1234-123456789012",
"https://notion.so/Test-Page-12345678123412341234123456789012",
)
},
)
@staticmethod
def _markdown_to_blocks(content: str) -> List[dict]:
"""Convert markdown content to Notion block objects."""
if not content:
return []
blocks = []
lines = content.split("\n")
i = 0
while i < len(lines):
line = lines[i]
# Skip empty lines
if not line.strip():
i += 1
continue
# Headings
if line.startswith("### "):
blocks.append(
{
"type": "heading_3",
"heading_3": {
"rich_text": [
{"type": "text", "text": {"content": line[4:].strip()}}
]
},
}
)
elif line.startswith("## "):
blocks.append(
{
"type": "heading_2",
"heading_2": {
"rich_text": [
{"type": "text", "text": {"content": line[3:].strip()}}
]
},
}
)
elif line.startswith("# "):
blocks.append(
{
"type": "heading_1",
"heading_1": {
"rich_text": [
{"type": "text", "text": {"content": line[2:].strip()}}
]
},
}
)
# Bullet points
elif line.strip().startswith("- "):
blocks.append(
{
"type": "bulleted_list_item",
"bulleted_list_item": {
"rich_text": [
{
"type": "text",
"text": {"content": line.strip()[2:].strip()},
}
]
},
}
)
# Numbered list
elif line.strip() and line.strip()[0].isdigit() and ". " in line:
content_start = line.find(". ") + 2
blocks.append(
{
"type": "numbered_list_item",
"numbered_list_item": {
"rich_text": [
{
"type": "text",
"text": {"content": line[content_start:].strip()},
}
]
},
}
)
# Code block
elif line.strip().startswith("```"):
code_lines = []
language = line[3:].strip() or "plain text"
i += 1
while i < len(lines) and not lines[i].strip().startswith("```"):
code_lines.append(lines[i])
i += 1
blocks.append(
{
"type": "code",
"code": {
"rich_text": [
{
"type": "text",
"text": {"content": "\n".join(code_lines)},
}
],
"language": language,
},
}
)
# Quote
elif line.strip().startswith("> "):
blocks.append(
{
"type": "quote",
"quote": {
"rich_text": [
{
"type": "text",
"text": {"content": line.strip()[2:].strip()},
}
]
},
}
)
# Horizontal rule
elif line.strip() in ["---", "***", "___"]:
blocks.append({"type": "divider", "divider": {}})
# Regular paragraph
else:
# Parse for basic markdown formatting
text_content = line.strip()
rich_text = []
# Simple bold/italic parsing (this is simplified)
if "**" in text_content or "*" in text_content:
# For now, just pass as plain text
# A full implementation would parse and create proper annotations
rich_text = [{"type": "text", "text": {"content": text_content}}]
else:
rich_text = [{"type": "text", "text": {"content": text_content}}]
blocks.append(
{"type": "paragraph", "paragraph": {"rich_text": rich_text}}
)
i += 1
return blocks
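For instance, a two-line markdown snippet maps to a heading block followed by a list-item block:

blocks = NotionCreatePageBlock._markdown_to_blocks("# Title\n- item")
assert blocks[0]["type"] == "heading_1"
assert blocks[1]["type"] == "bulleted_list_item"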
@staticmethod
def _build_properties(
title: str, additional_properties: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Build properties object for page creation."""
properties: Dict[str, Any] = {
"title": {"title": [{"type": "text", "text": {"content": title}}]}
}
if additional_properties:
for key, value in additional_properties.items():
if key.lower() == "title":
continue # Skip title as we already have it
# Try to intelligently map property types
if isinstance(value, bool):
properties[key] = {"checkbox": value}
elif isinstance(value, (int, float)):
properties[key] = {"number": value}
elif isinstance(value, list):
# Assume multi-select
properties[key] = {
"multi_select": [{"name": str(item)} for item in value]
}
elif isinstance(value, str):
# Could be select, rich_text, or other types
# For simplicity, try common patterns
if key.lower() in ["status", "priority", "type", "category"]:
properties[key] = {"select": {"name": value}}
elif key.lower() in ["url", "link"]:
properties[key] = {"url": value}
elif key.lower() in ["email"]:
properties[key] = {"email": value}
else:
properties[key] = {
"rich_text": [{"type": "text", "text": {"content": value}}]
}
return properties
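A sketch of the heuristic type mapping, with illustrative values:

props = NotionCreatePageBlock._build_properties(
    "My Page", {"Status": "In Progress", "Done": False, "Tags": ["a", "b"]}
)
assert props["Status"] == {"select": {"name": "In Progress"}}
assert props["Done"] == {"checkbox": False}
assert props["Tags"] == {"multi_select": [{"name": "a"}, {"name": "b"}]}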
@staticmethod
async def create_page(
credentials: OAuth2Credentials,
title: str,
parent_page_id: Optional[str] = None,
parent_database_id: Optional[str] = None,
content: Optional[str] = None,
properties: Optional[Dict[str, Any]] = None,
icon_emoji: Optional[str] = None,
) -> tuple[str, str]:
"""
Create a new Notion page.
Returns:
Tuple of (page_id, page_url)
"""
if not parent_page_id and not parent_database_id:
raise ValueError(
"Either parent_page_id or parent_database_id must be provided"
)
if parent_page_id and parent_database_id:
raise ValueError(
"Only one of parent_page_id or parent_database_id should be provided, not both"
)
client = NotionClient(credentials)
# Build parent object
if parent_page_id:
parent = {"type": "page_id", "page_id": parent_page_id}
else:
parent = {"type": "database_id", "database_id": parent_database_id}
# Build properties
page_properties = NotionCreatePageBlock._build_properties(title, properties)
# Convert content to blocks if provided
children = None
if content:
children = NotionCreatePageBlock._markdown_to_blocks(content)
# Build icon if provided
icon = None
if icon_emoji:
icon = {"type": "emoji", "emoji": icon_emoji}
# Create the page
result = await client.create_page(
parent=parent, properties=page_properties, children=children, icon=icon
)
page_id = result.get("id", "")
page_url = result.get("url", "")
if not page_id or not page_url:
raise ValueError("Failed to get page ID or URL from Notion response")
return page_id, page_url
async def run(
self,
input_data: Input,
*,
credentials: OAuth2Credentials,
**kwargs,
) -> BlockOutput:
try:
page_id, page_url = await self.create_page(
credentials,
input_data.title,
input_data.parent_page_id,
input_data.parent_database_id,
input_data.content,
input_data.properties,
input_data.icon_emoji,
)
yield "page_id", page_id
yield "page_url", page_url
except Exception as e:
yield "error", str(e) if str(e) else "Unknown error"


@@ -1,285 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import OAuth2Credentials, SchemaField
from ._api import NotionClient, parse_rich_text
from ._auth import (
NOTION_OAUTH_IS_CONFIGURED,
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
NotionCredentialsField,
NotionCredentialsInput,
)
class NotionReadDatabaseBlock(Block):
"""Query a Notion database and retrieve entries with their properties."""
class Input(BlockSchema):
credentials: NotionCredentialsInput = NotionCredentialsField()
database_id: str = SchemaField(
description="Notion database ID. Must be accessible by the connected integration.",
)
filter_property: Optional[str] = SchemaField(
description="Property name to filter by (e.g., 'Status', 'Priority')",
default=None,
)
filter_value: Optional[str] = SchemaField(
description="Value to filter for in the specified property", default=None
)
sort_property: Optional[str] = SchemaField(
description="Property name to sort by", default=None
)
sort_direction: Optional[str] = SchemaField(
description="Sort direction: 'ascending' or 'descending'",
default="ascending",
)
limit: int = SchemaField(
description="Maximum number of entries to retrieve",
default=100,
ge=1,
le=100,
)
class Output(BlockSchema):
entries: List[Dict[str, Any]] = SchemaField(
description="List of database entries with their properties."
)
entry: Dict[str, Any] = SchemaField(
description="Individual database entry (yields one per entry found)."
)
entry_ids: List[str] = SchemaField(
description="List of entry IDs for batch operations."
)
entry_id: str = SchemaField(
description="Individual entry ID (yields one per entry found)."
)
count: int = SchemaField(description="Number of entries retrieved.")
database_title: str = SchemaField(description="Title of the database.")
error: str = SchemaField(description="Error message if the operation failed.")
def __init__(self):
super().__init__(
id="fcd53135-88c9-4ba3-be50-cc6936286e6c",
description="Query a Notion database with optional filtering and sorting, returning structured entries.",
categories={BlockCategory.PRODUCTIVITY},
input_schema=NotionReadDatabaseBlock.Input,
output_schema=NotionReadDatabaseBlock.Output,
disabled=not NOTION_OAUTH_IS_CONFIGURED,
test_input={
"database_id": "00000000-0000-0000-0000-000000000000",
"limit": 10,
"credentials": TEST_CREDENTIALS_INPUT,
},
test_output=[
(
"entries",
[{"Name": "Test Entry", "Status": "Active", "_id": "test-123"}],
),
("entry_ids", ["test-123"]),
(
"entry",
{"Name": "Test Entry", "Status": "Active", "_id": "test-123"},
),
("entry_id", "test-123"),
("count", 1),
("database_title", "Test Database"),
],
test_credentials=TEST_CREDENTIALS,
test_mock={
"query_database": lambda *args, **kwargs: (
[{"Name": "Test Entry", "Status": "Active", "_id": "test-123"}],
1,
"Test Database",
)
},
)
@staticmethod
def _parse_property_value(prop: dict) -> Any:
"""Parse a Notion property value into a simple Python type."""
prop_type = prop.get("type")
if prop_type == "title":
return parse_rich_text(prop.get("title", []))
elif prop_type == "rich_text":
return parse_rich_text(prop.get("rich_text", []))
elif prop_type == "number":
return prop.get("number")
elif prop_type == "select":
select = prop.get("select")
return select.get("name") if select else None
elif prop_type == "multi_select":
return [item.get("name") for item in prop.get("multi_select", [])]
elif prop_type == "date":
date = prop.get("date")
if date:
return date.get("start")
return None
elif prop_type == "checkbox":
return prop.get("checkbox", False)
elif prop_type == "url":
return prop.get("url")
elif prop_type == "email":
return prop.get("email")
elif prop_type == "phone_number":
return prop.get("phone_number")
elif prop_type == "people":
return [
person.get("name", person.get("id"))
for person in prop.get("people", [])
]
elif prop_type == "files":
files = prop.get("files", [])
return [
f.get(
"name",
f.get("external", {}).get("url", f.get("file", {}).get("url")),
)
for f in files
]
elif prop_type == "relation":
return [rel.get("id") for rel in prop.get("relation", [])]
elif prop_type == "formula":
formula = prop.get("formula", {})
return formula.get(formula.get("type"))
elif prop_type == "rollup":
rollup = prop.get("rollup", {})
return rollup.get(rollup.get("type"))
elif prop_type == "created_time":
return prop.get("created_time")
elif prop_type == "created_by":
return prop.get("created_by", {}).get(
"name", prop.get("created_by", {}).get("id")
)
elif prop_type == "last_edited_time":
return prop.get("last_edited_time")
elif prop_type == "last_edited_by":
return prop.get("last_edited_by", {}).get(
"name", prop.get("last_edited_by", {}).get("id")
)
else:
# Return the raw value for unknown types
return prop
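For example, a multi-select property collapses to a list of option names:

prop = {"type": "multi_select", "multi_select": [{"name": "red"}, {"name": "blue"}]}
assert NotionReadDatabaseBlock._parse_property_value(prop) == ["red", "blue"]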
@staticmethod
def _build_filter(property_name: str, value: str) -> dict:
"""Build a simple filter object for a property."""
# This is a simplified filter - in reality, you'd need to know the property type
# For now, we'll try common filter types
return {
"or": [
{"property": property_name, "rich_text": {"contains": value}},
{"property": property_name, "title": {"contains": value}},
{"property": property_name, "select": {"equals": value}},
{"property": property_name, "multi_select": {"contains": value}},
]
}
@staticmethod
async def query_database(
credentials: OAuth2Credentials,
database_id: str,
filter_property: Optional[str] = None,
filter_value: Optional[str] = None,
sort_property: Optional[str] = None,
sort_direction: str = "ascending",
limit: int = 100,
) -> tuple[List[Dict[str, Any]], int, str]:
"""
Query a Notion database and parse the results.
Returns:
Tuple of (entries_list, count, database_title)
"""
client = NotionClient(credentials)
# Build filter if specified
filter_obj = None
if filter_property and filter_value:
filter_obj = NotionReadDatabaseBlock._build_filter(
filter_property, filter_value
)
# Build sorts if specified
sorts = None
if sort_property:
sorts = [{"property": sort_property, "direction": sort_direction}]
# Query the database
result = await client.query_database(
database_id, filter_obj=filter_obj, sorts=sorts, page_size=limit
)
# Parse the entries
entries = []
for page in result.get("results", []):
entry = {}
properties = page.get("properties", {})
for prop_name, prop_value in properties.items():
entry[prop_name] = NotionReadDatabaseBlock._parse_property_value(
prop_value
)
# Add metadata
entry["_id"] = page.get("id")
entry["_url"] = page.get("url")
entry["_created_time"] = page.get("created_time")
entry["_last_edited_time"] = page.get("last_edited_time")
entries.append(entry)
# Get database title (we need to make a separate call for this)
try:
database_url = f"https://api.notion.com/v1/databases/{database_id}"
db_response = await client.requests.get(
database_url, headers=client.headers
)
if db_response.ok:
db_data = db_response.json()
db_title = parse_rich_text(db_data.get("title", []))
else:
db_title = "Unknown Database"
except Exception:
db_title = "Unknown Database"
return entries, len(entries), db_title
async def run(
self,
input_data: Input,
*,
credentials: OAuth2Credentials,
**kwargs,
) -> BlockOutput:
try:
entries, count, db_title = await self.query_database(
credentials,
input_data.database_id,
input_data.filter_property,
input_data.filter_value,
input_data.sort_property,
input_data.sort_direction or "ascending",
input_data.limit,
)
# Yield the complete list for batch operations
yield "entries", entries
# Extract and yield IDs as a list for batch operations
entry_ids = [entry["_id"] for entry in entries if "_id" in entry]
yield "entry_ids", entry_ids
# Yield each individual entry and its ID for single connections
for entry in entries:
yield "entry", entry
if "_id" in entry:
yield "entry_id", entry["_id"]
yield "count", count
yield "database_title", db_title
except Exception as e:
yield "error", str(e) if str(e) else "Unknown error"

View File

@@ -1,64 +0,0 @@
from __future__ import annotations
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import OAuth2Credentials, SchemaField
from ._api import NotionClient
from ._auth import (
NOTION_OAUTH_IS_CONFIGURED,
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
NotionCredentialsField,
NotionCredentialsInput,
)
class NotionReadPageBlock(Block):
"""Read a Notion page by ID and return its raw JSON."""
class Input(BlockSchema):
credentials: NotionCredentialsInput = NotionCredentialsField()
page_id: str = SchemaField(
description="Notion page ID. Must be accessible by the connected integration. You can get this from the page URL notion.so/A-Page-586edd711467478da59fe3ce29a1ffab would be 586edd711467478da59fe35e29a1ffab",
)
class Output(BlockSchema):
page: dict = SchemaField(description="Raw Notion page JSON.")
error: str = SchemaField(description="Error message if the operation failed.")
def __init__(self):
super().__init__(
id="5246cc1d-34b7-452b-8fc5-3fb25fd8f542",
description="Read a Notion page by its ID and return its raw JSON.",
categories={BlockCategory.PRODUCTIVITY},
input_schema=NotionReadPageBlock.Input,
output_schema=NotionReadPageBlock.Output,
disabled=not NOTION_OAUTH_IS_CONFIGURED,
test_input={
"page_id": "00000000-0000-0000-0000-000000000000",
"credentials": TEST_CREDENTIALS_INPUT,
},
test_output=[("page", dict)],
test_credentials=TEST_CREDENTIALS,
test_mock={
"get_page": lambda *args, **kwargs: {"object": "page", "id": "mocked"}
},
)
@staticmethod
async def get_page(credentials: OAuth2Credentials, page_id: str) -> dict:
client = NotionClient(credentials)
return await client.get_page(page_id)
async def run(
self,
input_data: Input,
*,
credentials: OAuth2Credentials,
**kwargs,
) -> BlockOutput:
try:
page = await self.get_page(credentials, input_data.page_id)
yield "page", page
except Exception as e:
yield "error", str(e) if str(e) else "Unknown error"

View File

@@ -1,109 +0,0 @@
from __future__ import annotations
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import OAuth2Credentials, SchemaField
from ._api import NotionClient, blocks_to_markdown, extract_page_title
from ._auth import (
NOTION_OAUTH_IS_CONFIGURED,
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
NotionCredentialsField,
NotionCredentialsInput,
)
class NotionReadPageMarkdownBlock(Block):
"""Read a Notion page and convert it to clean Markdown format."""
class Input(BlockSchema):
credentials: NotionCredentialsInput = NotionCredentialsField()
page_id: str = SchemaField(
description="Notion page ID. Must be accessible by the connected integration. You can get this from the page URL notion.so/A-Page-586edd711467478da59fe35e29a1ffab would be 586edd711467478da59fe35e29a1ffab",
)
include_title: bool = SchemaField(
description="Whether to include the page title as a header in the markdown",
default=True,
)
class Output(BlockSchema):
markdown: str = SchemaField(description="Page content in Markdown format.")
title: str = SchemaField(description="Page title.")
error: str = SchemaField(description="Error message if the operation failed.")
def __init__(self):
super().__init__(
id="d1312c4d-fae2-4e70-893d-f4d07cce1d4e",
description="Read a Notion page and convert it to Markdown format with proper formatting for headings, lists, links, and rich text.",
categories={BlockCategory.PRODUCTIVITY},
input_schema=NotionReadPageMarkdownBlock.Input,
output_schema=NotionReadPageMarkdownBlock.Output,
disabled=not NOTION_OAUTH_IS_CONFIGURED,
test_input={
"page_id": "00000000-0000-0000-0000-000000000000",
"include_title": True,
"credentials": TEST_CREDENTIALS_INPUT,
},
test_output=[
("markdown", "# Test Page\n\nThis is test content."),
("title", "Test Page"),
],
test_credentials=TEST_CREDENTIALS,
test_mock={
"get_page_markdown": lambda *args, **kwargs: (
"# Test Page\n\nThis is test content.",
"Test Page",
)
},
)
@staticmethod
async def get_page_markdown(
credentials: OAuth2Credentials, page_id: str, include_title: bool = True
) -> tuple[str, str]:
"""
Get a Notion page and convert it to markdown.
Args:
credentials: OAuth2 credentials for Notion.
page_id: The ID of the page to fetch.
include_title: Whether to include the page title in the markdown.
Returns:
Tuple of (markdown_content, title)
"""
client = NotionClient(credentials)
# Get page metadata
page = await client.get_page(page_id)
title = extract_page_title(page)
# Get all blocks from the page
blocks = await client.get_blocks(page_id, recursive=True)
# Convert blocks to markdown
content_markdown = blocks_to_markdown(blocks)
# Combine title and content if requested
if include_title and title:
full_markdown = f"# {title}\n\n{content_markdown}"
else:
full_markdown = content_markdown
return full_markdown, title
async def run(
self,
input_data: Input,
*,
credentials: OAuth2Credentials,
**kwargs,
) -> BlockOutput:
try:
markdown, title = await self.get_page_markdown(
credentials, input_data.page_id, input_data.include_title
)
yield "markdown", markdown
yield "title", title
except Exception as e:
yield "error", str(e) if str(e) else "Unknown error"

View File

@@ -1,225 +0,0 @@
from __future__ import annotations
from typing import List, Optional
from pydantic import BaseModel
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import OAuth2Credentials, SchemaField
from ._api import NotionClient, extract_page_title, parse_rich_text
from ._auth import (
NOTION_OAUTH_IS_CONFIGURED,
TEST_CREDENTIALS,
TEST_CREDENTIALS_INPUT,
NotionCredentialsField,
NotionCredentialsInput,
)
class NotionSearchResult(BaseModel):
"""Typed model for Notion search results."""
id: str
type: str # 'page' or 'database'
title: str
url: str
created_time: Optional[str] = None
last_edited_time: Optional[str] = None
parent_type: Optional[str] = None # 'page', 'database', or 'workspace'
parent_id: Optional[str] = None
icon: Optional[str] = None # emoji icon if present
is_inline: Optional[bool] = None # for databases only
class NotionSearchBlock(Block):
"""Search across your Notion workspace for pages and databases."""
class Input(BlockSchema):
credentials: NotionCredentialsInput = NotionCredentialsField()
query: str = SchemaField(
description="Search query text. Leave empty to get all accessible pages/databases.",
default="",
)
filter_type: Optional[str] = SchemaField(
description="Filter results by type: 'page' or 'database'. Leave empty for both.",
default=None,
)
limit: int = SchemaField(
description="Maximum number of results to return", default=20, ge=1, le=100
)
class Output(BlockSchema):
results: List[NotionSearchResult] = SchemaField(
description="List of search results with title, type, URL, and metadata."
)
result: NotionSearchResult = SchemaField(
description="Individual search result (yields one per result found)."
)
result_ids: List[str] = SchemaField(
description="List of IDs from search results for batch operations."
)
count: int = SchemaField(description="Number of results found.")
error: str = SchemaField(description="Error message if the operation failed.")
def __init__(self):
super().__init__(
id="313515dd-9848-46ea-9cd6-3c627c892c56",
description="Search your Notion workspace for pages and databases by text query.",
categories={BlockCategory.PRODUCTIVITY, BlockCategory.SEARCH},
input_schema=NotionSearchBlock.Input,
output_schema=NotionSearchBlock.Output,
disabled=not NOTION_OAUTH_IS_CONFIGURED,
test_input={
"query": "project",
"limit": 5,
"credentials": TEST_CREDENTIALS_INPUT,
},
test_output=[
(
"results",
[
NotionSearchResult(
id="123",
type="page",
title="Project Plan",
url="https://notion.so/Project-Plan-123",
)
],
),
("result_ids", ["123"]),
(
"result",
NotionSearchResult(
id="123",
type="page",
title="Project Plan",
url="https://notion.so/Project-Plan-123",
),
),
("count", 1),
],
test_credentials=TEST_CREDENTIALS,
test_mock={
"search_workspace": lambda *args, **kwargs: (
[
NotionSearchResult(
id="123",
type="page",
title="Project Plan",
url="https://notion.so/Project-Plan-123",
)
],
1,
)
},
)
@staticmethod
async def search_workspace(
credentials: OAuth2Credentials,
query: str = "",
filter_type: Optional[str] = None,
limit: int = 20,
) -> tuple[List[NotionSearchResult], int]:
"""
Search the Notion workspace.
Returns:
Tuple of (results_list, count)
"""
client = NotionClient(credentials)
# Build filter if type is specified
filter_obj = None
if filter_type:
filter_obj = {"property": "object", "value": filter_type}
# Execute search
response = await client.search(
query=query, filter_obj=filter_obj, page_size=limit
)
# Parse results
results = []
for item in response.get("results", []):
result_data = {
"id": item.get("id", ""),
"type": item.get("object", ""),
"url": item.get("url", ""),
"created_time": item.get("created_time"),
"last_edited_time": item.get("last_edited_time"),
"title": "", # Will be set below
}
# Extract title based on type
if item.get("object") == "page":
# For pages, get the title from properties
result_data["title"] = extract_page_title(item)
# Add parent info
parent = item.get("parent", {})
if parent.get("type") == "page_id":
result_data["parent_type"] = "page"
result_data["parent_id"] = parent.get("page_id")
elif parent.get("type") == "database_id":
result_data["parent_type"] = "database"
result_data["parent_id"] = parent.get("database_id")
elif parent.get("type") == "workspace":
result_data["parent_type"] = "workspace"
# Add icon if present
icon = item.get("icon")
if icon and icon.get("type") == "emoji":
result_data["icon"] = icon.get("emoji")
elif item.get("object") == "database":
# For databases, get title from the title array
result_data["title"] = parse_rich_text(item.get("title", []))
# Add database-specific metadata
result_data["is_inline"] = item.get("is_inline", False)
# Add parent info
parent = item.get("parent", {})
if parent.get("type") == "page_id":
result_data["parent_type"] = "page"
result_data["parent_id"] = parent.get("page_id")
elif parent.get("type") == "workspace":
result_data["parent_type"] = "workspace"
# Add icon if present
icon = item.get("icon")
if icon and icon.get("type") == "emoji":
result_data["icon"] = icon.get("emoji")
results.append(NotionSearchResult(**result_data))
return results, len(results)
async def run(
self,
input_data: Input,
*,
credentials: OAuth2Credentials,
**kwargs,
) -> BlockOutput:
try:
results, count = await self.search_workspace(
credentials, input_data.query, input_data.filter_type, input_data.limit
)
# Yield the complete list for batch operations
yield "results", results
# Extract and yield IDs as a list for batch operations
result_ids = [r.id for r in results]
yield "result_ids", result_ids
# Yield each individual result for single connections
for result in results:
yield "result", result
yield "count", count
except Exception as e:
yield "error", str(e) if str(e) else "Unknown error"

View File

@@ -1,226 +0,0 @@
# flake8: noqa: E501
import logging
from enum import Enum
from typing import Any, Literal
import openai
from pydantic import SecretStr
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import (
APIKeyCredentials,
CredentialsField,
CredentialsMetaInput,
NodeExecutionStats,
SchemaField,
)
from backend.integrations.providers import ProviderName
from backend.util.logging import TruncatedLogger
logger = TruncatedLogger(logging.getLogger(__name__), "[Perplexity-Block]")
class PerplexityModel(str, Enum):
"""Perplexity sonar models available via OpenRouter"""
SONAR = "perplexity/sonar"
SONAR_PRO = "perplexity/sonar-pro"
SONAR_DEEP_RESEARCH = "perplexity/sonar-deep-research"
PerplexityCredentials = CredentialsMetaInput[
Literal[ProviderName.OPEN_ROUTER], Literal["api_key"]
]
TEST_CREDENTIALS = APIKeyCredentials(
id="test-perplexity-creds",
provider="open_router",
api_key=SecretStr("mock-openrouter-api-key"),
title="Mock OpenRouter API key",
expires_at=None,
)
TEST_CREDENTIALS_INPUT = {
"provider": TEST_CREDENTIALS.provider,
"id": TEST_CREDENTIALS.id,
"type": TEST_CREDENTIALS.type,
"title": TEST_CREDENTIALS.title,
}
def PerplexityCredentialsField() -> PerplexityCredentials:
return CredentialsField(
description="OpenRouter API key for accessing Perplexity models.",
)
class PerplexityBlock(Block):
class Input(BlockSchema):
prompt: str = SchemaField(
description="The query to send to the Perplexity model.",
placeholder="Enter your query here...",
)
model: PerplexityModel = SchemaField(
title="Perplexity Model",
default=PerplexityModel.SONAR,
description="The Perplexity sonar model to use.",
advanced=False,
)
credentials: PerplexityCredentials = PerplexityCredentialsField()
system_prompt: str = SchemaField(
title="System Prompt",
default="",
description="Optional system prompt to provide context to the model.",
advanced=True,
)
max_tokens: int | None = SchemaField(
advanced=True,
default=None,
description="The maximum number of tokens to generate.",
)
class Output(BlockSchema):
response: str = SchemaField(
description="The response from the Perplexity model."
)
annotations: list[dict[str, Any]] = SchemaField(
description="List of URL citations and annotations from the response."
)
error: str = SchemaField(description="Error message if the API call failed.")
def __init__(self):
super().__init__(
id="c8a5f2e9-8b3d-4a7e-9f6c-1d5e3c9b7a4f",
description="Query Perplexity's sonar models with real-time web search capabilities and receive annotated responses with source citations.",
categories={BlockCategory.AI, BlockCategory.SEARCH},
input_schema=PerplexityBlock.Input,
output_schema=PerplexityBlock.Output,
test_input={
"prompt": "What is the weather today?",
"model": PerplexityModel.SONAR,
"credentials": TEST_CREDENTIALS_INPUT,
},
test_credentials=TEST_CREDENTIALS,
test_output=[
("response", "The weather varies by location..."),
("annotations", list),
],
test_mock={
"call_perplexity": lambda *args, **kwargs: {
"response": "The weather varies by location...",
"annotations": [
{
"type": "url_citation",
"url_citation": {
"title": "weather.com",
"url": "https://weather.com",
},
}
],
}
},
)
self.execution_stats = NodeExecutionStats()
async def call_perplexity(
self,
credentials: APIKeyCredentials,
model: PerplexityModel,
prompt: str,
system_prompt: str = "",
max_tokens: int | None = None,
) -> dict[str, Any]:
"""Call Perplexity via OpenRouter and extract annotations."""
client = openai.AsyncOpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=credentials.api_key.get_secret_value(),
)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
response = await client.chat.completions.create(
extra_headers={
"HTTP-Referer": "https://agpt.co",
"X-Title": "AutoGPT",
},
model=model.value,
messages=messages,
max_tokens=max_tokens,
)
if not response.choices:
raise ValueError("No response from Perplexity via OpenRouter.")
# Extract the response content
response_content = response.choices[0].message.content or ""
# Extract annotations if present in the message
annotations = []
if hasattr(response.choices[0].message, "annotations"):
# If annotations are directly available
annotations = response.choices[0].message.annotations
else:
# Check if there's a raw response with annotations
raw = getattr(response.choices[0].message, "_raw_response", None)
if isinstance(raw, dict) and "annotations" in raw:
annotations = raw["annotations"]
if not annotations and hasattr(response, "model_extra"):
# Check model_extra for annotations
model_extra = response.model_extra
if isinstance(model_extra, dict):
# Check in choices
if "choices" in model_extra and len(model_extra["choices"]) > 0:
choice = model_extra["choices"][0]
if "message" in choice and "annotations" in choice["message"]:
annotations = choice["message"]["annotations"]
# Also check the raw response object for annotations
if not annotations:
raw = getattr(response, "_raw_response", None)
if isinstance(raw, dict):
# Check various possible locations for annotations
if "annotations" in raw:
annotations = raw["annotations"]
elif "choices" in raw and len(raw["choices"]) > 0:
choice = raw["choices"][0]
if "message" in choice and "annotations" in choice["message"]:
annotations = choice["message"]["annotations"]
# Update execution stats
if response.usage:
self.execution_stats.input_token_count = response.usage.prompt_tokens
self.execution_stats.output_token_count = (
response.usage.completion_tokens
)
return {"response": response_content, "annotations": annotations or []}
except Exception as e:
logger.error(f"Error calling Perplexity: {e}")
raise
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
logger.debug(f"Running Perplexity block with model: {input_data.model}")
try:
result = await self.call_perplexity(
credentials=credentials,
model=input_data.model,
prompt=input_data.prompt,
system_prompt=input_data.system_prompt,
max_tokens=input_data.max_tokens,
)
yield "response", result["response"]
yield "annotations", result["annotations"]
except Exception as e:
error_msg = f"Error calling Perplexity: {str(e)}"
logger.error(error_msg)
yield "error", error_msg


@@ -1,7 +1,4 @@
import asyncio
import logging
import urllib.parse
import urllib.request
from datetime import datetime, timedelta, timezone
from typing import Any
@@ -104,38 +101,7 @@ class ReadRSSFeedBlock(Block):
@staticmethod
def parse_feed(url: str) -> dict[str, Any]:
# Security fix: Add protection against memory exhaustion attacks
MAX_FEED_SIZE = 10 * 1024 * 1024 # 10MB limit for RSS feeds
# Validate URL
parsed_url = urllib.parse.urlparse(url)
if parsed_url.scheme not in ("http", "https"):
raise ValueError(f"Invalid URL scheme: {parsed_url.scheme}")
# Download with size limit
try:
with urllib.request.urlopen(url, timeout=30) as response:
# Check content length if available
content_length = response.headers.get("Content-Length")
if content_length and int(content_length) > MAX_FEED_SIZE:
raise ValueError(
f"Feed too large: {content_length} bytes exceeds {MAX_FEED_SIZE} limit"
)
# Read with size limit
content = response.read(MAX_FEED_SIZE + 1)
if len(content) > MAX_FEED_SIZE:
raise ValueError(
f"Feed too large: exceeds {MAX_FEED_SIZE} byte limit"
)
# Parse with feedparser using the validated content
# feedparser has built-in protection against XML attacks
return feedparser.parse(content) # type: ignore
except Exception as e:
# Log error and return empty feed
logging.warning(f"Failed to parse RSS feed from {url}: {e}")
return {"entries": []}
return feedparser.parse(url) # type: ignore
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
keep_going = True

View File

@@ -13,11 +13,6 @@ from backend.data.block import (
BlockSchema,
BlockType,
)
from backend.data.dynamic_fields import (
extract_base_field_name,
get_dynamic_field_description,
is_dynamic_field,
)
from backend.data.model import NodeExecutionStats, SchemaField
from backend.util import json
from backend.util.clients import get_database_manager_async_client
@@ -103,22 +98,6 @@ def _create_tool_response(call_id: str, output: Any) -> dict[str, Any]:
return {"role": "tool", "tool_call_id": call_id, "content": content}
def _convert_raw_response_to_dict(raw_response: Any) -> dict[str, Any]:
"""
Safely convert raw_response to dictionary format for conversation history.
Handles different response types from different LLM providers.
"""
if isinstance(raw_response, str):
# Ollama returns a string, convert to dict format
return {"role": "assistant", "content": raw_response}
elif isinstance(raw_response, dict):
# Already a dict (from tests or some providers)
return raw_response
else:
# OpenAI/Anthropic return objects, convert with json.to_dict
return json.to_dict(raw_response)
def get_pending_tool_calls(conversation_history: list[Any]) -> dict[str, int]:
"""
Every tool call entry in the conversation history requires a response.
@@ -282,7 +261,6 @@ class SmartDecisionMakerBlock(Block):
@staticmethod
def cleanup(s: str):
"""Clean up block names for use as tool function names."""
return re.sub(r"[^a-zA-Z0-9_-]", "_", s).lower()
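For example, every disallowed character becomes an underscore and the result is lower-cased:

assert SmartDecisionMakerBlock.cleanup("My Block (v2)!") == "my_block__v2__"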
@staticmethod
@@ -310,66 +288,41 @@ class SmartDecisionMakerBlock(Block):
}
sink_block_input_schema = block.input_schema
properties = {}
field_mapping = {} # clean_name -> original_name
for link in links:
field_name = link.sink_name
is_dynamic = is_dynamic_field(field_name)
# Clean property key to ensure Anthropic API compatibility for ALL fields
clean_field_name = SmartDecisionMakerBlock.cleanup(field_name)
field_mapping[clean_field_name] = field_name
sink_name = SmartDecisionMakerBlock.cleanup(link.sink_name)
if is_dynamic:
# For dynamic fields, use cleaned name but preserve original in description
properties[clean_field_name] = {
# Handle dynamic fields (e.g., values_#_*, items_$_*, etc.)
# These are fields that get merged by the executor into their base field
if (
"_#_" in link.sink_name
or "_$_" in link.sink_name
or "_@_" in link.sink_name
):
# For dynamic fields, provide a generic string schema
# The executor will handle merging these into the appropriate structure
properties[sink_name] = {
"type": "string",
"description": get_dynamic_field_description(field_name),
"description": f"Dynamic value for {link.sink_name}",
}
else:
# For regular fields, use the block's schema directly
# For regular fields, use the block's schema
try:
properties[clean_field_name] = (
sink_block_input_schema.get_field_schema(field_name)
properties[sink_name] = sink_block_input_schema.get_field_schema(
link.sink_name
)
except (KeyError, AttributeError):
# If field doesn't exist in schema, provide a generic one
properties[clean_field_name] = {
# If the field doesn't exist in the schema, provide a generic schema
properties[sink_name] = {
"type": "string",
"description": f"Value for {field_name}",
"description": f"Value for {link.sink_name}",
}
# Build the parameters schema using a single unified path
base_schema = block.input_schema.jsonschema()
base_required = set(base_schema.get("required", []))
# Compute required fields at the leaf level:
# - If a linked field is dynamic and its base is required in the block schema, require the leaf
# - If a linked field is regular and is required in the block schema, require the leaf
required_fields: set[str] = set()
for link in links:
field_name = link.sink_name
is_dynamic = is_dynamic_field(field_name)
# Always use cleaned field name for property key (Anthropic API compliance)
clean_field_name = SmartDecisionMakerBlock.cleanup(field_name)
if is_dynamic:
base_name = extract_base_field_name(field_name)
if base_name in base_required:
required_fields.add(clean_field_name)
else:
if field_name in base_required:
required_fields.add(clean_field_name)
tool_function["parameters"] = {
"type": "object",
**block.input_schema.jsonschema(),
"properties": properties,
"additionalProperties": False,
"required": sorted(required_fields),
}
# Store field mapping for later use in output processing
tool_function["_field_mapping"] = field_mapping
return {"type": "function", "function": tool_function}
@staticmethod
@@ -413,12 +366,13 @@ class SmartDecisionMakerBlock(Block):
sink_block_properties = sink_block_input_schema.get("properties", {}).get(
link.sink_name, {}
)
sink_name = SmartDecisionMakerBlock.cleanup(link.sink_name)
description = (
sink_block_properties["description"]
if "description" in sink_block_properties
else f"The {link.sink_name} of the tool"
)
properties[link.sink_name] = {
properties[sink_name] = {
"type": "string",
"description": description,
"default": json.dumps(sink_block_properties.get("default", None)),
@@ -434,17 +388,24 @@ class SmartDecisionMakerBlock(Block):
return {"type": "function", "function": tool_function}
@staticmethod
async def _create_function_signature(
node_id: str,
) -> list[dict[str, Any]]:
async def _create_function_signature(node_id: str) -> list[dict[str, Any]]:
"""
Creates function signatures for connected tools.
Creates function signatures for tools linked to a specified node within a graph.
This method filters the graph links to identify those that are tools and are
connected to the given node_id. It then constructs function signatures for each
tool based on the metadata and input schema of the linked nodes.
Args:
node_id: The node_id for which to create function signatures.
Returns:
List of function signatures for tools
list[dict[str, Any]]: A list of dictionaries, each representing a function signature
for a tool, including its name, description, and parameters.
Raises:
ValueError: If no tool links are found for the specified node_id, or if a sink node
or its metadata cannot be found.
"""
db_client = get_database_manager_async_client()
tools = [
@@ -469,116 +430,20 @@ class SmartDecisionMakerBlock(Block):
raise ValueError(f"Sink node not found: {links[0].sink_id}")
if sink_node.block_id == AgentExecutorBlock().id:
tool_func = (
return_tool_functions.append(
await SmartDecisionMakerBlock._create_agent_function_signature(
sink_node, links
)
)
return_tool_functions.append(tool_func)
else:
tool_func = (
return_tool_functions.append(
await SmartDecisionMakerBlock._create_block_function_signature(
sink_node, links
)
)
return_tool_functions.append(tool_func)
return return_tool_functions
async def _attempt_llm_call_with_validation(
self,
credentials: llm.APIKeyCredentials,
input_data: Input,
current_prompt: list[dict],
tool_functions: list[dict[str, Any]],
):
"""
Attempt a single LLM call with tool validation.
Returns the response if successful, raises ValueError if validation fails.
"""
resp = await llm.llm_call(
credentials=credentials,
llm_model=input_data.model,
prompt=current_prompt,
max_tokens=input_data.max_tokens,
tools=tool_functions,
ollama_host=input_data.ollama_host,
parallel_tool_calls=input_data.multiple_tool_calls,
)
# Track LLM usage stats per call
self.merge_stats(
NodeExecutionStats(
input_token_count=resp.prompt_tokens,
output_token_count=resp.completion_tokens,
llm_call_count=1,
)
)
if not resp.tool_calls:
return resp
validation_errors_list: list[str] = []
for tool_call in resp.tool_calls:
tool_name = tool_call.function.name
try:
tool_args = json.loads(tool_call.function.arguments)
except Exception as e:
validation_errors_list.append(
f"Tool call '{tool_name}' has invalid JSON arguments: {e}"
)
continue
# Find the tool definition to get the expected arguments
tool_def = next(
(
tool
for tool in tool_functions
if tool["function"]["name"] == tool_name
),
None,
)
if tool_def is None and len(tool_functions) == 1:
tool_def = tool_functions[0]
# Get parameters schema from tool definition
if (
tool_def
and "function" in tool_def
and "parameters" in tool_def["function"]
):
parameters = tool_def["function"]["parameters"]
expected_args = parameters.get("properties", {})
required_params = set(parameters.get("required", []))
else:
expected_args = {arg: {} for arg in tool_args.keys()}
required_params = set()
# Validate tool call arguments
provided_args = set(tool_args.keys())
expected_args_set = set(expected_args.keys())
# Check for unexpected arguments (typos)
unexpected_args = provided_args - expected_args_set
# Only check for missing REQUIRED parameters
missing_required_args = required_params - provided_args
if unexpected_args or missing_required_args:
error_msg = f"Tool call '{tool_name}' has parameter errors:"
if unexpected_args:
error_msg += f" Unknown parameters: {sorted(unexpected_args)}."
if missing_required_args:
error_msg += f" Missing required parameters: {sorted(missing_required_args)}."
error_msg += f" Expected parameters: {sorted(expected_args_set)}."
if required_params:
error_msg += f" Required parameters: {sorted(required_params)}."
validation_errors_list.append(error_msg)
if validation_errors_list:
raise ValueError("; ".join(validation_errors_list))
return resp
async def run(
self,
input_data: Input,
@@ -601,19 +466,27 @@ class SmartDecisionMakerBlock(Block):
if pending_tool_calls and input_data.last_tool_output is None:
raise ValueError(f"Tool call requires an output for {pending_tool_calls}")
# Only assign the last tool output to the first pending tool call
tool_output = []
if pending_tool_calls and input_data.last_tool_output is not None:
# Get the first pending tool call ID
first_call_id = next(iter(pending_tool_calls.keys()))
tool_output.append(
_create_tool_response(first_call_id, input_data.last_tool_output)
)
# Add tool output to prompt right away
prompt.extend(tool_output)
# Check if there are still pending tool calls after handling the first one
remaining_pending_calls = get_pending_tool_calls(prompt)
# If there are still pending tool calls, yield the conversation and return early
if remaining_pending_calls:
yield "conversations", prompt
return
# Fall back to adding the tool output to the conversation history as a user prompt.
elif input_data.last_tool_output:
logger.error(
f"[SmartDecisionMakerBlock-node_exec_id={node_exec_id}] "
@@ -646,33 +519,25 @@ class SmartDecisionMakerBlock(Block):
):
prompt.append({"role": "user", "content": prefix + input_data.prompt})
current_prompt = list(prompt)
max_attempts = max(1, int(input_data.retry))
response = None
response = await llm.llm_call(
credentials=credentials,
llm_model=input_data.model,
prompt=prompt,
json_format=False,
max_tokens=input_data.max_tokens,
tools=tool_functions,
ollama_host=input_data.ollama_host,
parallel_tool_calls=input_data.multiple_tool_calls,
)
last_error = None
for attempt in range(max_attempts):
try:
response = await self._attempt_llm_call_with_validation(
credentials, input_data, current_prompt, tool_functions
)
break
except ValueError as e:
last_error = e
error_feedback = (
"Your tool call had parameter errors. Please fix the following issues and try again:\n"
+ f"- {str(e)}\n"
+ "\nPlease make sure to use the exact parameter names as specified in the function schema."
)
current_prompt = list(current_prompt) + [
{"role": "user", "content": error_feedback}
]
if response is None:
raise last_error or ValueError(
"Failed to get valid response after all retry attempts"
# Track LLM usage stats
self.merge_stats(
NodeExecutionStats(
input_token_count=response.prompt_tokens,
output_token_count=response.completion_tokens,
llm_call_count=1,
)
)
if not response.tool_calls:
yield "finished", response.response
@@ -682,6 +547,7 @@ class SmartDecisionMakerBlock(Block):
tool_name = tool_call.function.name
tool_args = json.loads(tool_call.function.arguments)
# Find the tool definition to get the expected arguments
tool_def = next(
(
tool
@@ -690,6 +556,7 @@ class SmartDecisionMakerBlock(Block):
),
None,
)
if (
tool_def
and "function" in tool_def
@@ -697,38 +564,20 @@ class SmartDecisionMakerBlock(Block):
):
expected_args = tool_def["function"]["parameters"].get("properties", {})
else:
expected_args = {arg: {} for arg in tool_args.keys()}
expected_args = tool_args.keys()
# Get field mapping from tool definition
field_mapping = (
tool_def.get("function", {}).get("_field_mapping", {})
if tool_def
else {}
)
for clean_arg_name in expected_args:
# arg_name is now always the cleaned field name (for Anthropic API compliance)
# Get the original field name from field mapping for proper emit key generation
original_field_name = field_mapping.get(clean_arg_name, clean_arg_name)
arg_value = tool_args.get(clean_arg_name)
sanitized_tool_name = self.cleanup(tool_name)
sanitized_arg_name = self.cleanup(original_field_name)
emit_key = f"tools_^_{sanitized_tool_name}_~_{sanitized_arg_name}"
logger.debug(
"[SmartDecisionMakerBlock|geid:%s|neid:%s] emit %s",
graph_exec_id,
node_exec_id,
emit_key,
)
yield emit_key, arg_value
# Yield provided arguments and None for missing ones
for arg_name in expected_args:
if arg_name in tool_args:
yield f"tools_^_{tool_name}_~_{arg_name}", tool_args[arg_name]
else:
yield f"tools_^_{tool_name}_~_{arg_name}", None
# Add reasoning to conversation history if available
if response.reasoning:
prompt.append(
{"role": "assistant", "content": f"[Reasoning]: {response.reasoning}"}
)
prompt.append(_convert_raw_response_to_dict(response.raw_response))
prompt.append(response.raw_response)
yield "conversations", prompt

View File

@@ -19,7 +19,7 @@ async def test_block_ids_valid(block: Type[Block]):
# Skip list for blocks with known invalid UUIDs
skip_blocks = {
"GetWeatherInformationBlock",
"ExecuteCodeBlock",
"CodeExecutionBlock",
"CountdownTimerBlock",
"TwitterGetListTweetsBlock",
"TwitterRemoveListMemberBlock",

View File

@@ -1,269 +0,0 @@
"""
Test security fixes for various DoS vulnerabilities.
"""
import asyncio
from unittest.mock import patch
import pytest
from backend.blocks.code_extraction_block import CodeExtractionBlock
from backend.blocks.iteration import StepThroughItemsBlock
from backend.blocks.llm import AITextSummarizerBlock
from backend.blocks.text import ExtractTextInformationBlock
from backend.blocks.xml_parser import XMLParserBlock
from backend.util.file import store_media_file
from backend.util.type import MediaFileType
class TestCodeExtractionBlockSecurity:
"""Test ReDoS fixes in CodeExtractionBlock."""
async def test_redos_protection(self):
"""Test that the regex patterns don't cause ReDoS."""
block = CodeExtractionBlock()
# Test with input that would previously cause ReDoS
malicious_input = "```python" + " " * 10000 # Large spaces
result = []
async for output_name, output_data in block.run(
CodeExtractionBlock.Input(text=malicious_input)
):
result.append((output_name, output_data))
# Should complete without hanging
assert len(result) >= 1
assert any(name == "remaining_text" for name, _ in result)
class TestAITextSummarizerBlockSecurity:
"""Test memory exhaustion fixes in AITextSummarizerBlock."""
def test_split_text_limits(self):
"""Test that _split_text has proper limits."""
# Test text size limit
large_text = "a" * 2_000_000 # 2MB text
result = AITextSummarizerBlock._split_text(large_text, 1000, 100)
# Should be truncated to 1MB
total_chars = sum(len(chunk) for chunk in result)
assert total_chars <= 1_000_000 + 1000 # Allow for chunk boundary
# Test chunk count limit
result = AITextSummarizerBlock._split_text("word " * 10000, 10, 9)
assert len(result) <= 100 # MAX_CHUNKS limit
# Test parameter validation
result = AITextSummarizerBlock._split_text(
"test", 10, 15
) # overlap > max_tokens
assert len(result) >= 1 # Should still work
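As a rough illustration of the contract these assertions pin down (a 1 MB text cap, at most 100 chunks, and tolerance for overlap >= max_tokens), here is a hedged sketch; the constants and helper name are inferred from the test, not copied from the block:

MAX_TEXT_CHARS = 1_000_000  # inferred from the truncation assertion above
MAX_CHUNKS = 100            # inferred from the chunk-count assertion above

def split_text_bounded(text: str, max_tokens: int, overlap: int) -> list[str]:
    # Truncate oversized input first, then emit word-based chunks with
    # overlap, never producing more than MAX_CHUNKS chunks.
    text = text[:MAX_TEXT_CHARS]
    words = text.split()
    step = max(1, max_tokens - overlap)  # guards against overlap >= max_tokens
    chunks: list[str] = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start : start + max_tokens]))
        if len(chunks) >= MAX_CHUNKS:
            break
    return chunks or [text]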
class TestExtractTextInformationBlockSecurity:
"""Test ReDoS and memory exhaustion fixes in ExtractTextInformationBlock."""
async def test_text_size_limits(self):
"""Test text size limits."""
block = ExtractTextInformationBlock()
# Test with large input
large_text = "a" * 2_000_000 # 2MB
results = []
async for output_name, output_data in block.run(
ExtractTextInformationBlock.Input(
text=large_text, pattern=r"a+", find_all=True, group=0
)
):
results.append((output_name, output_data))
# Should complete and have limits applied
matched_results = [r for name, r in results if name == "matched_results"]
if matched_results:
assert len(matched_results[0]) <= 1000 # MAX_MATCHES limit
async def test_dangerous_pattern_timeout(self):
"""Test timeout protection for dangerous patterns."""
block = ExtractTextInformationBlock()
# Test with potentially dangerous lookahead pattern
test_input = "a" * 1000
# This should complete quickly due to timeout protection
start_time = asyncio.get_event_loop().time()
results = []
async for output_name, output_data in block.run(
ExtractTextInformationBlock.Input(
text=test_input, pattern=r"(?=.+)", find_all=True, group=0
)
):
results.append((output_name, output_data))
end_time = asyncio.get_event_loop().time()
# Should complete within reasonable time (much less than 5s timeout)
assert (end_time - start_time) < 10
async def test_redos_catastrophic_backtracking(self):
"""Test that ReDoS patterns with catastrophic backtracking are handled."""
block = ExtractTextInformationBlock()
# Pattern that causes catastrophic backtracking: (a+)+b
# With input "aaaaaaaaaaaaaaaaaaaaaaaaaaaa" (no 'b'), this causes exponential time
dangerous_pattern = r"(a+)+b"
test_input = "a" * 30 # 30 'a's without a 'b' at the end
# This should be handled by timeout protection or pattern detection
start_time = asyncio.get_event_loop().time()
results = []
async for output_name, output_data in block.run(
ExtractTextInformationBlock.Input(
text=test_input, pattern=dangerous_pattern, find_all=True, group=0
)
):
results.append((output_name, output_data))
end_time = asyncio.get_event_loop().time()
elapsed = end_time - start_time
# Should complete within timeout (6 seconds to be safe)
# The current threading.Timer approach doesn't work, so this will likely fail,
# demonstrating the need for a fix
assert elapsed < 6, f"Regex took {elapsed}s, timeout mechanism failed"
# Should return empty results on timeout or no match
matched_results = [r for name, r in results if name == "matched_results"]
assert matched_results[0] == [] # No matches expected
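Since the comment above notes that a threading.Timer cannot forcibly stop a regex match stuck inside the C-level matcher, one workable alternative is process-level isolation. A minimal sketch, not the block's actual mechanism:

import multiprocessing
import re

def _worker(pattern: str, text: str, queue) -> None:
    queue.put(re.findall(pattern, text))

def findall_with_timeout(pattern: str, text: str, timeout: float = 5.0) -> list:
    # Run the match in a child process so a runaway pattern like (a+)+b
    # can be killed instead of blocking the event loop.
    queue = multiprocessing.Queue()
    proc = multiprocessing.Process(target=_worker, args=(pattern, text, queue))
    proc.start()
    proc.join(timeout)
    if proc.is_alive():
        proc.terminate()
        proc.join()
        return []  # timed out -> treat as no matches
    return queue.get()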
class TestStepThroughItemsBlockSecurity:
"""Test iteration limits in StepThroughItemsBlock."""
async def test_item_count_limits(self):
"""Test maximum item count limits."""
block = StepThroughItemsBlock()
# Test with too many items
large_list = list(range(20000)) # Exceeds MAX_ITEMS (10000)
with pytest.raises(ValueError, match="Too many items"):
async for _ in block.run(StepThroughItemsBlock.Input(items=large_list)):
pass
async def test_string_size_limits(self):
"""Test string input size limits."""
block = StepThroughItemsBlock()
# Test with large JSON string
large_string = '["item"]' * 200000 # Large JSON string
with pytest.raises(ValueError, match="Input too large"):
async for _ in block.run(
StepThroughItemsBlock.Input(items_str=large_string)
):
pass
async def test_normal_iteration_works(self):
"""Test that normal iteration still works."""
block = StepThroughItemsBlock()
results = []
async for output_name, output_data in block.run(
StepThroughItemsBlock.Input(items=[1, 2, 3])
):
results.append((output_name, output_data))
# Should have 6 outputs (item, key for each of 3 items)
assert len(results) == 6
items = [data for name, data in results if name == "item"]
assert items == [1, 2, 3]
class TestXMLParserBlockSecurity:
"""Test XML size limits in XMLParserBlock."""
async def test_xml_size_limits(self):
"""Test XML input size limits."""
block = XMLParserBlock()
# Test with large XML - need to exceed 10MB limit
# Each "<item>data</item>" is 17 chars, need ~620K items for >10MB
large_xml = "<root>" + "<item>data</item>" * 620000 + "</root>"
with pytest.raises(ValueError, match="XML too large"):
async for _ in block.run(XMLParserBlock.Input(input_xml=large_xml)):
pass
class TestStoreMediaFileSecurity:
"""Test file storage security limits."""
@patch("backend.util.file.scan_content_safe")
@patch("backend.util.file.get_cloud_storage_handler")
async def test_file_size_limits(self, mock_cloud_storage, mock_scan):
"""Test file size limits."""
# Mock cloud storage handler - get_cloud_storage_handler is async
# but is_cloud_path and parse_cloud_path are sync methods
from unittest.mock import MagicMock
mock_handler = MagicMock()
mock_handler.is_cloud_path.return_value = False
# Make get_cloud_storage_handler an async function that returns the mock handler
async def async_get_handler():
return mock_handler
mock_cloud_storage.side_effect = async_get_handler
mock_scan.return_value = None
# Test with large base64 content
large_content = "a" * (200 * 1024 * 1024) # 200MB
large_data_uri = f"data:text/plain;base64,{large_content}"
with pytest.raises(ValueError, match="File too large"):
await store_media_file(
graph_exec_id="test",
file=MediaFileType(large_data_uri),
user_id="test_user",
)
@patch("backend.util.file.Path")
@patch("backend.util.file.scan_content_safe")
@patch("backend.util.file.get_cloud_storage_handler")
async def test_directory_size_limits(self, mock_cloud_storage, mock_scan, MockPath):
"""Test directory size limits."""
from unittest.mock import MagicMock
mock_handler = MagicMock()
mock_handler.is_cloud_path.return_value = False
async def async_get_handler():
return mock_handler
mock_cloud_storage.side_effect = async_get_handler
mock_scan.return_value = None
# Create mock path instance for the execution directory
mock_path_instance = MagicMock()
mock_path_instance.exists.return_value = True
# Mock glob to return files that total > 1GB
mock_file = MagicMock()
mock_file.is_file.return_value = True
mock_file.stat.return_value.st_size = 2 * 1024 * 1024 * 1024 # 2GB
mock_path_instance.glob.return_value = [mock_file]
# Make Path() return our mock
MockPath.return_value = mock_path_instance
# Should raise an error when directory size exceeds limit
with pytest.raises(ValueError, match="Disk usage limit exceeded"):
await store_media_file(
graph_exec_id="test",
file=MediaFileType(
"data:text/plain;base64,dGVzdA=="
), # Small test file
user_id="test_user",
)

View File

@@ -30,6 +30,7 @@ class TestLLMStatsTracking:
credentials=llm.TEST_CREDENTIALS,
llm_model=llm.LlmModel.GPT4O,
prompt=[{"role": "user", "content": "Hello"}],
json_format=False,
max_tokens=100,
)
@@ -41,8 +42,6 @@ class TestLLMStatsTracking:
@pytest.mark.asyncio
async def test_ai_structured_response_block_tracks_stats(self):
"""Test that AIStructuredResponseGeneratorBlock correctly tracks stats."""
from unittest.mock import patch
import backend.blocks.llm as llm
block = llm.AIStructuredResponseGeneratorBlock()
@@ -52,7 +51,7 @@ class TestLLMStatsTracking:
return llm.LLMResponse(
raw_response="",
prompt=[],
response='<json_output id="test123456">{"key1": "value1", "key2": "value2"}</json_output>',
response='{"key1": "value1", "key2": "value2"}',
tool_calls=None,
prompt_tokens=15,
completion_tokens=25,
@@ -70,12 +69,10 @@ class TestLLMStatsTracking:
)
outputs = {}
# Mock secrets.token_hex to return consistent ID
with patch("secrets.token_hex", return_value="test123456"):
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
# Check stats
assert block.execution_stats.input_token_count == 15
@@ -146,7 +143,7 @@ class TestLLMStatsTracking:
return llm.LLMResponse(
raw_response="",
prompt=[],
response='<json_output id="test123456">{"wrong": "format"}</json_output>',
response='{"wrong": "format"}',
tool_calls=None,
prompt_tokens=10,
completion_tokens=15,
@@ -157,7 +154,7 @@ class TestLLMStatsTracking:
return llm.LLMResponse(
raw_response="",
prompt=[],
response='<json_output id="test123456">{"key1": "value1", "key2": "value2"}</json_output>',
response='{"key1": "value1", "key2": "value2"}',
tool_calls=None,
prompt_tokens=20,
completion_tokens=25,
@@ -176,12 +173,10 @@ class TestLLMStatsTracking:
)
outputs = {}
# Mock secrets.token_hex to return consistent ID
with patch("secrets.token_hex", return_value="test123456"):
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
# Check stats - should accumulate both calls
# For 2 attempts: attempt 1 (failed) + attempt 2 (success) = 2 total
@@ -274,8 +269,7 @@ class TestLLMStatsTracking:
mock_response.choices = [
MagicMock(
message=MagicMock(
content='<json_output id="test123456">{"summary": "Test chunk summary"}</json_output>',
tool_calls=None,
content='{"summary": "Test chunk summary"}', tool_calls=None
)
)
]
@@ -283,7 +277,7 @@ class TestLLMStatsTracking:
mock_response.choices = [
MagicMock(
message=MagicMock(
content='<json_output id="test123456">{"final_summary": "Test final summary"}</json_output>',
content='{"final_summary": "Test final summary"}',
tool_calls=None,
)
)
@@ -304,13 +298,11 @@ class TestLLMStatsTracking:
max_tokens=1000, # Large enough to avoid chunking
)
# Mock secrets.token_hex to return consistent ID
with patch("secrets.token_hex", return_value="test123456"):
outputs = {}
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
outputs = {}
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
print(f"Actual calls made: {call_count}")
print(f"Block stats: {block.execution_stats}")
@@ -465,7 +457,7 @@ class TestLLMStatsTracking:
return llm.LLMResponse(
raw_response="",
prompt=[],
response='<json_output id="test123456">{"result": "test"}</json_output>',
response='{"result": "test"}',
tool_calls=None,
prompt_tokens=10,
completion_tokens=20,
@@ -484,12 +476,10 @@ class TestLLMStatsTracking:
# Run the block
outputs = {}
# Mock secrets.token_hex to return consistent ID
with patch("secrets.token_hex", return_value="test123456"):
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
async for output_name, output_data in block.run(
input_data, credentials=llm.TEST_CREDENTIALS
):
outputs[output_name] = output_data
# Block finished - now grab and assert stats
assert block.execution_stats is not None

View File

@@ -216,17 +216,8 @@ async def test_smart_decision_maker_tracks_llm_stats():
}
# Mock the _create_function_signature method to avoid database calls
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call",
new_callable=AsyncMock,
return_value=mock_response,
), patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=[],
with patch("backend.blocks.llm.llm_call", return_value=mock_response), patch.object(
SmartDecisionMakerBlock, "_create_function_signature", return_value=[]
):
# Create test input
@@ -258,471 +249,3 @@ async def test_smart_decision_maker_tracks_llm_stats():
# Verify outputs
assert "finished" in outputs # Should have finished since no tool calls
assert outputs["finished"] == "I need to think about this."
@pytest.mark.asyncio
async def test_smart_decision_maker_parameter_validation():
"""Test that SmartDecisionMakerBlock correctly validates tool call parameters."""
from unittest.mock import MagicMock, patch
import backend.blocks.llm as llm_module
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
block = SmartDecisionMakerBlock()
# Mock tool functions with specific parameter schema
mock_tool_functions = [
{
"type": "function",
"function": {
"name": "search_keywords",
"description": "Search for keywords with difficulty filtering",
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search query"},
"max_keyword_difficulty": {
"type": "integer",
"description": "Maximum keyword difficulty (required)",
},
"optional_param": {
"type": "string",
"description": "Optional parameter with default",
"default": "default_value",
},
},
"required": ["query", "max_keyword_difficulty"],
},
},
}
]
# Test case 1: Tool call with TYPO in parameter name (should retry and eventually fail)
mock_tool_call_with_typo = MagicMock()
mock_tool_call_with_typo.function.name = "search_keywords"
mock_tool_call_with_typo.function.arguments = '{"query": "test", "maximum_keyword_difficulty": 50}' # TYPO: maximum instead of max
mock_response_with_typo = MagicMock()
mock_response_with_typo.response = None
mock_response_with_typo.tool_calls = [mock_tool_call_with_typo]
mock_response_with_typo.prompt_tokens = 50
mock_response_with_typo.completion_tokens = 25
mock_response_with_typo.reasoning = None
mock_response_with_typo.raw_response = {"role": "assistant", "content": None}
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call",
new_callable=AsyncMock,
return_value=mock_response_with_typo,
) as mock_llm_call, patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=mock_tool_functions,
):
input_data = SmartDecisionMakerBlock.Input(
prompt="Search for keywords",
model=llm_module.LlmModel.GPT4O,
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
retry=2, # Set retry to 2 for testing
)
# Should raise ValueError after retries due to typo'd parameter name
with pytest.raises(ValueError) as exc_info:
outputs = {}
async for output_name, output_data in block.run(
input_data,
credentials=llm_module.TEST_CREDENTIALS,
graph_id="test-graph-id",
node_id="test-node-id",
graph_exec_id="test-exec-id",
node_exec_id="test-node-exec-id",
user_id="test-user-id",
):
outputs[output_name] = output_data
# Verify error message contains details about the typo
error_msg = str(exc_info.value)
assert "Tool call 'search_keywords' has parameter errors" in error_msg
assert "Unknown parameters: ['maximum_keyword_difficulty']" in error_msg
# Verify that LLM was called the expected number of times (retries)
assert mock_llm_call.call_count == 2 # Should retry based on input_data.retry
# Test case 2: Tool call missing REQUIRED parameter (should raise ValueError)
mock_tool_call_missing_required = MagicMock()
mock_tool_call_missing_required.function.name = "search_keywords"
mock_tool_call_missing_required.function.arguments = (
'{"query": "test"}' # Missing required max_keyword_difficulty
)
mock_response_missing_required = MagicMock()
mock_response_missing_required.response = None
mock_response_missing_required.tool_calls = [mock_tool_call_missing_required]
mock_response_missing_required.prompt_tokens = 50
mock_response_missing_required.completion_tokens = 25
mock_response_missing_required.reasoning = None
mock_response_missing_required.raw_response = {"role": "assistant", "content": None}
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call",
new_callable=AsyncMock,
return_value=mock_response_missing_required,
), patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=mock_tool_functions,
):
input_data = SmartDecisionMakerBlock.Input(
prompt="Search for keywords",
model=llm_module.LlmModel.GPT4O,
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
)
# Should raise ValueError due to missing required parameter
with pytest.raises(ValueError) as exc_info:
outputs = {}
async for output_name, output_data in block.run(
input_data,
credentials=llm_module.TEST_CREDENTIALS,
graph_id="test-graph-id",
node_id="test-node-id",
graph_exec_id="test-exec-id",
node_exec_id="test-node-exec-id",
user_id="test-user-id",
):
outputs[output_name] = output_data
error_msg = str(exc_info.value)
assert "Tool call 'search_keywords' has parameter errors" in error_msg
assert "Missing required parameters: ['max_keyword_difficulty']" in error_msg
# Test case 3: Valid tool call with OPTIONAL parameter missing (should succeed)
mock_tool_call_valid = MagicMock()
mock_tool_call_valid.function.name = "search_keywords"
mock_tool_call_valid.function.arguments = '{"query": "test", "max_keyword_difficulty": 50}' # optional_param missing, but that's OK
mock_response_valid = MagicMock()
mock_response_valid.response = None
mock_response_valid.tool_calls = [mock_tool_call_valid]
mock_response_valid.prompt_tokens = 50
mock_response_valid.completion_tokens = 25
mock_response_valid.reasoning = None
mock_response_valid.raw_response = {"role": "assistant", "content": None}
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call",
new_callable=AsyncMock,
return_value=mock_response_valid,
), patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=mock_tool_functions,
):
input_data = SmartDecisionMakerBlock.Input(
prompt="Search for keywords",
model=llm_module.LlmModel.GPT4O,
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
)
# Should succeed - optional parameter missing is OK
outputs = {}
async for output_name, output_data in block.run(
input_data,
credentials=llm_module.TEST_CREDENTIALS,
graph_id="test-graph-id",
node_id="test-node-id",
graph_exec_id="test-exec-id",
node_exec_id="test-node-exec-id",
user_id="test-user-id",
):
outputs[output_name] = output_data
# Verify tool outputs were generated correctly
assert "tools_^_search_keywords_~_query" in outputs
assert outputs["tools_^_search_keywords_~_query"] == "test"
assert "tools_^_search_keywords_~_max_keyword_difficulty" in outputs
assert outputs["tools_^_search_keywords_~_max_keyword_difficulty"] == 50
# Optional parameter should be None when not provided
assert "tools_^_search_keywords_~_optional_param" in outputs
assert outputs["tools_^_search_keywords_~_optional_param"] is None
# Test case 4: Valid tool call with ALL parameters (should succeed)
mock_tool_call_all_params = MagicMock()
mock_tool_call_all_params.function.name = "search_keywords"
mock_tool_call_all_params.function.arguments = '{"query": "test", "max_keyword_difficulty": 50, "optional_param": "custom_value"}'
mock_response_all_params = MagicMock()
mock_response_all_params.response = None
mock_response_all_params.tool_calls = [mock_tool_call_all_params]
mock_response_all_params.prompt_tokens = 50
mock_response_all_params.completion_tokens = 25
mock_response_all_params.reasoning = None
mock_response_all_params.raw_response = {"role": "assistant", "content": None}
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call",
new_callable=AsyncMock,
return_value=mock_response_all_params,
), patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=mock_tool_functions,
):
input_data = SmartDecisionMakerBlock.Input(
prompt="Search for keywords",
model=llm_module.LlmModel.GPT4O,
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
)
# Should succeed with all parameters
outputs = {}
async for output_name, output_data in block.run(
input_data,
credentials=llm_module.TEST_CREDENTIALS,
graph_id="test-graph-id",
node_id="test-node-id",
graph_exec_id="test-exec-id",
node_exec_id="test-node-exec-id",
user_id="test-user-id",
):
outputs[output_name] = output_data
# Verify all tool outputs were generated correctly
assert outputs["tools_^_search_keywords_~_query"] == "test"
assert outputs["tools_^_search_keywords_~_max_keyword_difficulty"] == 50
assert outputs["tools_^_search_keywords_~_optional_param"] == "custom_value"
@pytest.mark.asyncio
async def test_smart_decision_maker_raw_response_conversion():
"""Test that SmartDecisionMaker correctly handles different raw_response types with retry mechanism."""
from unittest.mock import MagicMock, patch
import backend.blocks.llm as llm_module
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
block = SmartDecisionMakerBlock()
# Mock tool functions
mock_tool_functions = [
{
"type": "function",
"function": {
"name": "test_tool",
"parameters": {
"type": "object",
"properties": {"param": {"type": "string"}},
"required": ["param"],
},
},
}
]
# Test case 1: Simulate ChatCompletionMessage raw_response that caused the original error
class MockChatCompletionMessage:
"""Simulate OpenAI's ChatCompletionMessage object that lacks .get() method"""
def __init__(self, role, content, tool_calls=None):
self.role = role
self.content = content
self.tool_calls = tool_calls or []
# This is what caused the error - no .get() method
# def get(self, key, default=None): # Intentionally missing
# First response: has invalid parameter name (triggers retry)
mock_tool_call_invalid = MagicMock()
mock_tool_call_invalid.function.name = "test_tool"
mock_tool_call_invalid.function.arguments = (
'{"wrong_param": "test_value"}' # Invalid parameter name
)
mock_response_retry = MagicMock()
mock_response_retry.response = None
mock_response_retry.tool_calls = [mock_tool_call_invalid]
mock_response_retry.prompt_tokens = 50
mock_response_retry.completion_tokens = 25
mock_response_retry.reasoning = None
# This would cause the original error without our fix
mock_response_retry.raw_response = MockChatCompletionMessage(
role="assistant", content=None, tool_calls=[mock_tool_call_invalid]
)
# Second response: successful (correct parameter name)
mock_tool_call_valid = MagicMock()
mock_tool_call_valid.function.name = "test_tool"
mock_tool_call_valid.function.arguments = (
'{"param": "test_value"}' # Correct parameter name
)
mock_response_success = MagicMock()
mock_response_success.response = None
mock_response_success.tool_calls = [mock_tool_call_valid]
mock_response_success.prompt_tokens = 50
mock_response_success.completion_tokens = 25
mock_response_success.reasoning = None
mock_response_success.raw_response = MockChatCompletionMessage(
role="assistant", content=None, tool_calls=[mock_tool_call_valid]
)
# Mock llm_call to return different responses on different calls
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call", new_callable=AsyncMock
) as mock_llm_call, patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=mock_tool_functions,
):
# First call returns response that will trigger retry due to validation error
# Second call returns successful response
mock_llm_call.side_effect = [mock_response_retry, mock_response_success]
input_data = SmartDecisionMakerBlock.Input(
prompt="Test prompt",
model=llm_module.LlmModel.GPT4O,
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
retry=2,
)
# Should succeed after retry, demonstrating our helper function works
outputs = {}
async for output_name, output_data in block.run(
input_data,
credentials=llm_module.TEST_CREDENTIALS,
graph_id="test-graph-id",
node_id="test-node-id",
graph_exec_id="test-exec-id",
node_exec_id="test-node-exec-id",
user_id="test-user-id",
):
outputs[output_name] = output_data
# Verify the tool output was generated successfully
assert "tools_^_test_tool_~_param" in outputs
assert outputs["tools_^_test_tool_~_param"] == "test_value"
# Verify conversation history was properly maintained
assert "conversations" in outputs
conversations = outputs["conversations"]
assert len(conversations) > 0
# The conversations should contain properly converted raw_response objects as dicts
# This would have failed with the original bug due to ChatCompletionMessage.get() error
for msg in conversations:
assert isinstance(msg, dict), f"Expected dict, got {type(msg)}"
if msg.get("role") == "assistant":
# Should have been converted from ChatCompletionMessage to dict
assert "role" in msg
# Verify LLM was called twice (initial + 1 retry)
assert mock_llm_call.call_count == 2
# Test case 2: Test with different raw_response types (Ollama string, dict)
# Test Ollama string response
mock_response_ollama = MagicMock()
mock_response_ollama.response = "I'll help you with that."
mock_response_ollama.tool_calls = None
mock_response_ollama.prompt_tokens = 30
mock_response_ollama.completion_tokens = 15
mock_response_ollama.reasoning = None
mock_response_ollama.raw_response = (
"I'll help you with that." # Ollama returns string
)
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call",
new_callable=AsyncMock,
return_value=mock_response_ollama,
), patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=[], # No tools for this test
):
input_data = SmartDecisionMakerBlock.Input(
prompt="Simple prompt",
model=llm_module.LlmModel.GPT4O,
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
)
outputs = {}
async for output_name, output_data in block.run(
input_data,
credentials=llm_module.TEST_CREDENTIALS,
graph_id="test-graph-id",
node_id="test-node-id",
graph_exec_id="test-exec-id",
node_exec_id="test-node-exec-id",
user_id="test-user-id",
):
outputs[output_name] = output_data
# Should finish since no tool calls
assert "finished" in outputs
assert outputs["finished"] == "I'll help you with that."
# Test case 3: Test with dict raw_response (some providers/tests)
mock_response_dict = MagicMock()
mock_response_dict.response = "Test response"
mock_response_dict.tool_calls = None
mock_response_dict.prompt_tokens = 25
mock_response_dict.completion_tokens = 10
mock_response_dict.reasoning = None
mock_response_dict.raw_response = {
"role": "assistant",
"content": "Test response",
} # Dict format
from unittest.mock import AsyncMock
with patch(
"backend.blocks.llm.llm_call",
new_callable=AsyncMock,
return_value=mock_response_dict,
), patch.object(
SmartDecisionMakerBlock,
"_create_function_signature",
new_callable=AsyncMock,
return_value=[],
):
input_data = SmartDecisionMakerBlock.Input(
prompt="Another test",
model=llm_module.LlmModel.GPT4O,
credentials=llm_module.TEST_CREDENTIALS_INPUT, # type: ignore
)
outputs = {}
async for output_name, output_data in block.run(
input_data,
credentials=llm_module.TEST_CREDENTIALS,
graph_id="test-graph-id",
node_id="test-node-id",
graph_exec_id="test-exec-id",
node_exec_id="test-node-exec-id",
user_id="test-user-id",
):
outputs[output_name] = output_data
assert "finished" in outputs
assert outputs["finished"] == "Test response"

View File

@@ -48,24 +48,16 @@ async def test_smart_decision_maker_handles_dynamic_dict_fields():
assert "parameters" in signature["function"]
assert "properties" in signature["function"]["parameters"]
# Check that dynamic fields are handled with original names
# Check that dynamic fields are handled
properties = signature["function"]["parameters"]["properties"]
assert len(properties) == 3 # Should have all three fields
# Check that field names are cleaned (for Anthropic API compatibility)
assert "values___name" in properties
assert "values___age" in properties
assert "values___city" in properties
# Each dynamic field should have proper schema with descriptive text
for field_name, prop_value in properties.items():
# Each dynamic field should have proper schema
for prop_value in properties.values():
assert "type" in prop_value
assert prop_value["type"] == "string" # Dynamic fields get string type
assert "description" in prop_value
# Check that descriptions properly explain the dynamic field
if field_name == "values___name":
assert "Dictionary field 'name'" in prop_value["description"]
assert "values['name']" in prop_value["description"]
assert "Dynamic value for" in prop_value["description"]
@pytest.mark.asyncio
@@ -104,18 +96,10 @@ async def test_smart_decision_maker_handles_dynamic_list_fields():
properties = signature["function"]["parameters"]["properties"]
assert len(properties) == 2 # Should have both list items
# Check that field names are cleaned (for Anthropic API compatibility)
assert "entries___0" in properties
assert "entries___1" in properties
# Each dynamic field should have proper schema with descriptive text
for field_name, prop_value in properties.items():
# Each dynamic field should have proper schema
for prop_value in properties.values():
assert prop_value["type"] == "string"
assert "description" in prop_value
# Check that descriptions properly explain the list field
if field_name == "entries___0":
assert "List item 0" in prop_value["description"]
assert "entries[0]" in prop_value["description"]
assert "Dynamic value for" in prop_value["description"]
@pytest.mark.asyncio

View File

@@ -1,553 +0,0 @@
"""Comprehensive tests for SmartDecisionMakerBlock dynamic field handling."""
import json
from unittest.mock import AsyncMock, Mock, patch
import pytest
from backend.blocks.data_manipulation import AddToListBlock, CreateDictionaryBlock
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.blocks.text import MatchTextPatternBlock
from backend.data.dynamic_fields import get_dynamic_field_description
@pytest.mark.asyncio
async def test_dynamic_field_description_generation():
"""Test that dynamic field descriptions are generated correctly."""
# Test dictionary field description
desc = get_dynamic_field_description("values_#_name")
assert "Dictionary field 'name' for base field 'values'" in desc
assert "values['name']" in desc
# Test list field description
desc = get_dynamic_field_description("items_$_0")
assert "List item 0 for base field 'items'" in desc
assert "items[0]" in desc
# Test object field description
desc = get_dynamic_field_description("user_@_email")
assert "Object attribute 'email' for base field 'user'" in desc
assert "user.email" in desc
# Test regular field fallback
desc = get_dynamic_field_description("regular_field")
assert desc == "Value for regular_field"
@pytest.mark.asyncio
async def test_create_block_function_signature_with_dict_fields():
"""Test that function signatures are created correctly for dictionary dynamic fields."""
block = SmartDecisionMakerBlock()
# Create a mock node for CreateDictionaryBlock
mock_node = Mock()
mock_node.block = CreateDictionaryBlock()
mock_node.block_id = CreateDictionaryBlock().id
mock_node.input_default = {}
# Create mock links with dynamic dictionary fields (source sanitized, sink original)
mock_links = [
Mock(
source_name="tools_^_create_dict_~_values___name", # Sanitized source
sink_name="values_#_name", # Original sink
sink_id="dict_node_id",
source_id="smart_decision_node_id",
),
Mock(
source_name="tools_^_create_dict_~_values___age", # Sanitized source
sink_name="values_#_age", # Original sink
sink_id="dict_node_id",
source_id="smart_decision_node_id",
),
Mock(
source_name="tools_^_create_dict_~_values___email", # Sanitized source
sink_name="values_#_email", # Original sink
sink_id="dict_node_id",
source_id="smart_decision_node_id",
),
]
# Generate function signature
signature = await block._create_block_function_signature(mock_node, mock_links) # type: ignore
# Verify the signature structure
assert signature["type"] == "function"
assert "function" in signature
assert "parameters" in signature["function"]
assert "properties" in signature["function"]["parameters"]
# Check that dynamic fields are handled with original names
properties = signature["function"]["parameters"]["properties"]
assert len(properties) == 3
# Check cleaned field names (for Anthropic API compatibility)
assert "values___name" in properties
assert "values___age" in properties
assert "values___email" in properties
# Check descriptions mention they are dictionary fields
assert "Dictionary field" in properties["values___name"]["description"]
assert "values['name']" in properties["values___name"]["description"]
assert "Dictionary field" in properties["values___age"]["description"]
assert "values['age']" in properties["values___age"]["description"]
assert "Dictionary field" in properties["values___email"]["description"]
assert "values['email']" in properties["values___email"]["description"]
@pytest.mark.asyncio
async def test_create_block_function_signature_with_list_fields():
"""Test that function signatures are created correctly for list dynamic fields."""
block = SmartDecisionMakerBlock()
# Create a mock node for AddToListBlock
mock_node = Mock()
mock_node.block = AddToListBlock()
mock_node.block_id = AddToListBlock().id
mock_node.input_default = {}
# Create mock links with dynamic list fields
mock_links = [
Mock(
source_name="tools_^_add_list_~_0",
sink_name="entries_$_0", # Dynamic list field
sink_id="list_node_id",
source_id="smart_decision_node_id",
),
Mock(
source_name="tools_^_add_list_~_1",
sink_name="entries_$_1", # Dynamic list field
sink_id="list_node_id",
source_id="smart_decision_node_id",
),
Mock(
source_name="tools_^_add_list_~_2",
sink_name="entries_$_2", # Dynamic list field
sink_id="list_node_id",
source_id="smart_decision_node_id",
),
]
# Generate function signature
signature = await block._create_block_function_signature(mock_node, mock_links) # type: ignore
# Verify the signature structure
assert signature["type"] == "function"
properties = signature["function"]["parameters"]["properties"]
# Check cleaned field names (for Anthropic API compatibility)
assert "entries___0" in properties
assert "entries___1" in properties
assert "entries___2" in properties
# Check descriptions mention they are list items
assert "List item 0" in properties["entries___0"]["description"]
assert "entries[0]" in properties["entries___0"]["description"]
assert "List item 1" in properties["entries___1"]["description"]
assert "entries[1]" in properties["entries___1"]["description"]
@pytest.mark.asyncio
async def test_create_block_function_signature_with_object_fields():
"""Test that function signatures are created correctly for object dynamic fields."""
block = SmartDecisionMakerBlock()
# Create a mock node for MatchTextPatternBlock (simulating object fields)
mock_node = Mock()
mock_node.block = MatchTextPatternBlock()
mock_node.block_id = MatchTextPatternBlock().id
mock_node.input_default = {}
# Create mock links with dynamic object fields
mock_links = [
Mock(
source_name="tools_^_extract_~_user_name",
sink_name="data_@_user_name", # Dynamic object field
sink_id="extract_node_id",
source_id="smart_decision_node_id",
),
Mock(
source_name="tools_^_extract_~_user_email",
sink_name="data_@_user_email", # Dynamic object field
sink_id="extract_node_id",
source_id="smart_decision_node_id",
),
]
# Generate function signature
signature = await block._create_block_function_signature(mock_node, mock_links) # type: ignore
# Verify the signature structure
properties = signature["function"]["parameters"]["properties"]
# Check cleaned field names (for Anthropic API compatibility)
assert "data___user_name" in properties
assert "data___user_email" in properties
# Check descriptions mention they are object attributes
assert "Object attribute" in properties["data___user_name"]["description"]
assert "data.user_name" in properties["data___user_name"]["description"]
@pytest.mark.asyncio
async def test_create_function_signature():
"""Test that the mapping between sanitized and original field names is built correctly."""
block = SmartDecisionMakerBlock()
# Mock the database client and connected nodes
with patch(
"backend.blocks.smart_decision_maker.get_database_manager_async_client"
) as mock_db:
mock_client = AsyncMock()
mock_db.return_value = mock_client
# Create mock nodes and links
mock_dict_node = Mock()
mock_dict_node.block = CreateDictionaryBlock()
mock_dict_node.block_id = CreateDictionaryBlock().id
mock_dict_node.input_default = {}
mock_list_node = Mock()
mock_list_node.block = AddToListBlock()
mock_list_node.block_id = AddToListBlock().id
mock_list_node.input_default = {}
# Mock links with dynamic fields
dict_link1 = Mock(
source_name="tools_^_create_dictionary_~_name",
sink_name="values_#_name",
sink_id="dict_node_id",
source_id="test_node_id",
)
dict_link2 = Mock(
source_name="tools_^_create_dictionary_~_age",
sink_name="values_#_age",
sink_id="dict_node_id",
source_id="test_node_id",
)
list_link = Mock(
source_name="tools_^_add_to_list_~_0",
sink_name="entries_$_0",
sink_id="list_node_id",
source_id="test_node_id",
)
mock_client.get_connected_output_nodes.return_value = [
(dict_link1, mock_dict_node),
(dict_link2, mock_dict_node),
(list_link, mock_list_node),
]
# Call the method that builds signatures
tool_functions = await block._create_function_signature("test_node_id")
# Verify we got 2 tool functions (one for dict, one for list)
assert len(tool_functions) == 2
# Verify the tool functions contain the dynamic field names
dict_tool = next(
(
tool
for tool in tool_functions
if tool["function"]["name"] == "createdictionaryblock"
),
None,
)
assert dict_tool is not None
dict_properties = dict_tool["function"]["parameters"]["properties"]
assert "values___name" in dict_properties
assert "values___age" in dict_properties
list_tool = next(
(
tool
for tool in tool_functions
if tool["function"]["name"] == "addtolistblock"
),
None,
)
assert list_tool is not None
list_properties = list_tool["function"]["parameters"]["properties"]
assert "entries___0" in list_properties
@pytest.mark.asyncio
async def test_output_yielding_with_dynamic_fields():
"""Test that outputs are yielded correctly with dynamic field names mapped back."""
block = SmartDecisionMakerBlock()
# No more sanitized mapping needed since we removed sanitization
# Mock LLM response with tool calls
mock_response = Mock()
mock_response.tool_calls = [
Mock(
function=Mock(
arguments=json.dumps(
{
"values___name": "Alice",
"values___age": 30,
"values___email": "alice@example.com",
}
),
)
)
]
# Ensure function name is a real string, not a Mock name
mock_response.tool_calls[0].function.name = "createdictionaryblock"
mock_response.reasoning = "Creating a dictionary with user information"
mock_response.raw_response = {"role": "assistant", "content": "test"}
mock_response.prompt_tokens = 100
mock_response.completion_tokens = 50
# Mock the LLM call
with patch(
"backend.blocks.smart_decision_maker.llm.llm_call", new_callable=AsyncMock
) as mock_llm:
mock_llm.return_value = mock_response
# Mock the function signature creation
with patch.object(
block, "_create_function_signature", new_callable=AsyncMock
) as mock_sig:
mock_sig.return_value = [
{
"type": "function",
"function": {
"name": "createdictionaryblock",
"parameters": {
"type": "object",
"properties": {
"values___name": {"type": "string"},
"values___age": {"type": "number"},
"values___email": {"type": "string"},
},
},
},
}
]
# Create input data
from backend.blocks import llm
input_data = block.input_schema(
prompt="Create a user dictionary",
credentials=llm.TEST_CREDENTIALS_INPUT,
model=llm.LlmModel.GPT4O,
)
# Run the block
outputs = {}
async for output_name, output_value in block.run(
input_data,
credentials=llm.TEST_CREDENTIALS,
graph_id="test_graph",
node_id="test_node",
graph_exec_id="test_exec",
node_exec_id="test_node_exec",
user_id="test_user",
):
outputs[output_name] = output_value
# Verify the outputs use sanitized field names (matching frontend normalizeToolName)
assert "tools_^_createdictionaryblock_~_values___name" in outputs
assert outputs["tools_^_createdictionaryblock_~_values___name"] == "Alice"
assert "tools_^_createdictionaryblock_~_values___age" in outputs
assert outputs["tools_^_createdictionaryblock_~_values___age"] == 30
assert "tools_^_createdictionaryblock_~_values___email" in outputs
assert (
outputs["tools_^_createdictionaryblock_~_values___email"]
== "alice@example.com"
)
@pytest.mark.asyncio
async def test_mixed_regular_and_dynamic_fields():
"""Test handling of blocks with both regular and dynamic fields."""
block = SmartDecisionMakerBlock()
# Create a mock node
mock_node = Mock()
mock_node.block = Mock()
mock_node.block.name = "TestBlock"
mock_node.block.description = "A test block"
mock_node.block.input_schema = Mock()
# Mock the get_field_schema to return a proper schema for regular fields
def get_field_schema(field_name):
if field_name == "regular_field":
return {"type": "string", "description": "A regular field"}
elif field_name == "values":
return {"type": "object", "description": "A dictionary field"}
else:
raise KeyError(f"Field {field_name} not found")
mock_node.block.input_schema.get_field_schema = get_field_schema
mock_node.block.input_schema.jsonschema = Mock(
return_value={"properties": {}, "required": []}
)
# Create links with both regular and dynamic fields
mock_links = [
Mock(
source_name="tools_^_test_~_regular",
sink_name="regular_field", # Regular field
sink_id="test_node_id",
source_id="smart_decision_node_id",
),
Mock(
source_name="tools_^_test_~_dict_key",
sink_name="values_#_key1", # Dynamic dict field
sink_id="test_node_id",
source_id="smart_decision_node_id",
),
Mock(
source_name="tools_^_test_~_dict_key2",
sink_name="values_#_key2", # Dynamic dict field
sink_id="test_node_id",
source_id="smart_decision_node_id",
),
]
# Generate function signature
signature = await block._create_block_function_signature(mock_node, mock_links) # type: ignore
# Check properties
properties = signature["function"]["parameters"]["properties"]
assert len(properties) == 3
# Regular field should have its original schema
assert "regular_field" in properties
assert properties["regular_field"]["description"] == "A regular field"
# Dynamic fields should have generated descriptions
assert "values___key1" in properties
assert "Dictionary field" in properties["values___key1"]["description"]
assert "values___key2" in properties
assert "Dictionary field" in properties["values___key2"]["description"]
@pytest.mark.asyncio
async def test_validation_errors_dont_pollute_conversation():
"""Test that validation errors are only used during retries and don't pollute the conversation."""
block = SmartDecisionMakerBlock()
# Track conversation history changes
conversation_snapshots = []
# Mock response with invalid tool call (missing required parameter)
invalid_response = Mock()
invalid_response.tool_calls = [
Mock(
function=Mock(
arguments=json.dumps({"wrong_param": "value"}), # Wrong parameter name
)
)
]
# Ensure function name is a real string, not a Mock name
invalid_response.tool_calls[0].function.name = "test_tool"
invalid_response.reasoning = None
invalid_response.raw_response = {"role": "assistant", "content": "invalid"}
invalid_response.prompt_tokens = 100
invalid_response.completion_tokens = 50
# Mock valid response after retry
valid_response = Mock()
valid_response.tool_calls = [
Mock(function=Mock(arguments=json.dumps({"correct_param": "value"})))
]
# Ensure function name is a real string, not a Mock name
valid_response.tool_calls[0].function.name = "test_tool"
valid_response.reasoning = None
valid_response.raw_response = {"role": "assistant", "content": "valid"}
valid_response.prompt_tokens = 100
valid_response.completion_tokens = 50
call_count = 0
async def mock_llm_call(**kwargs):
nonlocal call_count
# Capture conversation state
conversation_snapshots.append(kwargs.get("prompt", []).copy())
call_count += 1
if call_count == 1:
return invalid_response
else:
return valid_response
# Mock the LLM call
with patch(
"backend.blocks.smart_decision_maker.llm.llm_call", new_callable=AsyncMock
) as mock_llm:
mock_llm.side_effect = mock_llm_call
# Mock the function signature creation
with patch.object(
block, "_create_function_signature", new_callable=AsyncMock
) as mock_sig:
mock_sig.return_value = [
{
"type": "function",
"function": {
"name": "test_tool",
"parameters": {
"type": "object",
"properties": {
"correct_param": {
"type": "string",
"description": "The correct parameter",
}
},
"required": ["correct_param"],
},
},
}
]
# Create input data
from backend.blocks import llm
input_data = block.input_schema(
prompt="Test prompt",
credentials=llm.TEST_CREDENTIALS_INPUT,
model=llm.LlmModel.GPT4O,
retry=3, # Allow retries
)
# Run the block
outputs = {}
async for output_name, output_value in block.run(
input_data,
credentials=llm.TEST_CREDENTIALS,
graph_id="test_graph",
node_id="test_node",
graph_exec_id="test_exec",
node_exec_id="test_node_exec",
user_id="test_user",
):
outputs[output_name] = output_value
# Verify we had 2 LLM calls (initial + retry)
assert call_count == 2
# Check the final conversation output
final_conversation = outputs.get("conversations", [])
# The final conversation should NOT contain the validation error message
error_messages = [
msg
for msg in final_conversation
if msg.get("role") == "user"
and "parameter errors" in msg.get("content", "")
]
assert (
len(error_messages) == 0
), "Validation error leaked into final conversation"
# The final conversation should only have the successful response
assert final_conversation[-1]["content"] == "valid"

View File

@@ -1,131 +0,0 @@
import pytest
from backend.blocks.io import AgentTableInputBlock
from backend.util.test import execute_block_test
@pytest.mark.asyncio
async def test_table_input_block():
"""Test the AgentTableInputBlock with basic input/output."""
block = AgentTableInputBlock()
await execute_block_test(block)
@pytest.mark.asyncio
async def test_table_input_with_data():
"""Test AgentTableInputBlock with actual table data."""
block = AgentTableInputBlock()
input_data = block.Input(
name="test_table",
column_headers=["Name", "Age", "City"],
value=[
{"Name": "John", "Age": "30", "City": "New York"},
{"Name": "Jane", "Age": "25", "City": "London"},
{"Name": "Bob", "Age": "35", "City": "Paris"},
],
)
output_data = []
async for output_name, output_value in block.run(input_data):
output_data.append((output_name, output_value))
assert len(output_data) == 1
assert output_data[0][0] == "result"
result = output_data[0][1]
assert len(result) == 3
assert result[0]["Name"] == "John"
assert result[1]["Age"] == "25"
assert result[2]["City"] == "Paris"
@pytest.mark.asyncio
async def test_table_input_empty_data():
"""Test AgentTableInputBlock with empty data."""
block = AgentTableInputBlock()
input_data = block.Input(
name="empty_table", column_headers=["Col1", "Col2"], value=[]
)
output_data = []
async for output_name, output_value in block.run(input_data):
output_data.append((output_name, output_value))
assert len(output_data) == 1
assert output_data[0][0] == "result"
assert output_data[0][1] == []
@pytest.mark.asyncio
async def test_table_input_with_missing_columns():
"""Test AgentTableInputBlock passes through data with missing columns as-is."""
block = AgentTableInputBlock()
input_data = block.Input(
name="partial_table",
column_headers=["Name", "Age", "City"],
value=[
{"Name": "John", "Age": "30"}, # Missing City
{"Name": "Jane", "City": "London"}, # Missing Age
{"Age": "35", "City": "Paris"}, # Missing Name
],
)
output_data = []
async for output_name, output_value in block.run(input_data):
output_data.append((output_name, output_value))
result = output_data[0][1]
assert len(result) == 3
# Check data is passed through as-is
assert result[0] == {"Name": "John", "Age": "30"}
assert result[1] == {"Name": "Jane", "City": "London"}
assert result[2] == {"Age": "35", "City": "Paris"}
@pytest.mark.asyncio
async def test_table_input_none_value():
"""Test AgentTableInputBlock with None value returns empty list."""
block = AgentTableInputBlock()
input_data = block.Input(
name="none_table", column_headers=["Name", "Age"], value=None
)
output_data = []
async for output_name, output_value in block.run(input_data):
output_data.append((output_name, output_value))
assert len(output_data) == 1
assert output_data[0][0] == "result"
assert output_data[0][1] == []
@pytest.mark.asyncio
async def test_table_input_with_default_headers():
"""Test AgentTableInputBlock with default column headers."""
block = AgentTableInputBlock()
# Don't specify column_headers, should use defaults
input_data = block.Input(
name="default_headers_table",
value=[
{"Column 1": "A", "Column 2": "B", "Column 3": "C"},
{"Column 1": "D", "Column 2": "E", "Column 3": "F"},
],
)
output_data = []
async for output_name, output_value in block.run(input_data):
output_data.append((output_name, output_value))
assert len(output_data) == 1
assert output_data[0][0] == "result"
result = output_data[0][1]
assert len(result) == 2
assert result[0]["Column 1"] == "A"
assert result[1]["Column 3"] == "F"

View File

@@ -2,8 +2,6 @@ import re
from pathlib import Path
from typing import Any
import regex # Has built-in timeout support
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import SchemaField
from backend.util import json, text
@@ -139,11 +137,6 @@ class ExtractTextInformationBlock(Block):
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
# Security fix: Add limits to prevent ReDoS and memory exhaustion
MAX_TEXT_LENGTH = 1_000_000 # 1MB character limit
MAX_MATCHES = 1000 # Maximum number of matches to prevent memory exhaustion
MAX_MATCH_LENGTH = 10_000 # Maximum length per match
flags = 0
if not input_data.case_sensitive:
flags = flags | re.IGNORECASE
@@ -155,85 +148,20 @@ class ExtractTextInformationBlock(Block):
else:
txt = json.dumps(input_data.text)
# Limit text size to prevent DoS
if len(txt) > MAX_TEXT_LENGTH:
txt = txt[:MAX_TEXT_LENGTH]
# Validate regex pattern to prevent dangerous patterns
dangerous_patterns = [
r".*\+.*\+", # Nested quantifiers
r".*\*.*\*", # Nested quantifiers
r"(?=.*\+)", # Lookahead with quantifier
r"(?=.*\*)", # Lookahead with quantifier
r"\(.+\)\+", # Group with nested quantifier
r"\(.+\)\*", # Group with nested quantifier
r"\([^)]+\+\)\+", # Nested quantifiers like (a+)+
r"\([^)]+\*\)\*", # Nested quantifiers like (a*)*
matches = [
match.group(input_data.group)
for match in re.finditer(input_data.pattern, txt, flags)
if input_data.group <= len(match.groups())
]
# Check if pattern is potentially dangerous
is_dangerous = any(
re.search(dangerous, input_data.pattern) for dangerous in dangerous_patterns
)
# Use regex module with timeout for dangerous patterns
# For safe patterns, use standard re module for compatibility
try:
matches = []
match_count = 0
if is_dangerous:
# Use regex module with timeout (5 seconds) for dangerous patterns
# The regex module supports timeout parameter in finditer
try:
for match in regex.finditer(
input_data.pattern, txt, flags=flags, timeout=5.0
):
if match_count >= MAX_MATCHES:
break
if input_data.group <= len(match.groups()):
match_text = match.group(input_data.group)
# Limit match length to prevent memory exhaustion
if len(match_text) > MAX_MATCH_LENGTH:
match_text = match_text[:MAX_MATCH_LENGTH]
matches.append(match_text)
match_count += 1
except regex.error as e:
# Timeout occurred or regex error
if "timeout" in str(e).lower():
# Timeout - return empty results
pass
else:
# Other regex error
raise
else:
# Use standard re module for non-dangerous patterns
for match in re.finditer(input_data.pattern, txt, flags):
if match_count >= MAX_MATCHES:
break
if input_data.group <= len(match.groups()):
match_text = match.group(input_data.group)
# Limit match length to prevent memory exhaustion
if len(match_text) > MAX_MATCH_LENGTH:
match_text = match_text[:MAX_MATCH_LENGTH]
matches.append(match_text)
match_count += 1
if not input_data.find_all:
matches = matches[:1]
for match in matches:
yield "positive", match
if not matches:
yield "negative", input_data.text
yield "matched_results", matches
yield "matched_count", len(matches)
except Exception:
# Return empty results on any regex error
if not input_data.find_all:
matches = matches[:1]
for match in matches:
yield "positive", match
if not matches:
yield "negative", input_data.text
yield "matched_results", []
yield "matched_count", 0
yield "matched_results", matches
yield "matched_count", len(matches)
class FillTextTemplateBlock(Block):
@@ -244,11 +172,6 @@ class FillTextTemplateBlock(Block):
format: str = SchemaField(
description="Template to format the text using `values`. Use Jinja2 syntax."
)
escape_html: bool = SchemaField(
default=False,
advanced=True,
description="Whether to escape special characters in the inserted values to be HTML-safe. Enable for HTML output, disable for plain text.",
)
class Output(BlockSchema):
output: str = SchemaField(description="Formatted text")
@@ -282,7 +205,6 @@ class FillTextTemplateBlock(Block):
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
formatter = text.TextFormatter(autoescape=input_data.escape_html)
yield "output", formatter.format_string(input_data.format, input_data.values)

View File

@@ -270,17 +270,13 @@ class GetCurrentDateBlock(Block):
test_output=[
(
"date",
lambda t: abs(
datetime.now().date() - datetime.strptime(t, "%Y-%m-%d").date()
)
<= timedelta(days=8), # 7 days difference + 1 day error margin.
lambda t: abs(datetime.now() - datetime.strptime(t, "%Y-%m-%d"))
< timedelta(days=8), # 7 days difference + 1 day error margin.
),
(
"date",
lambda t: abs(
datetime.now().date() - datetime.strptime(t, "%m/%d/%Y").date()
)
<= timedelta(days=8),
lambda t: abs(datetime.now() - datetime.strptime(t, "%m/%d/%Y"))
< timedelta(days=8),
# 7 days difference + 1 day error margin.
),
(
@@ -386,7 +382,7 @@ class GetCurrentDateAndTimeBlock(Block):
lambda t: abs(
datetime.now().date() - datetime.strptime(t, "%Y/%m/%d").date()
)
<= timedelta(days=1), # Date format only, no time component
< timedelta(days=1), # Date format only, no time component
),
(
"date_time",

View File

@@ -26,14 +26,6 @@ class XMLParserBlock(Block):
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
# Security fix: Add size limits to prevent XML bomb attacks
MAX_XML_SIZE = 10 * 1024 * 1024 # 10MB limit for XML input
if len(input_data.input_xml) > MAX_XML_SIZE:
raise ValueError(
f"XML too large: {len(input_data.input_xml)} bytes > {MAX_XML_SIZE} bytes"
)
try:
tokens = tokenize(input_data.input_xml)
parser = Parser(tokens)

View File

@@ -9,7 +9,6 @@ from prisma.models import APIKey as PrismaAPIKey
from prisma.types import APIKeyWhereUniqueInput
from pydantic import BaseModel, Field
from backend.data.includes import MAX_USER_API_KEYS_FETCH
from backend.util.exceptions import NotAuthorizedError, NotFoundError
logger = logging.getLogger(__name__)
@@ -179,13 +178,9 @@ async def revoke_api_key(key_id: str, user_id: str) -> APIKeyInfo:
return APIKeyInfo.from_db(updated_api_key)
async def list_user_api_keys(
user_id: str, limit: int = MAX_USER_API_KEYS_FETCH
) -> list[APIKeyInfo]:
async def list_user_api_keys(user_id: str) -> list[APIKeyInfo]:
api_keys = await PrismaAPIKey.prisma().find_many(
where={"userId": user_id},
order={"createdAt": "desc"},
take=limit,
where={"userId": user_id}, order={"createdAt": "desc"}
)
return [APIKeyInfo.from_db(key) for key in api_keys]

View File

@@ -1,3 +1,4 @@
import functools
import inspect
import logging
import os
@@ -20,7 +21,6 @@ from typing import (
import jsonref
import jsonschema
from autogpt_libs.utils.cache import cached
from prisma.models import AgentBlock
from prisma.types import AgentBlockCreateInput
from pydantic import BaseModel
@@ -722,7 +722,7 @@ def get_block(block_id: str) -> Block[BlockSchema, BlockSchema] | None:
return cls() if cls else None
@cached()
@functools.cache
def get_webhook_block_ids() -> Sequence[str]:
return [
id
@@ -731,7 +731,7 @@ def get_webhook_block_ids() -> Sequence[str]:
]
@cached()
@functools.cache
def get_io_block_ids() -> Sequence[str]:
return [
id
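The swap between `functools.cache` and the project's `cached()` helper boils down to expiry: plain memoisation lives for the whole process, while a TTL cache refreshes. A toy illustration (not the `autogpt_libs` implementation):

import functools
import time


def ttl_cache(ttl_seconds: float):
    def deco(fn):
        store: dict = {}

        @functools.wraps(fn)
        def wrapper(*args):
            now = time.monotonic()
            hit = store.get(args)
            if hit and now - hit[1] < ttl_seconds:
                return hit[0]  # fresh enough: reuse cached value
            value = fn(*args)
            store[args] = (value, now)
            return value

        return wrapper

    return deco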

View File

@@ -69,8 +69,6 @@ MODEL_COST: dict[LlmModel, int] = {
LlmModel.CLAUDE_4_1_OPUS: 21,
LlmModel.CLAUDE_4_OPUS: 21,
LlmModel.CLAUDE_4_SONNET: 5,
LlmModel.CLAUDE_4_5_HAIKU: 4,
LlmModel.CLAUDE_4_5_SONNET: 9,
LlmModel.CLAUDE_3_7_SONNET: 5,
LlmModel.CLAUDE_3_5_SONNET: 4,
LlmModel.CLAUDE_3_5_HAIKU: 1, # $0.80 / $4.00

View File

@@ -23,7 +23,6 @@ from pydantic import BaseModel
from backend.data import db
from backend.data.block_cost_config import BLOCK_COSTS
from backend.data.includes import MAX_CREDIT_REFUND_REQUESTS_FETCH
from backend.data.model import (
AutoTopUpConfig,
RefundRequest,
@@ -906,9 +905,7 @@ class UserCredit(UserCreditBase):
),
)
async def get_refund_requests(
self, user_id: str, limit: int = MAX_CREDIT_REFUND_REQUESTS_FETCH
) -> list[RefundRequest]:
async def get_refund_requests(self, user_id: str) -> list[RefundRequest]:
return [
RefundRequest(
id=r.id,
@@ -924,7 +921,6 @@ class UserCredit(UserCreditBase):
for r in await CreditRefundRequest.prisma().find_many(
where={"userId": user_id},
order={"createdAt": "desc"},
take=limit,
)
]

View File

@@ -83,7 +83,7 @@ async def disconnect():
# Transaction timeout constant (in milliseconds)
TRANSACTION_TIMEOUT = 30000 # 30 seconds - Increased from 15s to prevent timeout errors during graph creation under load
TRANSACTION_TIMEOUT = 15000 # 15 seconds - Increased from 5s to prevent timeout errors
@asynccontextmanager

View File

@@ -1,284 +0,0 @@
"""
Utilities for handling dynamic field names with special delimiters.
Dynamic fields allow graphs to connect complex data structures using special delimiters:
- _#_ for dictionary keys (e.g., "values_#_name" → values["name"])
- _$_ for list indices (e.g., "items_$_0" → items[0])
- _@_ for object attributes (e.g., "obj_@_attr" → obj.attr)
"""
from typing import Any
from backend.util.mock import MockObject
# Dynamic field delimiters
LIST_SPLIT = "_$_"
DICT_SPLIT = "_#_"
OBJC_SPLIT = "_@_"
DYNAMIC_DELIMITERS = (LIST_SPLIT, DICT_SPLIT, OBJC_SPLIT)
def extract_base_field_name(field_name: str) -> str:
"""
Extract the base field name from a dynamic field name by removing all dynamic suffixes.
Examples:
extract_base_field_name("values_#_name") → "values"
extract_base_field_name("items_$_0") → "items"
extract_base_field_name("obj_@_attr") → "obj"
extract_base_field_name("regular_field") → "regular_field"
Args:
field_name: The field name that may contain dynamic delimiters
Returns:
The base field name without any dynamic suffixes
"""
base_name = field_name
for delimiter in DYNAMIC_DELIMITERS:
if delimiter in base_name:
base_name = base_name.split(delimiter)[0]
return base_name
def is_dynamic_field(field_name: str) -> bool:
"""
Check if a field name contains dynamic delimiters.
Args:
field_name: The field name to check
Returns:
True if the field contains any dynamic delimiters, False otherwise
"""
return any(delimiter in field_name for delimiter in DYNAMIC_DELIMITERS)
def get_dynamic_field_description(field_name: str) -> str:
"""
Generate a description for a dynamic field based on its structure.
Args:
field_name: The full dynamic field name (e.g., "values_#_name")
Returns:
A descriptive string explaining what this dynamic field represents
"""
base_name = extract_base_field_name(field_name)
if DICT_SPLIT in field_name:
# Extract the key part after _#_
parts = field_name.split(DICT_SPLIT)
if len(parts) > 1:
key = parts[1].split("_")[0] if "_" in parts[1] else parts[1]
return f"Dictionary field '{key}' for base field '{base_name}' ({base_name}['{key}'])"
elif LIST_SPLIT in field_name:
# Extract the index part after _$_
parts = field_name.split(LIST_SPLIT)
if len(parts) > 1:
index = parts[1].split("_")[0] if "_" in parts[1] else parts[1]
return (
f"List item {index} for base field '{base_name}' ({base_name}[{index}])"
)
elif OBJC_SPLIT in field_name:
# Extract the attribute part after _@_
parts = field_name.split(OBJC_SPLIT)
if len(parts) > 1:
# Get the full attribute name (everything after _@_)
attr = parts[1]
return f"Object attribute '{attr}' for base field '{base_name}' ({base_name}.{attr})"
return f"Value for {field_name}"
# --------------------------------------------------------------------------- #
# Dynamic field parsing and merging utilities
# --------------------------------------------------------------------------- #
def _next_delim(s: str) -> tuple[str | None, int]:
"""
Return the *earliest* delimiter appearing in `s` and its index.
If none present → (None, -1).
"""
first: str | None = None
pos = len(s) # sentinel: larger than any real index
for d in DYNAMIC_DELIMITERS:
i = s.find(d)
if 0 <= i < pos:
first, pos = d, i
return first, (pos if first else -1)
def _tokenise(path: str) -> list[tuple[str, str]] | None:
"""
Convert the raw path string (starting with a delimiter) into
[ (delimiter, identifier), … ] or None if the syntax is malformed.
"""
tokens: list[tuple[str, str]] = []
while path:
# 1. Which delimiter starts this chunk?
delim = next((d for d in DYNAMIC_DELIMITERS if path.startswith(d)), None)
if delim is None:
return None # invalid syntax
# 2. Slice off the delimiter, then up to the next delimiter (or EOS)
path = path[len(delim) :]
nxt_delim, pos = _next_delim(path)
token, path = (
path[: pos if pos != -1 else len(path)],
path[pos if pos != -1 else len(path) :],
)
if token == "":
return None # empty identifier is invalid
tokens.append((delim, token))
return tokens
def parse_execution_output(output: tuple[str, Any], name: str) -> Any:
"""
Retrieve a nested value out of `output` using the flattened *name*.
On any failure (wrong name, wrong type, out-of-range, bad path)
returns **None**.
Args:
output: Tuple of (base_name, data) representing a block output entry
name: The flattened field name to extract from the output data
Returns:
The value at the specified path, or None if not found/invalid
"""
base_name, data = output
# Exact match → whole object
if name == base_name:
return data
# Must start with the expected name
if not name.startswith(base_name):
return None
path = name[len(base_name) :]
if not path:
return None # nothing left to parse
tokens = _tokenise(path)
if tokens is None:
return None
cur: Any = data
for delim, ident in tokens:
if delim == LIST_SPLIT:
# list[index]
try:
idx = int(ident)
except ValueError:
return None
if not isinstance(cur, list) or idx >= len(cur):
return None
cur = cur[idx]
elif delim == DICT_SPLIT:
if not isinstance(cur, dict) or ident not in cur:
return None
cur = cur[ident]
elif delim == OBJC_SPLIT:
if not hasattr(cur, ident):
return None
cur = getattr(cur, ident)
else:
return None # unreachable
return cur
def _assign(container: Any, tokens: list[tuple[str, str]], value: Any) -> Any:
"""
Recursive helper that *returns* the (possibly new) container with
`value` assigned along the remaining `tokens` path.
"""
if not tokens:
return value # leaf reached
delim, ident = tokens[0]
rest = tokens[1:]
# ---------- list ----------
if delim == LIST_SPLIT:
try:
idx = int(ident)
except ValueError:
raise ValueError("index must be an integer")
if container is None:
container = []
elif not isinstance(container, list):
container = list(container) if hasattr(container, "__iter__") else []
while len(container) <= idx:
container.append(None)
container[idx] = _assign(container[idx], rest, value)
return container
# ---------- dict ----------
if delim == DICT_SPLIT:
if container is None:
container = {}
elif not isinstance(container, dict):
container = dict(container) if hasattr(container, "items") else {}
container[ident] = _assign(container.get(ident), rest, value)
return container
# ---------- object ----------
if delim == OBJC_SPLIT:
if container is None:
container = MockObject()
elif not hasattr(container, "__dict__"):
# If it's not an object, create a new one
container = MockObject()
setattr(
container,
ident,
_assign(getattr(container, ident, None), rest, value),
)
return container
return value # unreachable
def merge_execution_input(data: dict[str, Any]) -> dict[str, Any]:
"""
Reconstruct nested objects from a *flattened* dict of key → value.
Raises ValueError on syntactically invalid list indices.
Args:
data: Dictionary with potentially flattened dynamic field keys
Returns:
Dictionary with nested objects reconstructed from flattened keys
"""
merged: dict[str, Any] = {}
for key, value in data.items():
# Split off the base name (before the first delimiter, if any)
delim, pos = _next_delim(key)
if delim is None:
merged[key] = value
continue
base, path = key[:pos], key[pos:]
tokens = _tokenise(path)
if tokens is None:
# Invalid key; treat as scalar under the raw name
merged[key] = value
continue
merged[base] = _assign(merged.get(base), tokens, value)
data.update(merged)
return data
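A short usage sketch of the helpers above (values chosen purely for illustration): flattened keys are rebuilt by `merge_execution_input`, and `parse_execution_output` walks back into the nested value.

flat = {"values_#_name": "Alice", "items_$_0": 1, "items_$_2": 3}

merged = merge_execution_input(dict(flat))
assert merged["values"] == {"name": "Alice"}
assert merged["items"] == [1, None, 3]  # index 1 padded with None

assert parse_execution_output(("values", merged["values"]), "values_#_name") == "Alice"
assert parse_execution_output(("items", merged["items"]), "items_$_2") == 3
assert parse_execution_output(("items", merged["items"]), "items_$_9") is None  # out of range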

View File

@@ -38,8 +38,8 @@ from prisma.types import (
from pydantic import BaseModel, ConfigDict, JsonValue, ValidationError
from pydantic.fields import Field
from backend.server.v2.store.exceptions import DatabaseError
from backend.util import type as type_utils
from backend.util.exceptions import DatabaseError
from backend.util.json import SafeJson
from backend.util.models import Pagination
from backend.util.retry import func_retry
@@ -92,31 +92,6 @@ ExecutionStatus = AgentExecutionStatus
NodeInputMask = Mapping[str, JsonValue]
NodesInputMasks = Mapping[str, NodeInputMask]
# dest: source
VALID_STATUS_TRANSITIONS = {
ExecutionStatus.QUEUED: [
ExecutionStatus.INCOMPLETE,
],
ExecutionStatus.RUNNING: [
ExecutionStatus.INCOMPLETE,
ExecutionStatus.QUEUED,
ExecutionStatus.TERMINATED, # For resuming halted execution
],
ExecutionStatus.COMPLETED: [
ExecutionStatus.RUNNING,
],
ExecutionStatus.FAILED: [
ExecutionStatus.INCOMPLETE,
ExecutionStatus.QUEUED,
ExecutionStatus.RUNNING,
],
ExecutionStatus.TERMINATED: [
ExecutionStatus.INCOMPLETE,
ExecutionStatus.QUEUED,
ExecutionStatus.RUNNING,
],
}
class GraphExecutionMeta(BaseDbModel):
id: str # type: ignore # Override base class to make this required
@@ -130,8 +105,6 @@ class GraphExecutionMeta(BaseDbModel):
status: ExecutionStatus
started_at: datetime
ended_at: datetime
is_shared: bool = False
share_token: Optional[str] = None
class Stats(BaseModel):
model_config = ConfigDict(
@@ -248,8 +221,6 @@ class GraphExecutionMeta(BaseDbModel):
if stats
else None
),
is_shared=_graph_exec.isShared,
share_token=_graph_exec.shareToken,
)
@@ -478,48 +449,6 @@ async def get_graph_executions(
return [GraphExecutionMeta.from_db(execution) for execution in executions]
async def get_graph_executions_count(
user_id: Optional[str] = None,
graph_id: Optional[str] = None,
statuses: Optional[list[ExecutionStatus]] = None,
created_time_gte: Optional[datetime] = None,
created_time_lte: Optional[datetime] = None,
) -> int:
"""
Get count of graph executions with optional filters.
Args:
user_id: Optional user ID to filter by
graph_id: Optional graph ID to filter by
statuses: Optional list of execution statuses to filter by
created_time_gte: Optional minimum creation time
created_time_lte: Optional maximum creation time
Returns:
Count of matching graph executions
"""
where_filter: AgentGraphExecutionWhereInput = {
"isDeleted": False,
}
if user_id:
where_filter["userId"] = user_id
if graph_id:
where_filter["agentGraphId"] = graph_id
if created_time_gte or created_time_lte:
where_filter["createdAt"] = {
"gte": created_time_gte or datetime.min.replace(tzinfo=timezone.utc),
"lte": created_time_lte or datetime.max.replace(tzinfo=timezone.utc),
}
if statuses:
where_filter["OR"] = [{"executionStatus": status} for status in statuses]
count = await AgentGraphExecution.prisma().count(where=where_filter)
return count
class GraphExecutionsPaginated(BaseModel):
"""Response schema for paginated graph executions."""
@@ -651,7 +580,7 @@ async def create_graph_execution(
data={
"agentGraphId": graph_id,
"agentGraphVersion": graph_version,
"executionStatus": ExecutionStatus.INCOMPLETE,
"executionStatus": ExecutionStatus.QUEUED,
"inputs": SafeJson(inputs),
"credentialInputs": (
SafeJson(credential_inputs) if credential_inputs else Json({})
@@ -798,11 +727,6 @@ async def update_graph_execution_stats(
status: ExecutionStatus | None = None,
stats: GraphExecutionStats | None = None,
) -> GraphExecution | None:
if not status and not stats:
raise ValueError(
f"Must provide either status or stats to update for execution {graph_exec_id}"
)
update_data: AgentGraphExecutionUpdateManyMutationInput = {}
if stats:
@@ -814,25 +738,20 @@ async def update_graph_execution_stats(
if status:
update_data["executionStatus"] = status
where_clause: AgentGraphExecutionWhereInput = {"id": graph_exec_id}
if status:
if allowed_from := VALID_STATUS_TRANSITIONS.get(status, []):
# Add OR clause to check if current status is one of the allowed source statuses
where_clause["AND"] = [
{"id": graph_exec_id},
{"OR": [{"executionStatus": s} for s in allowed_from]},
]
else:
raise ValueError(
f"Status {status} cannot be set via update for execution {graph_exec_id}. "
f"This status can only be set at creation or is not a valid target status."
)
await AgentGraphExecution.prisma().update_many(
where=where_clause,
updated_count = await AgentGraphExecution.prisma().update_many(
where={
"id": graph_exec_id,
"OR": [
{"executionStatus": ExecutionStatus.RUNNING},
{"executionStatus": ExecutionStatus.QUEUED},
# Terminated graph can be resumed.
{"executionStatus": ExecutionStatus.TERMINATED},
],
},
data=update_data,
)
if updated_count == 0:
return None
graph_exec = await AgentGraphExecution.prisma().find_unique_or_raise(
where={"id": graph_exec_id},
@@ -840,7 +759,6 @@ async def update_graph_execution_stats(
[*get_io_block_ids(), *get_webhook_block_ids()]
),
)
return GraphExecution.from_db(graph_exec)
@@ -1067,18 +985,6 @@ class NodeExecutionEvent(NodeExecutionResult):
)
class SharedExecutionResponse(BaseModel):
"""Public-safe response for shared executions"""
id: str
graph_name: str
graph_description: Optional[str]
status: ExecutionStatus
created_at: datetime
outputs: CompletedBlockOutput # Only the final outputs, no intermediate data
# Deliberately exclude: user_id, inputs, credentials, node details
ExecutionEvent = Annotated[
GraphExecutionEvent | NodeExecutionEvent, Field(discriminator="event_type")
]
@@ -1256,98 +1162,3 @@ async def get_block_error_stats(
)
for row in result
]
async def update_graph_execution_share_status(
execution_id: str,
user_id: str,
is_shared: bool,
share_token: str | None,
shared_at: datetime | None,
) -> None:
"""Update the sharing status of a graph execution."""
await AgentGraphExecution.prisma().update(
where={"id": execution_id},
data={
"isShared": is_shared,
"shareToken": share_token,
"sharedAt": shared_at,
},
)
async def get_graph_execution_by_share_token(
share_token: str,
) -> SharedExecutionResponse | None:
"""Get a shared execution with limited public-safe data."""
execution = await AgentGraphExecution.prisma().find_first(
where={
"shareToken": share_token,
"isShared": True,
"isDeleted": False,
},
include={
"AgentGraph": True,
"NodeExecutions": {
"include": {
"Output": True,
"Node": {
"include": {
"AgentBlock": True,
}
},
},
},
},
)
if not execution:
return None
# Extract outputs from OUTPUT blocks only (consistent with GraphExecution.from_db)
outputs: CompletedBlockOutput = defaultdict(list)
if execution.NodeExecutions:
for node_exec in execution.NodeExecutions:
if node_exec.Node and node_exec.Node.agentBlockId:
# Get the block definition to check its type
block = get_block(node_exec.Node.agentBlockId)
if block and block.block_type == BlockType.OUTPUT:
# For OUTPUT blocks, the data is stored in executionData or Input
# The executionData contains the structured input with 'name' and 'value' fields
if hasattr(node_exec, "executionData") and node_exec.executionData:
exec_data = type_utils.convert(
node_exec.executionData, dict[str, Any]
)
if "name" in exec_data:
name = exec_data["name"]
value = exec_data.get("value")
outputs[name].append(value)
elif node_exec.Input:
# Build input_data from Input relation
input_data = {}
for data in node_exec.Input:
if data.name and data.data is not None:
input_data[data.name] = type_utils.convert(
data.data, JsonValue
)
if "name" in input_data:
name = input_data["name"]
value = input_data.get("value")
outputs[name].append(value)
return SharedExecutionResponse(
id=execution.id,
graph_name=(
execution.AgentGraph.name
if (execution.AgentGraph and execution.AgentGraph.name)
else "Untitled Agent"
),
graph_description=(
execution.AgentGraph.description if execution.AgentGraph else None
),
status=ExecutionStatus(execution.executionStatus),
created_at=execution.createdAt,
outputs=outputs,
)
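The VALID_STATUS_TRANSITIONS table above works as a compare-and-set guard: a status is only written when the row currently holds one of its allowed source statuses. A minimal sketch of that invariant:

def can_transition(current: ExecutionStatus, target: ExecutionStatus) -> bool:
    """True if `target` may be set on a row currently in `current`."""
    return current in VALID_STATUS_TRANSITIONS.get(target, [])

# e.g. a COMPLETED run cannot be re-marked RUNNING, but a TERMINATED one can:
assert not can_transition(ExecutionStatus.COMPLETED, ExecutionStatus.RUNNING)
assert can_transition(ExecutionStatus.TERMINATED, ExecutionStatus.RUNNING)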

View File

@@ -7,7 +7,7 @@ from prisma.enums import AgentExecutionStatus
from backend.data.execution import get_graph_executions
from backend.data.graph import get_graph_metadata
from backend.data.model import UserExecutionSummaryStats
from backend.util.exceptions import DatabaseError
from backend.server.v2.store.exceptions import DatabaseError
from backend.util.logging import TruncatedLogger
logger = TruncatedLogger(logging.getLogger(__name__), prefix="[SummaryData]")

View File

@@ -1,7 +1,6 @@
import logging
import uuid
from collections import defaultdict
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Literal, Optional, cast
from prisma.enums import SubmissionStatus
@@ -20,8 +19,6 @@ from backend.blocks.agent import AgentExecutorBlock
from backend.blocks.io import AgentInputBlock, AgentOutputBlock
from backend.blocks.llm import LlmModel
from backend.data.db import prisma as db
from backend.data.dynamic_fields import extract_base_field_name
from backend.data.includes import MAX_GRAPH_VERSIONS_FETCH
from backend.data.model import (
CredentialsField,
CredentialsFieldInfo,
@@ -31,17 +28,8 @@ from backend.data.model import (
from backend.integrations.providers import ProviderName
from backend.util import type as type_utils
from backend.util.json import SafeJson
from backend.util.models import Pagination
from .block import (
Block,
BlockInput,
BlockSchema,
BlockType,
EmptySchema,
get_block,
get_blocks,
)
from .block import Block, BlockInput, BlockSchema, BlockType, get_block, get_blocks
from .db import BaseDbModel, query_raw_with_schema, transaction
from .includes import AGENT_GRAPH_INCLUDE, AGENT_NODE_INCLUDE
@@ -82,15 +70,12 @@ class Node(BaseDbModel):
output_links: list[Link] = []
@property
def block(self) -> "Block[BlockSchema, BlockSchema] | _UnknownBlockBase":
"""Get the block for this node. Returns UnknownBlock if block is deleted/missing."""
def block(self) -> Block[BlockSchema, BlockSchema]:
block = get_block(self.block_id)
if not block:
# Log warning but don't raise exception - return a placeholder block for deleted blocks
logger.warning(
f"Block #{self.block_id} does not exist for Node #{self.id} (deleted/missing block), using UnknownBlock"
raise ValueError(
f"Block #{self.block_id} does not exist -> Node #{self.id} is invalid"
)
return _UnknownBlockBase(self.block_id)
return block
@@ -129,20 +114,17 @@ class NodeModel(Node):
Returns a copy of the node model, stripped of any non-transferable properties
"""
stripped_node = self.model_copy(deep=True)
# Remove credentials and other (possible) secrets from node input
# Remove credentials from node input
if stripped_node.input_default:
stripped_node.input_default = NodeModel._filter_secrets_from_node_input(
stripped_node.input_default, self.block.input_schema.jsonschema()
)
# Remove default secret value from secret input nodes
if (
stripped_node.block.block_type == BlockType.INPUT
and stripped_node.input_default.get("secret", False) is True
and "value" in stripped_node.input_default
):
del stripped_node.input_default["value"]
stripped_node.input_default["value"] = ""
# Remove webhook info
stripped_node.webhook_id = None
@@ -159,10 +141,8 @@ class NodeModel(Node):
result = {}
for key, value in input_data.items():
field_schema: dict | None = field_schemas.get(key)
if (field_schema and field_schema.get("secret", False)) or (
any(sensitive_key in key.lower() for sensitive_key in sensitive_keys)
# Prevent removing `secret` flag on input nodes
and type(value) is not bool
if (field_schema and field_schema.get("secret", False)) or any(
sensitive_key in key.lower() for sensitive_key in sensitive_keys
):
# This is a secret value -> filter this key-value pair out
continue
@@ -180,7 +160,6 @@ class BaseGraph(BaseDbModel):
is_active: bool = True
name: str
description: str
instructions: str | None = None
recommended_schedule_cron: str | None = None
nodes: list[Node] = []
links: list[Link] = []
@@ -402,8 +381,6 @@ class GraphModel(Graph):
user_id: str
nodes: list[NodeModel] = [] # type: ignore
created_at: datetime
@property
def starting_nodes(self) -> list[NodeModel]:
outbound_nodes = {link.sink_id for link in self.links}
@@ -416,10 +393,6 @@ class GraphModel(Graph):
if node.id not in outbound_nodes or node.id in input_nodes
]
@property
def webhook_input_node(self) -> NodeModel | None: # type: ignore
return cast(NodeModel, super().webhook_input_node)
def meta(self) -> "GraphMeta":
"""
Returns a GraphMeta object with metadata about the graph.
@@ -721,11 +694,9 @@ class GraphModel(Graph):
version=graph.version,
forked_from_id=graph.forkedFromId,
forked_from_version=graph.forkedFromVersion,
created_at=graph.createdAt,
is_active=graph.isActive,
name=graph.name or "",
description=graph.description or "",
instructions=graph.instructions,
recommended_schedule_cron=graph.recommendedScheduleCron,
nodes=[NodeModel.from_db(node, for_export) for node in graph.Nodes or []],
links=list(
@@ -747,7 +718,7 @@ def _is_tool_pin(name: str) -> bool:
def _sanitize_pin_name(name: str) -> str:
sanitized_name = extract_base_field_name(name)
sanitized_name = name.split("_#_")[0].split("_@_")[0].split("_$_")[0]
if _is_tool_pin(sanitized_name):
return "tools"
return sanitized_name
@@ -765,13 +736,6 @@ class GraphMeta(Graph):
return GraphMeta(**graph.model_dump())
class GraphsPaginated(BaseModel):
"""Response schema for paginated graphs."""
graphs: list[GraphMeta]
pagination: Pagination
# --------------------- CRUD functions --------------------- #
@@ -800,42 +764,31 @@ async def set_node_webhook(node_id: str, webhook_id: str | None) -> NodeModel:
return NodeModel.from_db(node)
async def list_graphs_paginated(
async def list_graphs(
user_id: str,
page: int = 1,
page_size: int = 25,
filter_by: Literal["active"] | None = "active",
) -> GraphsPaginated:
) -> list[GraphMeta]:
"""
Retrieves paginated graph metadata objects.
Retrieves graph metadata objects.
Default behaviour is to get all currently active graphs.
Args:
user_id: The ID of the user that owns the graphs.
page: Page number (1-based).
page_size: Number of graphs per page.
filter_by: An optional filter for which graphs to select; defaults to active graphs only.
user_id: The ID of the user that owns the graph.
Returns:
GraphsPaginated: Paginated list of graph metadata.
list[GraphMeta]: A list of objects representing the retrieved graphs.
"""
where_clause: AgentGraphWhereInput = {"userId": user_id}
if filter_by == "active":
where_clause["isActive"] = True
# Get total count
total_count = await AgentGraph.prisma().count(where=where_clause)
total_pages = (total_count + page_size - 1) // page_size
# Get paginated results
offset = (page - 1) * page_size
graphs = await AgentGraph.prisma().find_many(
where=where_clause,
distinct=["id"],
order={"version": "desc"},
include=AGENT_GRAPH_INCLUDE,
skip=offset,
take=page_size,
)
graph_models: list[GraphMeta] = []
@@ -849,15 +802,7 @@ async def list_graphs_paginated(
logger.error(f"Error processing graph {graph.id}: {e}")
continue
return GraphsPaginated(
graphs=graph_models,
pagination=Pagination(
total_items=total_count,
total_pages=total_pages,
current_page=page,
page_size=page_size,
),
)
return graph_models
async def get_graph_metadata(graph_id: str, version: int | None = None) -> Graph | None:
@@ -1077,14 +1022,11 @@ async def set_graph_active_version(graph_id: str, version: int, user_id: str) ->
)
async def get_graph_all_versions(
graph_id: str, user_id: str, limit: int = MAX_GRAPH_VERSIONS_FETCH
) -> list[GraphModel]:
async def get_graph_all_versions(graph_id: str, user_id: str) -> list[GraphModel]:
graph_versions = await AgentGraph.prisma().find_many(
where={"id": graph_id, "userId": user_id},
order={"version": "desc"},
include=AGENT_GRAPH_INCLUDE,
take=limit,
)
if not graph_versions:
@@ -1202,7 +1144,6 @@ def make_graph_model(creatable_graph: Graph, user_id: str) -> GraphModel:
return GraphModel(
**creatable_graph.model_dump(exclude={"nodes"}),
user_id=user_id,
created_at=datetime.now(tz=timezone.utc),
nodes=[
NodeModel(
**creatable_node.model_dump(),
@@ -1333,34 +1274,3 @@ async def migrate_llm_models(migrate_to: LlmModel):
id,
path,
)
# Simple placeholder class for deleted/missing blocks
class _UnknownBlockBase(Block):
"""
Placeholder for deleted/missing blocks that inherits from Block
but uses a name that doesn't end with 'Block' to avoid auto-discovery.
"""
def __init__(self, block_id: str = "00000000-0000-0000-0000-000000000000"):
# Initialize with minimal valid Block parameters
super().__init__(
id=block_id,
description=f"Unknown or deleted block (original ID: {block_id})",
disabled=True,
input_schema=EmptySchema,
output_schema=EmptySchema,
categories=set(),
contributors=[],
static_output=False,
block_type=BlockType.STANDARD,
webhook_config=None,
)
@property
def name(self):
return "UnknownBlock"
async def run(self, input_data, **kwargs):
"""Always yield an error for missing blocks."""
yield "error", f"Block {self.id} no longer exists"

View File

@@ -201,56 +201,25 @@ async def test_get_input_schema(server: SpinTestServer, snapshot: Snapshot):
@pytest.mark.asyncio(loop_scope="session")
async def test_clean_graph(server: SpinTestServer):
"""
Test the stripped_for_export function that:
1. Removes sensitive/secret fields from node inputs
2. Removes webhook information
3. Preserves non-sensitive data including input block values
Test the clean_graph function that:
1. Clears input block values
2. Removes credentials from nodes
"""
# Create a graph with input blocks containing both sensitive and normal data
# Create a graph with input blocks and credentials
graph = Graph(
id="test_clean_graph",
name="Test Clean Graph",
description="Test graph cleaning",
nodes=[
Node(
id="input_node",
block_id=AgentInputBlock().id,
input_default={
"_test_id": "input_node",
"name": "test_input",
"value": "test value", # This should be preserved
"value": "test value",
"description": "Test input description",
},
),
Node(
block_id=AgentInputBlock().id,
input_default={
"_test_id": "input_node_secret",
"name": "secret_input",
"value": "another value",
"secret": True, # This makes the input secret
},
),
Node(
block_id=StoreValueBlock().id,
input_default={
"_test_id": "node_with_secrets",
"input": "normal_value",
"control_test_input": "should be preserved",
"api_key": "secret_api_key_123", # Should be filtered
"password": "secret_password_456", # Should be filtered
"token": "secret_token_789", # Should be filtered
"credentials": { # Should be filtered
"id": "fake-github-credentials-id",
"provider": "github",
"type": "api_key",
},
"anthropic_credentials": { # Should be filtered
"id": "fake-anthropic-credentials-id",
"provider": "anthropic",
"type": "api_key",
},
},
),
],
links=[],
)
@@ -262,54 +231,15 @@ async def test_clean_graph(server: SpinTestServer):
)
# Clean the graph
cleaned_graph = await server.agent_server.test_get_graph(
created_graph = await server.agent_server.test_get_graph(
created_graph.id, created_graph.version, DEFAULT_USER_ID, for_export=True
)
# Verify sensitive fields are removed but normal fields are preserved
# Verify input block value is cleared
input_node = next(
n for n in cleaned_graph.nodes if n.input_default["_test_id"] == "input_node"
n for n in created_graph.nodes if n.block_id == AgentInputBlock().id
)
# Non-sensitive fields should be preserved
assert input_node.input_default["name"] == "test_input"
assert input_node.input_default["value"] == "test value" # Should be preserved now
assert input_node.input_default["description"] == "Test input description"
# Sensitive fields should be filtered out
assert "api_key" not in input_node.input_default
assert "password" not in input_node.input_default
# Verify secret input node preserves non-sensitive fields but removes secret value
secret_node = next(
n
for n in cleaned_graph.nodes
if n.input_default["_test_id"] == "input_node_secret"
)
assert secret_node.input_default["name"] == "secret_input"
assert "value" not in secret_node.input_default # Secret default should be removed
assert secret_node.input_default["secret"] is True
# Verify sensitive fields are filtered from nodes with secrets
secrets_node = next(
n
for n in cleaned_graph.nodes
if n.input_default["_test_id"] == "node_with_secrets"
)
# Normal fields should be preserved
assert secrets_node.input_default["input"] == "normal_value"
assert secrets_node.input_default["control_test_input"] == "should be preserved"
# Sensitive fields should be filtered out
assert "api_key" not in secrets_node.input_default
assert "password" not in secrets_node.input_default
assert "token" not in secrets_node.input_default
assert "credentials" not in secrets_node.input_default
assert "anthropic_credentials" not in secrets_node.input_default
# Verify webhook info is removed (if any nodes had it)
for node in cleaned_graph.nodes:
assert node.webhook_id is None
assert node.webhook is None
assert input_node.input_default["value"] == ""
@pytest.mark.asyncio(loop_scope="session")

View File

@@ -14,7 +14,6 @@ AGENT_GRAPH_INCLUDE: prisma.types.AgentGraphInclude = {
"Nodes": {"include": AGENT_NODE_INCLUDE}
}
EXECUTION_RESULT_ORDER: list[prisma.types.AgentNodeExecutionOrderByInput] = [
{"queuedTime": "desc"},
# Fallback: Incomplete execs has no queuedTime.
@@ -29,13 +28,6 @@ EXECUTION_RESULT_INCLUDE: prisma.types.AgentNodeExecutionInclude = {
}
MAX_NODE_EXECUTIONS_FETCH = 1000
MAX_LIBRARY_AGENT_EXECUTIONS_FETCH = 10
# Default limits for potentially large result sets
MAX_CREDIT_REFUND_REQUESTS_FETCH = 100
MAX_INTEGRATION_WEBHOOKS_FETCH = 100
MAX_USER_API_KEYS_FETCH = 500
MAX_GRAPH_VERSIONS_FETCH = 50
GRAPH_EXECUTION_INCLUDE_WITH_NODES: prisma.types.AgentGraphExecutionInclude = {
"NodeExecutions": {
@@ -79,56 +71,13 @@ INTEGRATION_WEBHOOK_INCLUDE: prisma.types.IntegrationWebhookInclude = {
}
def library_agent_include(
user_id: str,
include_nodes: bool = True,
include_executions: bool = True,
execution_limit: int = MAX_LIBRARY_AGENT_EXECUTIONS_FETCH,
) -> prisma.types.LibraryAgentInclude:
"""
Fully configurable includes for library agent queries with performance optimization.
Args:
user_id: User ID for filtering user-specific data
include_nodes: Whether to include graph nodes (default: True, needed for get_sub_graphs)
include_executions: Whether to include executions (default: True, safe with execution_limit)
execution_limit: Limit on executions to fetch (default: MAX_LIBRARY_AGENT_EXECUTIONS_FETCH)
Defaults maintain backward compatibility and safety - they include everything needed for all functionality.
For performance optimization, explicitly set include_nodes=False and include_executions=False
for listing views where the frontend fetches data separately.
Performance impact:
- Default (full nodes + limited executions): Original performance, works everywhere
- Listing optimization (no nodes/executions): ~2s for 15 agents vs potential timeouts
- Unlimited executions: varies by user (thousands of executions = timeouts)
"""
result: prisma.types.LibraryAgentInclude = {
"Creator": True, # Always needed for creator info
}
# Build AgentGraph include based on requested options
if include_nodes or include_executions:
agent_graph_include = {}
# Add nodes if requested (always full nodes)
if include_nodes:
agent_graph_include.update(AGENT_GRAPH_INCLUDE) # Full nodes
# Add executions if requested
if include_executions:
agent_graph_include["Executions"] = {
"where": {"userId": user_id},
"order_by": {"createdAt": "desc"},
"take": execution_limit,
def library_agent_include(user_id: str) -> prisma.types.LibraryAgentInclude:
return {
"AgentGraph": {
"include": {
**AGENT_GRAPH_INCLUDE,
"Executions": {"where": {"userId": user_id}},
}
result["AgentGraph"] = cast(
prisma.types.AgentGraphArgsFromLibraryAgent,
{"include": agent_graph_include},
)
else:
# Default: Basic metadata only (fast - recommended for most use cases)
result["AgentGraph"] = True # Basic graph metadata (name, description, id)
return result
},
"Creator": True,
}
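A hypothetical call-site sketch for the configurable include above (the query shape mirrors the Prisma patterns used elsewhere in this diff; names are assumptions):

import prisma.models


async def list_library_agents_fast(user_id: str):
    # Listing view: basic AgentGraph metadata only, skipping nodes/executions.
    include = library_agent_include(
        user_id, include_nodes=False, include_executions=False
    )
    return await prisma.models.LibraryAgent.prisma().find_many(
        where={"userId": user_id}, include=include
    )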

View File

@@ -11,10 +11,7 @@ from prisma.types import (
from pydantic import Field, computed_field
from backend.data.event_bus import AsyncRedisEventBus
from backend.data.includes import (
INTEGRATION_WEBHOOK_INCLUDE,
MAX_INTEGRATION_WEBHOOKS_FETCH,
)
from backend.data.includes import INTEGRATION_WEBHOOK_INCLUDE
from backend.integrations.providers import ProviderName
from backend.integrations.webhooks.utils import webhook_ingress_url
from backend.server.v2.library.model import LibraryAgentPreset
@@ -131,36 +128,22 @@ async def get_webhook(
@overload
async def get_all_webhooks_by_creds(
user_id: str,
credentials_id: str,
*,
include_relations: Literal[True],
limit: int = MAX_INTEGRATION_WEBHOOKS_FETCH,
user_id: str, credentials_id: str, *, include_relations: Literal[True]
) -> list[WebhookWithRelations]: ...
@overload
async def get_all_webhooks_by_creds(
user_id: str,
credentials_id: str,
*,
include_relations: Literal[False] = False,
limit: int = MAX_INTEGRATION_WEBHOOKS_FETCH,
user_id: str, credentials_id: str, *, include_relations: Literal[False] = False
) -> list[Webhook]: ...
async def get_all_webhooks_by_creds(
user_id: str,
credentials_id: str,
*,
include_relations: bool = False,
limit: int = MAX_INTEGRATION_WEBHOOKS_FETCH,
user_id: str, credentials_id: str, *, include_relations: bool = False
) -> list[Webhook] | list[WebhookWithRelations]:
if not credentials_id:
raise ValueError("credentials_id must not be empty")
webhooks = await IntegrationWebhook.prisma().find_many(
where={"userId": user_id, "credentialsId": credentials_id},
include=INTEGRATION_WEBHOOK_INCLUDE if include_relations else None,
order={"createdAt": "desc"},
take=limit,
)
return [
(WebhookWithRelations if include_relations else Webhook).from_db(webhook)
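The paired overloads above let type checkers narrow the return type from the `include_relations` flag; the same pattern in a minimal self-contained form:

from typing import Literal, overload


@overload
def load(verbose: Literal[True]) -> dict: ...
@overload
def load(verbose: Literal[False] = ...) -> str: ...
def load(verbose: bool = False) -> dict | str:
    # Single runtime implementation; the overloads are typing-only.
    return {"detail": "full"} if verbose else "summary"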

View File

@@ -270,7 +270,6 @@ def SchemaField(
min_length: Optional[int] = None,
max_length: Optional[int] = None,
discriminator: Optional[str] = None,
format: Optional[str] = None,
json_schema_extra: Optional[dict[str, Any]] = None,
) -> T:
if default is PydanticUndefined and default_factory is None:
@@ -286,7 +285,6 @@ def SchemaField(
"advanced": advanced,
"hidden": hidden,
"depends_on": depends_on,
"format": format,
**(json_schema_extra or {}),
}.items()
if v is not None

View File

@@ -15,7 +15,7 @@ from prisma.types import (
# from backend.notifications.models import NotificationEvent
from pydantic import BaseModel, ConfigDict, EmailStr, Field, field_validator
from backend.util.exceptions import DatabaseError
from backend.server.v2.store.exceptions import DatabaseError
from backend.util.json import SafeJson
from backend.util.logging import TruncatedLogger
@@ -235,7 +235,6 @@ class BaseEventModel(BaseModel):
class NotificationEventModel(BaseEventModel, Generic[NotificationDataType_co]):
id: Optional[str] = None # None when creating, populated when reading from DB
data: NotificationDataType_co
@property
@@ -379,7 +378,6 @@ class NotificationPreference(BaseModel):
class UserNotificationEventDTO(BaseModel):
id: str # Added to track notifications for removal
type: NotificationType
data: dict
created_at: datetime
@@ -388,7 +386,6 @@ class UserNotificationEventDTO(BaseModel):
@staticmethod
def from_db(model: NotificationEvent) -> "UserNotificationEventDTO":
return UserNotificationEventDTO(
id=model.id,
type=model.type,
data=dict(model.data),
created_at=model.createdAt,
@@ -544,79 +541,6 @@ async def empty_user_notification_batch(
) from e
async def clear_all_user_notification_batches(user_id: str) -> None:
"""Clear ALL notification batches for a user across all types.
Used when user's email is bounced/inactive and we should stop
trying to send them ANY emails.
"""
try:
# Delete all notification events for this user
await NotificationEvent.prisma().delete_many(
where={"UserNotificationBatch": {"is": {"userId": user_id}}}
)
# Delete all batches for this user
await UserNotificationBatch.prisma().delete_many(where={"userId": user_id})
logger.info(f"Cleared all notification batches for user {user_id}")
except Exception as e:
raise DatabaseError(
f"Failed to clear all notification batches for user {user_id}: {e}"
) from e
async def remove_notifications_from_batch(
user_id: str, notification_type: NotificationType, notification_ids: list[str]
) -> None:
"""Remove specific notifications from a user's batch by their IDs.
This is used after successful sending to remove only the
sent notifications, preventing duplicates on retry.
"""
if not notification_ids:
return
try:
# Delete the specific notification events
deleted_count = await NotificationEvent.prisma().delete_many(
where={
"id": {"in": notification_ids},
"UserNotificationBatch": {
"is": {"userId": user_id, "type": notification_type}
},
}
)
logger.info(
f"Removed {deleted_count} notifications from batch for user {user_id}"
)
# Check if batch is now empty and delete it if so
remaining = await NotificationEvent.prisma().count(
where={
"UserNotificationBatch": {
"is": {"userId": user_id, "type": notification_type}
}
}
)
if remaining == 0:
await UserNotificationBatch.prisma().delete_many(
where=UserNotificationBatchWhereInput(
userId=user_id,
type=notification_type,
)
)
logger.info(
f"Deleted empty batch for user {user_id} and type {notification_type}"
)
except Exception as e:
raise DatabaseError(
f"Failed to remove notifications from batch for user {user_id} and type {notification_type}: {e}"
) from e
async def get_user_notification_batch(
user_id: str,
notification_type: NotificationType,

View File

@@ -1,16 +1,15 @@
import re
from datetime import datetime
from typing import Any, Optional
import prisma
import pydantic
from autogpt_libs.utils.cache import cached
from prisma.enums import OnboardingStep
from prisma.models import UserOnboarding
from prisma.types import UserOnboardingCreateInput, UserOnboardingUpdateInput
from backend.data.block import get_blocks
from backend.data.credit import get_user_credit_model
from backend.data.graph import GraphModel
from backend.data.model import CredentialsMetaInput
from backend.server.v2.store.model import StoreAgentDetails
from backend.util.json import SafeJson
@@ -31,7 +30,7 @@ user_credit = get_user_credit_model()
class UserOnboardingUpdate(pydantic.BaseModel):
completedSteps: Optional[list[OnboardingStep]] = None
walletShown: Optional[bool] = None
notificationDot: Optional[bool] = None
notified: Optional[list[OnboardingStep]] = None
usageReason: Optional[str] = None
integrations: Optional[list[str]] = None
@@ -40,8 +39,6 @@ class UserOnboardingUpdate(pydantic.BaseModel):
agentInput: Optional[dict[str, Any]] = None
onboardingAgentExecutionId: Optional[str] = None
agentRuns: Optional[int] = None
lastRunAt: Optional[datetime] = None
consecutiveRunDays: Optional[int] = None
async def get_user_onboarding(user_id: str):
@@ -60,22 +57,16 @@ async def update_user_onboarding(user_id: str, data: UserOnboardingUpdate):
update["completedSteps"] = list(set(data.completedSteps))
for step in (
OnboardingStep.AGENT_NEW_RUN,
OnboardingStep.MARKETPLACE_VISIT,
OnboardingStep.RUN_AGENTS,
OnboardingStep.MARKETPLACE_ADD_AGENT,
OnboardingStep.MARKETPLACE_RUN_AGENT,
OnboardingStep.BUILDER_SAVE_AGENT,
OnboardingStep.RE_RUN_AGENT,
OnboardingStep.SCHEDULE_AGENT,
OnboardingStep.RUN_AGENTS,
OnboardingStep.RUN_3_DAYS,
OnboardingStep.TRIGGER_WEBHOOK,
OnboardingStep.RUN_14_DAYS,
OnboardingStep.RUN_AGENTS_100,
OnboardingStep.BUILDER_RUN_AGENT,
):
if step in data.completedSteps:
await reward_user(user_id, step)
if data.walletShown is not None:
update["walletShown"] = data.walletShown
if data.notificationDot is not None:
update["notificationDot"] = data.notificationDot
if data.notified is not None:
update["notified"] = list(set(data.notified))
if data.usageReason is not None:
@@ -92,10 +83,6 @@ async def update_user_onboarding(user_id: str, data: UserOnboardingUpdate):
update["onboardingAgentExecutionId"] = data.onboardingAgentExecutionId
if data.agentRuns is not None:
update["agentRuns"] = data.agentRuns
if data.lastRunAt is not None:
update["lastRunAt"] = data.lastRunAt
if data.consecutiveRunDays is not None:
update["consecutiveRunDays"] = data.consecutiveRunDays
return await UserOnboarding.prisma().upsert(
where={"userId": user_id},
@@ -114,28 +101,16 @@ async def reward_user(user_id: str, step: OnboardingStep):
# This is seen as a reward for the GET_RESULTS step in the wallet
case OnboardingStep.AGENT_NEW_RUN:
reward = 300
case OnboardingStep.MARKETPLACE_VISIT:
reward = 100
case OnboardingStep.RUN_AGENTS:
reward = 300
case OnboardingStep.MARKETPLACE_ADD_AGENT:
reward = 100
case OnboardingStep.MARKETPLACE_RUN_AGENT:
reward = 100
case OnboardingStep.BUILDER_SAVE_AGENT:
reward = 100
case OnboardingStep.RE_RUN_AGENT:
case OnboardingStep.BUILDER_RUN_AGENT:
reward = 100
case OnboardingStep.SCHEDULE_AGENT:
reward = 100
case OnboardingStep.RUN_AGENTS:
reward = 300
case OnboardingStep.RUN_3_DAYS:
reward = 100
case OnboardingStep.TRIGGER_WEBHOOK:
reward = 100
case OnboardingStep.RUN_14_DAYS:
reward = 300
case OnboardingStep.RUN_AGENTS_100:
reward = 300
if reward == 0:
return
@@ -157,22 +132,6 @@ async def reward_user(user_id: str, step: OnboardingStep):
)
async def complete_webhook_trigger_step(user_id: str):
"""
Completes the TRIGGER_WEBHOOK onboarding step for the user if not already completed.
"""
onboarding = await get_user_onboarding(user_id)
if OnboardingStep.TRIGGER_WEBHOOK not in onboarding.completedSteps:
await update_user_onboarding(
user_id,
UserOnboardingUpdate(
completedSteps=onboarding.completedSteps
+ [OnboardingStep.TRIGGER_WEBHOOK]
),
)
def clean_and_split(text: str) -> list[str]:
"""
Removes all special characters from a string, truncates it to 100 characters,
@@ -277,14 +236,8 @@ async def get_recommended_agents(user_id: str) -> list[StoreAgentDetails]:
for word in user_onboarding.integrations
]
where_clause["is_available"] = True
# Try to take only agents that are available and allowed for onboarding
storeAgents = await prisma.models.StoreAgent.prisma().find_many(
where={
"is_available": True,
"useForOnboarding": True,
},
where=prisma.types.StoreAgentWhereInput(**where_clause),
order=[
{"featured": "desc"},
{"runs": "desc"},
@@ -293,16 +246,59 @@ async def get_recommended_agents(user_id: str) -> list[StoreAgentDetails]:
take=100,
)
# If not enough agents are found, relax the useForOnboarding filter
agentListings = await prisma.models.StoreListingVersion.prisma().find_many(
where={
"id": {"in": [agent.storeListingVersionId for agent in storeAgents]},
},
include={"AgentGraph": True},
)
for listing in agentListings:
agent = listing.AgentGraph
if agent is None:
continue
graph = GraphModel.from_db(agent)
# Remove agents with empty input schema
if not graph.input_schema:
storeAgents = [
a for a in storeAgents if a.storeListingVersionId != listing.id
]
continue
# Remove agents with empty credentials
# Get nodes from this agent that have credentials
nodes = await prisma.models.AgentNode.prisma().find_many(
where={
"agentGraphId": agent.id,
"agentBlockId": {"in": list(CREDENTIALS_FIELDS.keys())},
},
)
for node in nodes:
block_id = node.agentBlockId
field_name = CREDENTIALS_FIELDS[block_id]
# If there are no credentials or they are empty, remove the agent
# FIXME ignores default values
if (
field_name not in node.constantInput
or node.constantInput[field_name] is None
):
storeAgents = [
a for a in storeAgents if a.storeListingVersionId != listing.id
]
break
# If there are fewer than 2 agents, add more agents to the list
if len(storeAgents) < 2:
storeAgents = await prisma.models.StoreAgent.prisma().find_many(
where=prisma.types.StoreAgentWhereInput(**where_clause),
storeAgents += await prisma.models.StoreAgent.prisma().find_many(
where={
"listing_id": {"not_in": [agent.listing_id for agent in storeAgents]},
},
order=[
{"featured": "desc"},
{"runs": "desc"},
{"rating": "desc"},
],
take=100,
take=2 - len(storeAgents),
)
# Calculate points for the first X agents and choose the top 2
@@ -337,13 +333,8 @@ async def get_recommended_agents(user_id: str) -> list[StoreAgentDetails]:
]
@cached(maxsize=1, ttl_seconds=300) # Cache for 5 minutes since this rarely changes
async def onboarding_enabled() -> bool:
"""
Check if onboarding should be enabled based on store agent count.
Cached to prevent repeated slow database queries.
"""
# Use a more efficient query that stops counting after finding enough agents
count = await prisma.models.StoreAgent.prisma().count(take=MIN_AGENT_COUNT + 1)
# Onboarding is enabled if there are at least 2 agents in the store
# Onboarding is enabled if there are at least 2 agents in the store
return count >= MIN_AGENT_COUNT

View File

@@ -1,7 +1,8 @@
import logging
import os
from functools import cache
from autogpt_libs.utils.cache import cached, thread_cached
from autogpt_libs.utils.cache import thread_cached
from dotenv import load_dotenv
from redis import Redis
from redis.asyncio import Redis as AsyncRedis
@@ -12,7 +13,7 @@ load_dotenv()
HOST = os.getenv("REDIS_HOST", "localhost")
PORT = int(os.getenv("REDIS_PORT", "6379"))
PASSWORD = os.getenv("REDIS_PASSWORD", None)
PASSWORD = os.getenv("REDIS_PASSWORD", "password")
logger = logging.getLogger(__name__)
@@ -34,7 +35,7 @@ def disconnect():
get_redis().close()
@cached()
@cache
def get_redis() -> Redis:
return connect()
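A minimal client sketch built from the env-derived settings above (standard `redis-py` constructor arguments):

from redis import Redis


def make_client() -> Redis:
    # HOST, PORT and PASSWORD are read from the environment, as above.
    return Redis(host=HOST, port=PORT, password=PASSWORD, decode_responses=True)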

View File

@@ -7,7 +7,6 @@ from typing import Optional, cast
from urllib.parse import quote_plus
from autogpt_libs.auth.models import DEFAULT_USER_ID
from autogpt_libs.utils.cache import cached
from fastapi import HTTPException
from prisma.enums import NotificationType
from prisma.models import User as PrismaUser
@@ -16,19 +15,15 @@ from prisma.types import JsonFilter, UserCreateInput, UserUpdateInput
from backend.data.db import prisma
from backend.data.model import User, UserIntegrations, UserMetadata
from backend.data.notifications import NotificationPreference, NotificationPreferenceDTO
from backend.server.v2.store.exceptions import DatabaseError
from backend.util.encryption import JSONCryptor
from backend.util.exceptions import DatabaseError
from backend.util.json import SafeJson
from backend.util.settings import Settings
logger = logging.getLogger(__name__)
settings = Settings()
# Cache decorator alias for consistent user lookup caching
cache_user_lookup = cached(maxsize=1000, ttl_seconds=300)
@cache_user_lookup
async def get_or_create_user(user_data: dict) -> User:
try:
user_id = user_data.get("sub")
@@ -54,7 +49,6 @@ async def get_or_create_user(user_data: dict) -> User:
raise DatabaseError(f"Failed to get or create user {user_data}: {e}") from e
@cache_user_lookup
async def get_user_by_id(user_id: str) -> User:
user = await prisma.user.find_unique(where={"id": user_id})
if not user:
@@ -70,7 +64,6 @@ async def get_user_email_by_id(user_id: str) -> Optional[str]:
raise DatabaseError(f"Failed to get user email for user {user_id}: {e}") from e
@cache_user_lookup
async def get_user_by_email(email: str) -> Optional[User]:
try:
user = await prisma.user.find_unique(where={"email": email})
@@ -81,17 +74,7 @@ async def get_user_by_email(email: str) -> Optional[User]:
async def update_user_email(user_id: str, email: str):
try:
# Get old email first for cache invalidation
old_user = await prisma.user.find_unique(where={"id": user_id})
old_email = old_user.email if old_user else None
await prisma.user.update(where={"id": user_id}, data={"email": email})
# Selectively invalidate only the specific user entries
get_user_by_id.cache_delete(user_id)
if old_email:
get_user_by_email.cache_delete(old_email)
get_user_by_email.cache_delete(email)
except Exception as e:
raise DatabaseError(
f"Failed to update user email for user {user_id}: {e}"
@@ -131,8 +114,6 @@ async def update_user_integrations(user_id: str, data: UserIntegrations):
where={"id": user_id},
data={"integrations": encrypted_data},
)
# Invalidate cache for this user
get_user_by_id.cache_delete(user_id)
async def migrate_and_encrypt_user_integrations():
@@ -304,10 +285,6 @@ async def update_user_notification_preference(
)
if not user:
raise ValueError(f"User not found with ID: {user_id}")
# Invalidate cache for this user since notification preferences are part of user data
get_user_by_id.cache_delete(user_id)
preferences: dict[NotificationType, bool] = {
NotificationType.AGENT_RUN: user.notifyOnAgentRun or True,
NotificationType.ZERO_BALANCE: user.notifyOnZeroBalance or True,
@@ -346,44 +323,12 @@ async def set_user_email_verification(user_id: str, verified: bool) -> None:
where={"id": user_id},
data={"emailVerified": verified},
)
# Invalidate cache for this user
get_user_by_id.cache_delete(user_id)
except Exception as e:
raise DatabaseError(
f"Failed to set email verification status for user {user_id}: {e}"
) from e
async def disable_all_user_notifications(user_id: str) -> None:
"""Disable all notification preferences for a user.
Used when a user's email bounces or is inactive, to prevent any future notifications.
"""
try:
await PrismaUser.prisma().update(
where={"id": user_id},
data={
"notifyOnAgentRun": False,
"notifyOnZeroBalance": False,
"notifyOnLowBalance": False,
"notifyOnBlockExecutionFailed": False,
"notifyOnContinuousAgentError": False,
"notifyOnDailySummary": False,
"notifyOnWeeklySummary": False,
"notifyOnMonthlySummary": False,
"notifyOnAgentApproved": False,
"notifyOnAgentRejected": False,
},
)
# Invalidate cache for this user
get_user_by_id.cache_delete(user_id)
logger.info(f"Disabled all notification preferences for user {user_id}")
except Exception as e:
raise DatabaseError(
f"Failed to disable notifications for user {user_id}: {e}"
) from e
async def get_user_email_verification(user_id: str) -> bool:
"""Get the email verification status for a user."""
try:
@@ -462,10 +407,6 @@ async def update_user_timezone(user_id: str, timezone: str) -> User:
)
if not user:
raise ValueError(f"User not found with ID: {user_id}")
# Invalidate cache for this user
get_user_by_id.cache_delete(user_id)
return User.from_db(user)
except Exception as e:
raise DatabaseError(f"Failed to update timezone for user {user_id}: {e}") from e

View File

@@ -4,12 +4,7 @@ Module for generating AI-based activity status for graph executions.
import json
import logging
from typing import TYPE_CHECKING, Any, TypedDict
try:
from typing import NotRequired
except ImportError:
from typing_extensions import NotRequired
from typing import TYPE_CHECKING, Any, NotRequired, TypedDict
from pydantic import SecretStr
@@ -112,7 +107,7 @@ async def generate_activity_status_for_execution(
# Check if we have OpenAI API key
try:
settings = Settings()
if not settings.secrets.openai_internal_api_key:
if not settings.secrets.openai_api_key:
logger.debug(
"OpenAI API key not configured, skipping activity status generation"
)
@@ -151,35 +146,17 @@ async def generate_activity_status_for_execution(
"Focus on the ACTUAL TASK the user wanted done, not the internal workflow steps. "
"Avoid technical terms like 'workflow', 'execution', 'components', 'nodes', 'processing', etc. "
"Keep it to 3 sentences maximum. Be conversational and human-friendly.\n\n"
"UNDERSTAND THE INTENDED PURPOSE:\n"
"- FIRST: Read the graph description carefully to understand what the user wanted to accomplish\n"
"- The graph name and description tell you the main goal/intention of this automation\n"
"- Use this intended purpose as your PRIMARY criteria for success/failure evaluation\n"
"- Ask yourself: 'Did this execution actually accomplish what the graph was designed to do?'\n\n"
"CRITICAL OUTPUT ANALYSIS:\n"
"- Check if blocks that should produce user-facing results actually produced outputs\n"
"- Blocks with names containing 'Output', 'Post', 'Create', 'Send', 'Publish', 'Generate' are usually meant to produce final results\n"
"- If these critical blocks have NO outputs (empty recent_outputs), the task likely FAILED even if status shows 'completed'\n"
"- Sub-agents (AgentExecutorBlock) that produce no outputs usually indicate failed sub-tasks\n"
"- Most importantly: Does the execution result match what the graph description promised to deliver?\n\n"
"SUCCESS EVALUATION BASED ON INTENTION:\n"
"- If the graph is meant to 'create blog posts' → check if blog content was actually created\n"
"- If the graph is meant to 'send emails' → check if emails were actually sent\n"
"- If the graph is meant to 'analyze data' → check if analysis results were produced\n"
"- If the graph is meant to 'generate reports' → check if reports were generated\n"
"- Technical completion ≠ goal achievement. Focus on whether the USER'S INTENDED OUTCOME was delivered\n\n"
"IMPORTANT: Be HONEST about what actually happened:\n"
"- If the input was invalid/nonsensical, say so directly\n"
"- If the task failed, explain what went wrong in simple terms\n"
"- If errors occurred, focus on what the user needs to know\n"
"- Only claim success if the INTENDED PURPOSE was genuinely accomplished AND produced expected outputs\n"
"- Don't sugar-coat failures or present them as helpful feedback\n"
"- ESPECIALLY: If the graph's main purpose wasn't achieved, this is a failure regardless of 'completed' status\n\n"
"- Only claim success if the task was genuinely completed\n"
"- Don't sugar-coat failures or present them as helpful feedback\n\n"
"Understanding Errors:\n"
"- Node errors: Individual steps may fail but the overall task might still complete (e.g., one data source fails but others work)\n"
"- Graph error (in overall_status.graph_error): This means the entire execution failed and nothing was accomplished\n"
"- Missing outputs from critical blocks: Even if no errors, this means the task failed to produce expected results\n"
"- Focus on whether the graph's intended purpose was fulfilled, not whether technical steps completed"
"- Even if execution shows 'completed', check if critical nodes failed that would prevent the desired outcome\n"
"- Focus on the end result the user wanted, not whether technical steps completed"
),
},
{
@@ -188,28 +165,15 @@ async def generate_activity_status_for_execution(
f"A user ran '{graph_name}' to accomplish something. Based on this execution data, "
f"write what they achieved in simple, user-friendly terms:\n\n"
f"{json.dumps(execution_data, indent=2)}\n\n"
"ANALYSIS CHECKLIST:\n"
"1. READ graph_info.description FIRST - this tells you what the user intended to accomplish\n"
"2. Check overall_status.graph_error - if present, the entire execution failed\n"
"3. Look for nodes with 'Output', 'Post', 'Create', 'Send', 'Publish', 'Generate' in their block_name\n"
"4. Check if these critical blocks have empty recent_outputs arrays - this indicates failure\n"
"5. Look for AgentExecutorBlock (sub-agents) with no outputs - this suggests sub-task failures\n"
"6. Count how many nodes produced outputs vs total nodes - low ratio suggests problems\n"
"7. MOST IMPORTANT: Does the execution outcome match what graph_info.description promised?\n\n"
"INTENTION-BASED EVALUATION:\n"
"- If description mentions 'blog writing' → did it create blog content?\n"
"- If description mentions 'email automation' → were emails actually sent?\n"
"- If description mentions 'data analysis' → were analysis results produced?\n"
"- If description mentions 'content generation' → was content actually generated?\n"
"- If description mentions 'social media posting' → were posts actually made?\n"
"- Match the outputs to the stated intention, not just technical completion\n\n"
"CRITICAL: Check overall_status.graph_error FIRST - if present, the entire execution failed.\n"
"Then check individual node errors to understand partial failures.\n\n"
"Write 1-3 sentences about what the user accomplished, such as:\n"
"- 'I analyzed your resume and provided detailed feedback for the IT industry.'\n"
"- 'I couldn't complete the task because critical steps failed to produce any results.'\n"
"- 'I failed to generate the content you requested due to missing API access.'\n"
"- 'I couldn't analyze your resume because the input was just nonsensical text.'\n"
"- 'I failed to complete the task due to missing API access.'\n"
"- 'I extracted key information from your documents and organized it into a summary.'\n"
"- 'The task failed because the blog post creation step didn't produce any output.'\n\n"
"BE CRITICAL: If the graph's intended purpose (from description) wasn't achieved, report this as a failure even if status is 'completed'."
"- 'The task failed to run due to system configuration issues.'\n\n"
"Focus on what ACTUALLY happened, not what was attempted."
),
},
]
@@ -223,7 +187,7 @@ async def generate_activity_status_for_execution(
credentials = APIKeyCredentials(
id="openai",
provider="openai",
api_key=SecretStr(settings.secrets.openai_internal_api_key),
api_key=SecretStr(settings.secrets.openai_api_key),
title="System OpenAI",
)
@@ -233,7 +197,6 @@ async def generate_activity_status_for_execution(
logger.debug(
f"Generated activity status for {graph_exec_id}: {activity_status}"
)
return activity_status
except Exception as e:
@@ -460,6 +423,7 @@ async def _call_llm_direct(
credentials=credentials,
llm_model=LlmModel.GPT4O_MINI,
prompt=prompt,
json_format=False,
max_tokens=150,
compress_prompt_to_fit=True,
)

View File

@@ -468,7 +468,7 @@ class TestGenerateActivityStatusForExecution:
):
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
mock_settings.return_value.secrets.openai_api_key = "test_key"
mock_llm.return_value = (
"I analyzed your data and provided the requested insights."
)
@@ -520,7 +520,7 @@ class TestGenerateActivityStatusForExecution:
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
):
mock_settings.return_value.secrets.openai_internal_api_key = ""
mock_settings.return_value.secrets.openai_api_key = ""
result = await generate_activity_status_for_execution(
graph_exec_id="test_exec",
@@ -546,7 +546,7 @@ class TestGenerateActivityStatusForExecution:
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
):
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
mock_settings.return_value.secrets.openai_api_key = "test_key"
result = await generate_activity_status_for_execution(
graph_exec_id="test_exec",
@@ -581,7 +581,7 @@ class TestGenerateActivityStatusForExecution:
):
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
mock_settings.return_value.secrets.openai_api_key = "test_key"
mock_llm.return_value = "Agent completed execution."
result = await generate_activity_status_for_execution(
@@ -633,7 +633,7 @@ class TestIntegration:
):
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
mock_settings.return_value.secrets.openai_api_key = "test_key"
mock_response = LLMResponse(
raw_response={},

View File

@@ -1,115 +0,0 @@
"""Redis-based distributed locking for cluster coordination."""
import logging
import time
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from redis import Redis
logger = logging.getLogger(__name__)
class ClusterLock:
"""Simple Redis-based distributed lock for preventing duplicate execution."""
def __init__(self, redis: "Redis", key: str, owner_id: str, timeout: int = 300):
self.redis = redis
self.key = key
self.owner_id = owner_id
self.timeout = timeout
self._last_refresh = 0.0
def try_acquire(self) -> str | None:
"""Try to acquire the lock.
Returns:
- owner_id (self.owner_id) if successfully acquired
- different owner_id if someone else holds the lock
- None if Redis is unavailable or other error
"""
try:
success = self.redis.set(self.key, self.owner_id, nx=True, ex=self.timeout)
if success:
self._last_refresh = time.time()
return self.owner_id # Successfully acquired
# Failed to acquire, get current owner
current_value = self.redis.get(self.key)
if current_value:
current_owner = (
current_value.decode("utf-8")
if isinstance(current_value, bytes)
else str(current_value)
)
return current_owner
# Key doesn't exist but we failed to set it - race condition or Redis issue
return None
except Exception as e:
logger.error(f"ClusterLock.try_acquire failed for key {self.key}: {e}")
return None
def refresh(self) -> bool:
"""Refresh lock TTL if we still own it.
Rate limited to at most once every timeout/10 seconds (minimum 1 second).
During rate limiting, still verifies lock existence but skips TTL extension.
Setting _last_refresh to 0 bypasses rate limiting for testing.
"""
# Calculate refresh interval: max(timeout // 10, 1)
refresh_interval = max(self.timeout // 10, 1)
current_time = time.time()
# Check if we're within the rate limit period
# _last_refresh == 0 forces a refresh (bypasses rate limiting for testing)
is_rate_limited = (
self._last_refresh > 0
and (current_time - self._last_refresh) < refresh_interval
)
try:
# Always verify lock existence, even during rate limiting
current_value = self.redis.get(self.key)
if not current_value:
self._last_refresh = 0
return False
stored_owner = (
current_value.decode("utf-8")
if isinstance(current_value, bytes)
else str(current_value)
)
if stored_owner != self.owner_id:
self._last_refresh = 0
return False
# If rate limited, return True but don't update TTL or timestamp
if is_rate_limited:
return True
# Perform actual refresh
if self.redis.expire(self.key, self.timeout):
self._last_refresh = current_time
return True
self._last_refresh = 0
return False
except Exception as e:
logger.error(f"ClusterLock.refresh failed for key {self.key}: {e}")
self._last_refresh = 0
return False
def release(self):
"""Release the lock."""
if self._last_refresh == 0:
return
try:
self.redis.delete(self.key)
except Exception:
pass
self._last_refresh = 0.0
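For orientation, a minimal usage sketch of the class above, mirroring the acquire/refresh/release flow that the ExecutionManager hunks below remove (the key and owner values are illustrative):
import uuid
from redis import Redis
r = Redis(host="localhost", port=6379)
lock = ClusterLock(r, key="exec_lock:<graph_exec_id>", owner_id=str(uuid.uuid4()), timeout=300)
owner = lock.try_acquire()
if owner == lock.owner_id:
    try:
        ...  # long-running work; call lock.refresh() periodically to keep the TTL alive
    finally:
        lock.release()
elif owner is not None:
    print(f"already held by {owner}")  # another pod owns the execution
else:
    print("Redis unavailable")  # try_acquire() returned None; requeue the message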

View File

@@ -1,507 +0,0 @@
"""
Integration tests for ClusterLock - Redis-based distributed locking.
Tests the complete lock lifecycle without mocking Redis to ensure
real-world behavior is correct. Covers acquisition, refresh, expiry,
contention, and error scenarios.
"""
import logging
import time
import uuid
from threading import Thread
import pytest
import redis
from .cluster_lock import ClusterLock
logger = logging.getLogger(__name__)
@pytest.fixture
def redis_client():
"""Get Redis client for testing using same config as backend."""
from backend.data.redis_client import HOST, PASSWORD, PORT
# Use same config as backend but without decode_responses since ClusterLock needs raw bytes
client = redis.Redis(
host=HOST,
port=PORT,
password=PASSWORD,
decode_responses=False, # ClusterLock needs raw bytes for ownership verification
)
# Clean up any existing test keys
try:
for key in client.scan_iter(match="test_lock:*"):
client.delete(key)
except Exception:
pass # Ignore cleanup errors
return client
@pytest.fixture
def lock_key():
"""Generate unique lock key for each test."""
return f"test_lock:{uuid.uuid4()}"
@pytest.fixture
def owner_id():
"""Generate unique owner ID for each test."""
return str(uuid.uuid4())
class TestClusterLockBasic:
"""Basic lock acquisition and release functionality."""
def test_lock_acquisition_success(self, redis_client, lock_key, owner_id):
"""Test basic lock acquisition succeeds."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
# Lock should be acquired successfully
result = lock.try_acquire()
assert result == owner_id # Returns our owner_id when successfully acquired
assert lock._last_refresh > 0
# Lock key should exist in Redis
assert redis_client.exists(lock_key) == 1
assert redis_client.get(lock_key).decode("utf-8") == owner_id
def test_lock_acquisition_contention(self, redis_client, lock_key):
"""Test second acquisition fails when lock is held."""
owner1 = str(uuid.uuid4())
owner2 = str(uuid.uuid4())
lock1 = ClusterLock(redis_client, lock_key, owner1, timeout=60)
lock2 = ClusterLock(redis_client, lock_key, owner2, timeout=60)
# First lock should succeed
result1 = lock1.try_acquire()
assert result1 == owner1 # Successfully acquired, returns our owner_id
# Second lock should fail and return the first owner
result2 = lock2.try_acquire()
assert result2 == owner1 # Returns the current owner (first owner)
assert lock2._last_refresh == 0
def test_lock_release_deletes_redis_key(self, redis_client, lock_key, owner_id):
"""Test lock release deletes Redis key and marks locally as released."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
lock.try_acquire()
assert lock._last_refresh > 0
assert redis_client.exists(lock_key) == 1
# Release should delete Redis key and mark locally as released
lock.release()
assert lock._last_refresh == 0.0
# Redis key should be deleted for immediate release
assert redis_client.exists(lock_key) == 0
# Another lock should be able to acquire immediately
new_owner_id = str(uuid.uuid4())
new_lock = ClusterLock(redis_client, lock_key, new_owner_id, timeout=60)
assert new_lock.try_acquire() == new_owner_id
class TestClusterLockRefresh:
"""Lock refresh and TTL management."""
def test_lock_refresh_success(self, redis_client, lock_key, owner_id):
"""Test lock refresh extends TTL."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
lock.try_acquire()
original_ttl = redis_client.ttl(lock_key)
# Wait a bit then refresh
time.sleep(1)
lock._last_refresh = 0 # Force refresh past rate limit
assert lock.refresh() is True
# TTL should be reset to full timeout (allow for small timing differences)
new_ttl = redis_client.ttl(lock_key)
assert new_ttl >= original_ttl or new_ttl >= 58 # Allow for timing variance
def test_lock_refresh_rate_limiting(self, redis_client, lock_key, owner_id):
"""Test refresh is rate-limited to timeout/10."""
lock = ClusterLock(
redis_client, lock_key, owner_id, timeout=100
) # 100s timeout
lock.try_acquire()
# First refresh should work
assert lock.refresh() is True
first_refresh_time = lock._last_refresh
# Immediate second refresh should be skipped (rate limited) but verify key exists
assert lock.refresh() is True # Returns True but skips actual refresh
assert lock._last_refresh == first_refresh_time # Time unchanged
def test_lock_refresh_verifies_existence_during_rate_limit(
self, redis_client, lock_key, owner_id
):
"""Test refresh verifies lock existence even during rate limiting."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=100)
lock.try_acquire()
# Manually delete the key (simulates expiry or external deletion)
redis_client.delete(lock_key)
# Refresh should detect missing key even during rate limit period
assert lock.refresh() is False
assert lock._last_refresh == 0
def test_lock_refresh_ownership_lost(self, redis_client, lock_key, owner_id):
"""Test refresh fails when ownership is lost."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
lock.try_acquire()
# Simulate another process taking the lock
different_owner = str(uuid.uuid4())
redis_client.set(lock_key, different_owner, ex=60)
# Force refresh past rate limit and verify it fails
lock._last_refresh = 0 # Force refresh past rate limit
assert lock.refresh() is False
assert lock._last_refresh == 0
def test_lock_refresh_when_not_acquired(self, redis_client, lock_key, owner_id):
"""Test refresh fails when lock was never acquired."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
# Refresh without acquiring should fail
assert lock.refresh() is False
class TestClusterLockExpiry:
"""Lock expiry and timeout behavior."""
def test_lock_natural_expiry(self, redis_client, lock_key, owner_id):
"""Test lock expires naturally via Redis TTL."""
lock = ClusterLock(
redis_client, lock_key, owner_id, timeout=2
) # 2 second timeout
lock.try_acquire()
assert redis_client.exists(lock_key) == 1
# Wait for expiry
time.sleep(3)
assert redis_client.exists(lock_key) == 0
# New lock with same key should succeed
new_lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
assert new_lock.try_acquire() == owner_id
def test_lock_refresh_prevents_expiry(self, redis_client, lock_key, owner_id):
"""Test refreshing prevents lock from expiring."""
lock = ClusterLock(
redis_client, lock_key, owner_id, timeout=3
) # 3 second timeout
lock.try_acquire()
# Wait and refresh before expiry
time.sleep(1)
lock._last_refresh = 0 # Force refresh past rate limit
assert lock.refresh() is True
# Wait beyond original timeout
time.sleep(2.5)
assert redis_client.exists(lock_key) == 1 # Should still exist
class TestClusterLockConcurrency:
"""Concurrent access patterns."""
def test_multiple_threads_contention(self, redis_client, lock_key):
"""Test multiple threads competing for same lock."""
num_threads = 5
successful_acquisitions = []
def try_acquire_lock(thread_id):
owner_id = f"thread_{thread_id}"
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
if lock.try_acquire() == owner_id:
successful_acquisitions.append(thread_id)
time.sleep(0.1) # Hold lock briefly
lock.release()
threads = []
for i in range(num_threads):
thread = Thread(target=try_acquire_lock, args=(i,))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
# Only one thread should have acquired the lock
assert len(successful_acquisitions) == 1
def test_sequential_lock_reuse(self, redis_client, lock_key):
"""Test lock can be reused after natural expiry."""
owners = [str(uuid.uuid4()) for _ in range(3)]
for i, owner_id in enumerate(owners):
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=1) # 1 second
assert lock.try_acquire() == owner_id
time.sleep(1.5) # Wait for expiry
# Verify lock expired
assert redis_client.exists(lock_key) == 0
def test_refresh_during_concurrent_access(self, redis_client, lock_key):
"""Test lock refresh works correctly during concurrent access attempts."""
owner1 = str(uuid.uuid4())
owner2 = str(uuid.uuid4())
lock1 = ClusterLock(redis_client, lock_key, owner1, timeout=5)
lock2 = ClusterLock(redis_client, lock_key, owner2, timeout=5)
# Thread 1 holds lock and refreshes
assert lock1.try_acquire() == owner1
def refresh_continuously():
for _ in range(10):
lock1._last_refresh = 0 # Force refresh
lock1.refresh()
time.sleep(0.1)
def try_acquire_continuously():
attempts = 0
while attempts < 20:
if lock2.try_acquire() == owner2:
return True
time.sleep(0.1)
attempts += 1
return False
refresh_thread = Thread(target=refresh_continuously)
acquire_thread = Thread(target=try_acquire_continuously)
refresh_thread.start()
acquire_thread.start()
refresh_thread.join()
acquire_thread.join()
# Lock1 should still own the lock due to refreshes
assert lock1._last_refresh > 0
assert lock2._last_refresh == 0
class TestClusterLockErrorHandling:
"""Error handling and edge cases."""
def test_redis_connection_failure_on_acquire(self, lock_key, owner_id):
"""Test graceful handling when Redis is unavailable during acquisition."""
# Use invalid Redis connection
bad_redis = redis.Redis(
host="invalid_host", port=1234, socket_connect_timeout=1
)
lock = ClusterLock(bad_redis, lock_key, owner_id, timeout=60)
# Should return None for Redis connection failures
result = lock.try_acquire()
assert result is None # Returns None when Redis fails
assert lock._last_refresh == 0
def test_redis_connection_failure_on_refresh(
self, redis_client, lock_key, owner_id
):
"""Test graceful handling when Redis fails during refresh."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
# Acquire normally
assert lock.try_acquire() == owner_id
# Replace Redis client with failing one
lock.redis = redis.Redis(
host="invalid_host", port=1234, socket_connect_timeout=1
)
# Refresh should fail gracefully
lock._last_refresh = 0 # Force refresh
assert lock.refresh() is False
assert lock._last_refresh == 0
def test_invalid_lock_parameters(self, redis_client):
"""Test validation of lock parameters."""
owner_id = str(uuid.uuid4())
# All parameters are now simple - no validation needed
# Just test basic construction works
lock = ClusterLock(redis_client, "test_key", owner_id, timeout=60)
assert lock.key == "test_key"
assert lock.owner_id == owner_id
assert lock.timeout == 60
def test_refresh_after_redis_key_deleted(self, redis_client, lock_key, owner_id):
"""Test refresh behavior when Redis key is manually deleted."""
lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
lock.try_acquire()
# Manually delete the key (simulates external deletion)
redis_client.delete(lock_key)
# Refresh should fail and mark as not acquired
lock._last_refresh = 0 # Force refresh
assert lock.refresh() is False
assert lock._last_refresh == 0
class TestClusterLockDynamicRefreshInterval:
"""Dynamic refresh interval based on timeout."""
def test_refresh_interval_calculation(self, redis_client, lock_key, owner_id):
"""Test refresh interval is calculated as max(timeout/10, 1)."""
test_cases = [
(5, 1), # 5/10 = 0, but minimum is 1
(10, 1), # 10/10 = 1
(30, 3), # 30/10 = 3
(100, 10), # 100/10 = 10
(200, 20), # 200/10 = 20
(1000, 100), # 1000/10 = 100
]
for timeout, expected_interval in test_cases:
lock = ClusterLock(
redis_client, f"{lock_key}_{timeout}", owner_id, timeout=timeout
)
lock.try_acquire()
# Calculate expected interval using same logic as implementation
refresh_interval = max(timeout // 10, 1)
assert refresh_interval == expected_interval
# Test rate limiting works with calculated interval
assert lock.refresh() is True
first_refresh_time = lock._last_refresh
# Sleep less than interval - should be rate limited
time.sleep(0.1)
assert lock.refresh() is True
assert lock._last_refresh == first_refresh_time # No actual refresh
class TestClusterLockRealWorldScenarios:
"""Real-world usage patterns."""
def test_execution_coordination_simulation(self, redis_client):
"""Simulate graph execution coordination across multiple pods."""
graph_exec_id = str(uuid.uuid4())
lock_key = f"execution:{graph_exec_id}"
# Simulate 3 pods trying to execute same graph
pods = [f"pod_{i}" for i in range(3)]
execution_results = {}
def execute_graph(pod_id):
"""Simulate graph execution with cluster lock."""
lock = ClusterLock(redis_client, lock_key, pod_id, timeout=300)
if lock.try_acquire() == pod_id:
# Simulate execution work
execution_results[pod_id] = "executed"
time.sleep(0.1)
lock.release()
else:
execution_results[pod_id] = "rejected"
threads = []
for pod_id in pods:
thread = Thread(target=execute_graph, args=(pod_id,))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
# Only one pod should have executed
executed_count = sum(
1 for result in execution_results.values() if result == "executed"
)
rejected_count = sum(
1 for result in execution_results.values() if result == "rejected"
)
assert executed_count == 1
assert rejected_count == 2
def test_long_running_execution_with_refresh(
self, redis_client, lock_key, owner_id
):
"""Test lock maintains ownership during long execution with periodic refresh."""
lock = ClusterLock(
redis_client, lock_key, owner_id, timeout=30
) # 30 second timeout, refresh interval = max(30//10, 1) = 3 seconds
def long_execution_with_refresh():
"""Simulate long-running execution with periodic refresh."""
assert lock.try_acquire() == owner_id
# Simulate 10 seconds of work with refreshes every 2 seconds
# This respects rate limiting: with a 3s refresh interval, calls at 2s spacing alternate between verified no-ops and actual TTL refreshes
try:
for i in range(5): # 5 iterations * 2 seconds = 10 seconds total
time.sleep(2)
refresh_success = lock.refresh()
assert refresh_success is True, f"Refresh failed at iteration {i}"
return "completed"
finally:
lock.release()
# Should complete successfully without losing lock
result = long_execution_with_refresh()
assert result == "completed"
def test_graceful_degradation_pattern(self, redis_client, lock_key):
"""Test graceful degradation when Redis becomes unavailable."""
owner_id = str(uuid.uuid4())
lock = ClusterLock(
redis_client, lock_key, owner_id, timeout=3
) # Use shorter timeout
# Normal operation
assert lock.try_acquire() == owner_id
lock._last_refresh = 0 # Force refresh past rate limit
assert lock.refresh() is True
# Simulate Redis becoming unavailable
original_redis = lock.redis
lock.redis = redis.Redis(
host="invalid_host",
port=1234,
socket_connect_timeout=1,
decode_responses=False,
)
# Should degrade gracefully
lock._last_refresh = 0 # Force refresh past rate limit
assert lock.refresh() is False
assert lock._last_refresh == 0
# Restore Redis and verify can acquire again
lock.redis = original_redis
# Wait for original lock to expire (use longer wait for 3s timeout)
time.sleep(4)
new_lock = ClusterLock(redis_client, lock_key, owner_id, timeout=60)
assert new_lock.try_acquire() == owner_id
if __name__ == "__main__":
# Run specific test for quick validation
pytest.main([__file__, "-v"])

View File

@@ -9,7 +9,6 @@ from backend.data.execution import (
get_execution_kv_data,
get_graph_execution_meta,
get_graph_executions,
get_graph_executions_count,
get_latest_node_execution,
get_node_execution,
get_node_executions,
@@ -29,13 +28,11 @@ from backend.data.graph import (
get_node,
)
from backend.data.notifications import (
clear_all_user_notification_batches,
create_or_add_to_user_notification_batch,
empty_user_notification_batch,
get_all_batches_by_type,
get_user_notification_batch,
get_user_notification_oldest_message_in_batch,
remove_notifications_from_batch,
)
from backend.data.user import (
get_active_user_ids_in_timerange,
@@ -74,6 +71,7 @@ async def _get_credits(user_id: str) -> int:
class DatabaseManager(AppService):
def run_service(self) -> None:
logger.info(f"[{self.service_name}] ⏳ Connecting to Database...")
self.run_and_wait(db.connect())
@@ -87,16 +85,6 @@ class DatabaseManager(AppService):
async def health_check(self) -> str:
if not db.is_connected():
raise UnhealthyServiceError("Database is not connected")
try:
# Test actual database connectivity by executing a simple query
# This will fail if Prisma query engine is not responding
result = await db.query_raw_with_schema("SELECT 1 as health_check")
if not result or result[0].get("health_check") != 1:
raise UnhealthyServiceError("Database query test failed")
except Exception as e:
raise UnhealthyServiceError(f"Database health check failed: {e}")
return await super().health_check()
@classmethod
@@ -113,7 +101,6 @@ class DatabaseManager(AppService):
# Executions
get_graph_executions = _(get_graph_executions)
get_graph_executions_count = _(get_graph_executions_count)
get_graph_execution_meta = _(get_graph_execution_meta)
create_graph_execution = _(create_graph_execution)
get_node_execution = _(get_node_execution)
@@ -150,12 +137,10 @@ class DatabaseManager(AppService):
get_user_notification_preference = _(get_user_notification_preference)
# Notifications - async
clear_all_user_notification_batches = _(clear_all_user_notification_batches)
create_or_add_to_user_notification_batch = _(
create_or_add_to_user_notification_batch
)
empty_user_notification_batch = _(empty_user_notification_batch)
remove_notifications_from_batch = _(remove_notifications_from_batch)
get_all_batches_by_type = _(get_all_batches_by_type)
get_user_notification_batch = _(get_user_notification_batch)
get_user_notification_oldest_message_in_batch = _(
@@ -184,7 +169,6 @@ class DatabaseManagerClient(AppServiceClient):
# Executions
get_graph_executions = _(d.get_graph_executions)
get_graph_executions_count = _(d.get_graph_executions_count)
get_graph_execution_meta = _(d.get_graph_execution_meta)
get_node_executions = _(d.get_node_executions)
update_node_execution_status = _(d.update_node_execution_status)
@@ -247,12 +231,10 @@ class DatabaseManagerAsyncClient(AppServiceClient):
get_user_notification_preference = d.get_user_notification_preference
# Notifications
clear_all_user_notification_batches = d.clear_all_user_notification_batches
create_or_add_to_user_notification_batch = (
d.create_or_add_to_user_notification_batch
)
empty_user_notification_batch = d.empty_user_notification_batch
remove_notifications_from_batch = d.remove_notifications_from_batch
get_all_batches_by_type = d.get_all_batches_by_type
get_user_notification_batch = d.get_user_notification_batch
get_user_notification_oldest_message_in_batch = (

View File

@@ -3,42 +3,16 @@ import logging
import os
import threading
import time
import uuid
from collections import defaultdict
from concurrent.futures import Future, ThreadPoolExecutor
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from typing import TYPE_CHECKING, Any, Optional, TypeVar, cast
import sentry_sdk
from pika.adapters.blocking_connection import BlockingChannel
from pika.spec import Basic, BasicProperties
from prometheus_client import Gauge, start_http_server
from redis.asyncio.lock import Lock as AsyncRedisLock
from redis.asyncio.lock import Lock as RedisLock
from backend.blocks.agent import AgentExecutorBlock
from backend.blocks.io import AgentOutputBlock
from backend.data import redis_client as redis
from backend.data.block import (
BlockInput,
BlockOutput,
BlockOutputEntry,
BlockSchema,
get_block,
)
from backend.data.credit import UsageTransactionMetadata
from backend.data.dynamic_fields import parse_execution_output
from backend.data.execution import (
ExecutionQueue,
ExecutionStatus,
GraphExecution,
GraphExecutionEntry,
NodeExecutionEntry,
NodeExecutionResult,
NodesInputMasks,
UserContext,
)
from backend.data.graph import Link, Node
from backend.data.model import GraphExecutionStats, NodeExecutionStats
from backend.data.notifications import (
AgentRunData,
@@ -51,21 +25,50 @@ from backend.data.rabbitmq import SyncRabbitMQ
from backend.executor.activity_status_generator import (
generate_activity_status_for_execution,
)
from backend.executor.utils import LogMetadata
from backend.notifications.notifications import queue_notification
from backend.util.exceptions import InsufficientBalanceError, ModerationError
if TYPE_CHECKING:
from backend.executor import DatabaseManagerClient, DatabaseManagerAsyncClient
from prometheus_client import Gauge, start_http_server
from backend.blocks.agent import AgentExecutorBlock
from backend.data import redis_client as redis
from backend.data.block import (
BlockInput,
BlockOutput,
BlockOutputEntry,
BlockSchema,
get_block,
)
from backend.data.credit import UsageTransactionMetadata
from backend.data.execution import (
ExecutionQueue,
ExecutionStatus,
GraphExecution,
GraphExecutionEntry,
NodeExecutionEntry,
NodeExecutionResult,
NodesInputMasks,
UserContext,
)
from backend.data.graph import Link, Node
from backend.executor.utils import (
GRACEFUL_SHUTDOWN_TIMEOUT_SECONDS,
GRAPH_EXECUTION_CANCEL_QUEUE_NAME,
GRAPH_EXECUTION_QUEUE_NAME,
CancelExecutionEvent,
ExecutionOutputEntry,
LogMetadata,
NodeExecutionProgress,
block_usage_cost,
create_execution_queue_config,
execution_usage_cost,
parse_execution_output,
validate_exec,
)
from backend.integrations.creds_manager import IntegrationCredentialsManager
from backend.notifications.notifications import queue_notification
from backend.server.v2.AutoMod.manager import automod_manager
from backend.util import json
from backend.util.clients import (
@@ -81,24 +84,13 @@ from backend.util.decorator import (
error_logged,
time_measured,
)
from backend.util.exceptions import InsufficientBalanceError, ModerationError
from backend.util.file import clean_exec_files
from backend.util.logging import TruncatedLogger, configure_logging
from backend.util.metrics import DiscordChannel
from backend.util.process import AppProcess, set_service_name
from backend.util.retry import (
continuous_retry,
func_retry,
send_rate_limited_discord_alert,
)
from backend.util.retry import continuous_retry, func_retry
from backend.util.settings import Settings
from .cluster_lock import ClusterLock
if TYPE_CHECKING:
from backend.executor import DatabaseManagerAsyncClient, DatabaseManagerClient
_logger = logging.getLogger(__name__)
logger = TruncatedLogger(_logger, prefix="[GraphExecutor]")
settings = Settings()
@@ -114,7 +106,6 @@ utilization_gauge = Gauge(
"Ratio of active graph runs to max graph workers",
)
# Thread-local storage for ExecutionProcessor instances
_tls = threading.local()
@@ -126,14 +117,10 @@ def init_worker():
def execute_graph(
graph_exec_entry: "GraphExecutionEntry",
cancel_event: threading.Event,
cluster_lock: ClusterLock,
graph_exec_entry: "GraphExecutionEntry", cancel_event: threading.Event
):
"""Execute graph using thread-local ExecutionProcessor instance"""
return _tls.processor.on_graph_execution(
graph_exec_entry, cancel_event, cluster_lock
)
return _tls.processor.on_graph_execution(graph_exec_entry, cancel_event)
T = TypeVar("T")
@@ -190,7 +177,6 @@ async def execute_node(
_input_data.inputs = input_data
if nodes_input_masks:
_input_data.nodes_input_masks = nodes_input_masks
_input_data.user_id = user_id
input_data = _input_data.model_dump()
data.inputs = input_data
@@ -225,23 +211,6 @@ async def execute_node(
extra_exec_kwargs[field_name] = credentials
output_size = 0
# Sentry context workaround: set user and block tags directly on the current scope to attribute block errors per user, since isolation scopes don't propagate here :(
scope = sentry_sdk.get_current_scope()
# save the tags
original_user = scope._user
original_tags = dict(scope._tags) if scope._tags else {}
# Set user ID for error tracking
scope.set_user({"id": user_id})
scope.set_tag("graph_id", graph_id)
scope.set_tag("node_id", node_id)
scope.set_tag("block_name", node_block.name)
scope.set_tag("block_id", node_block.id)
for k, v in (data.user_context or UserContext(timezone="UTC")).model_dump().items():
scope.set_tag(f"user_context.{k}", v)
try:
async for output_name, output_data in node_block.execute(
input_data, **extra_exec_kwargs
@@ -250,12 +219,6 @@ async def execute_node(
output_size += len(json.dumps(output_data))
log_metadata.debug("Node produced output", **{output_name: output_data})
yield output_name, output_data
except Exception:
# Capture exception WITH context still set before restoring scope
sentry_sdk.capture_exception(scope=scope)
sentry_sdk.flush() # Ensure it's sent before we restore scope
# Re-raise to maintain normal error flow
raise
finally:
# Ensure credentials are released even if execution fails
if creds_lock and (await creds_lock.locked()) and (await creds_lock.owned()):
@@ -270,10 +233,6 @@ async def execute_node(
execution_stats.input_size = input_size
execution_stats.output_size = output_size
# Restore scope AFTER error has been captured
scope._user = original_user
scope._tags = original_tags
async def _enqueue_next_nodes(
db_client: "DatabaseManagerAsyncClient",
@@ -470,7 +429,7 @@ class ExecutionProcessor:
graph_id=node_exec.graph_id,
node_eid=node_exec.node_exec_id,
node_id=node_exec.node_id,
block_name=b.name if (b := get_block(node_exec.block_id)) else "-",
block_name="-",
)
db_client = get_db_async_client()
node = await db_client.get_node(node_exec.node_id)
@@ -598,6 +557,7 @@ class ExecutionProcessor:
await persist_output(
"error", str(stats.error) or type(stats.error).__name__
)
return status
@func_retry
@@ -623,7 +583,6 @@ class ExecutionProcessor:
self,
graph_exec: GraphExecutionEntry,
cancel: threading.Event,
cluster_lock: ClusterLock,
):
log_metadata = LogMetadata(
logger=_logger,
@@ -646,7 +605,7 @@ class ExecutionProcessor:
)
return
if exec_meta.status in [ExecutionStatus.QUEUED, ExecutionStatus.INCOMPLETE]:
if exec_meta.status == ExecutionStatus.QUEUED:
log_metadata.info(f"⚙️ Starting graph execution #{graph_exec.graph_exec_id}")
exec_meta.status = ExecutionStatus.RUNNING
send_execution_update(
@@ -682,7 +641,6 @@ class ExecutionProcessor:
cancel=cancel,
log_metadata=log_metadata,
execution_stats=exec_stats,
cluster_lock=cluster_lock,
)
exec_stats.walltime += timing_info.wall_time
exec_stats.cputime += timing_info.cpu_time
@@ -784,7 +742,6 @@ class ExecutionProcessor:
cancel: threading.Event,
log_metadata: LogMetadata,
execution_stats: GraphExecutionStats,
cluster_lock: ClusterLock,
) -> ExecutionStatus:
"""
Returns:
@@ -970,7 +927,7 @@ class ExecutionProcessor:
and execution_queue.empty()
and (running_node_execution or running_node_evaluation)
):
cluster_lock.refresh()
# There is nothing to execute and no output to process; relax for a while.
time.sleep(0.1)
# loop done --------------------------------------------------
@@ -1012,31 +969,16 @@ class ExecutionProcessor:
if isinstance(e, Exception)
else Exception(f"{e.__class__.__name__}: {e}")
)
if not execution_stats.error:
execution_stats.error = str(error)
known_errors = (InsufficientBalanceError, ModerationError)
if isinstance(error, known_errors):
execution_stats.error = str(error)
return ExecutionStatus.FAILED
execution_status = ExecutionStatus.FAILED
log_metadata.exception(
f"Failed graph execution {graph_exec.graph_exec_id}: {error}"
)
# Send rate-limited Discord alert for unknown/unexpected errors
send_rate_limited_discord_alert(
"graph_execution",
error,
"unknown_error",
f"🚨 **Unknown Graph Execution Error**\n"
f"User: {graph_exec.user_id}\n"
f"Graph ID: {graph_exec.graph_id}\n"
f"Execution ID: {graph_exec.graph_exec_id}\n"
f"Error Type: {type(error).__name__}\n"
f"Error: {str(error)[:200]}{'...' if len(str(error)) > 200 else ''}\n",
)
raise
finally:
@@ -1211,9 +1153,9 @@ class ExecutionProcessor:
f"❌ **Insufficient Funds Alert**\n"
f"User: {user_email or user_id}\n"
f"Agent: {metadata.name if metadata else 'Unknown Agent'}\n"
f"Current balance: ${e.balance / 100:.2f}\n"
f"Attempted cost: ${abs(e.amount) / 100:.2f}\n"
f"Shortfall: ${abs(shortfall) / 100:.2f}\n"
f"Current balance: ${e.balance/100:.2f}\n"
f"Attempted cost: ${abs(e.amount)/100:.2f}\n"
f"Shortfall: ${abs(shortfall)/100:.2f}\n"
f"[View User Details]({base_url}/admin/spending?search={user_email})"
)
@@ -1260,9 +1202,9 @@ class ExecutionProcessor:
alert_message = (
f"⚠️ **Low Balance Alert**\n"
f"User: {user_email or user_id}\n"
f"Balance dropped below ${LOW_BALANCE_THRESHOLD / 100:.2f}\n"
f"Current balance: ${current_balance / 100:.2f}\n"
f"Transaction cost: ${transaction_cost / 100:.2f}\n"
f"Balance dropped below ${LOW_BALANCE_THRESHOLD/100:.2f}\n"
f"Current balance: ${current_balance/100:.2f}\n"
f"Transaction cost: ${transaction_cost/100:.2f}\n"
f"[View User Details]({base_url}/admin/spending?search={user_email})"
)
get_notification_manager_client().discord_system_alert(
@@ -1277,7 +1219,6 @@ class ExecutionManager(AppProcess):
super().__init__()
self.pool_size = settings.config.num_graph_workers
self.active_graph_runs: dict[str, tuple[Future, threading.Event]] = {}
self.executor_id = str(uuid.uuid4())
self._executor = None
self._stop_consuming = None
@@ -1287,8 +1228,6 @@ class ExecutionManager(AppProcess):
self._run_thread = None
self._run_client = None
self._execution_locks = {}
@property
def cancel_thread(self) -> threading.Thread:
if self._cancel_thread is None:
@@ -1493,78 +1432,20 @@ class ExecutionManager(AppProcess):
return
graph_exec_id = graph_exec_entry.graph_exec_id
user_id = graph_exec_entry.user_id
graph_id = graph_exec_entry.graph_id
logger.info(
f"[{self.service_name}] Received RUN for graph_exec_id={graph_exec_id}, user_id={user_id}"
f"[{self.service_name}] Received RUN for graph_exec_id={graph_exec_id}"
)
# Check user rate limit before processing
try:
# Only check executions from the last 24 hours for performance
current_running_count = get_db_client().get_graph_executions_count(
user_id=user_id,
graph_id=graph_id,
statuses=[ExecutionStatus.RUNNING],
created_time_gte=datetime.now(timezone.utc) - timedelta(hours=24),
)
if (
current_running_count
>= settings.config.max_concurrent_graph_executions_per_user
):
logger.warning(
f"[{self.service_name}] Rate limit exceeded for user {user_id} on graph {graph_id}: "
f"{current_running_count}/{settings.config.max_concurrent_graph_executions_per_user} running executions"
)
_ack_message(reject=True, requeue=True)
return
except Exception as e:
logger.error(
f"[{self.service_name}] Failed to check rate limit for user {user_id}: {e}, proceeding with execution"
)
# If rate limit check fails, proceed to avoid blocking executions
# Check for local duplicate execution first
if graph_exec_id in self.active_graph_runs:
logger.warning(
f"[{self.service_name}] Graph {graph_exec_id} already running locally; rejecting duplicate."
# TODO: Make this check cluster-wide to prevent duplicate runs across executor pods.
logger.error(
f"[{self.service_name}] Graph {graph_exec_id} already running; rejecting duplicate run."
)
_ack_message(reject=True, requeue=True)
_ack_message(reject=True, requeue=False)
return
# Try to acquire cluster-wide execution lock
cluster_lock = ClusterLock(
redis=redis.get_redis(),
key=f"exec_lock:{graph_exec_id}",
owner_id=self.executor_id,
timeout=settings.config.cluster_lock_timeout,
)
current_owner = cluster_lock.try_acquire()
if current_owner != self.executor_id:
# Either someone else has it or Redis is unavailable
if current_owner is not None:
logger.warning(
f"[{self.service_name}] Graph {graph_exec_id} already running on pod {current_owner}"
)
else:
logger.warning(
f"[{self.service_name}] Could not acquire lock for {graph_exec_id} - Redis unavailable"
)
_ack_message(reject=True, requeue=True)
return
self._execution_locks[graph_exec_id] = cluster_lock
logger.info(
f"[{self.service_name}] Acquired cluster lock for {graph_exec_id} with executor {self.executor_id}"
)
cancel_event = threading.Event()
future = self.executor.submit(
execute_graph, graph_exec_entry, cancel_event, cluster_lock
)
future = self.executor.submit(execute_graph, graph_exec_entry, cancel_event)
self.active_graph_runs[graph_exec_id] = (future, cancel_event)
self._update_prompt_metrics()
@@ -1583,10 +1464,6 @@ class ExecutionManager(AppProcess):
f"[{self.service_name}] Error in run completion callback: {e}"
)
finally:
# Release the cluster-wide execution lock
if graph_exec_id in self._execution_locks:
self._execution_locks[graph_exec_id].release()
del self._execution_locks[graph_exec_id]
self._cleanup_completed_runs()
future.add_done_callback(_on_run_done)
@@ -1669,10 +1546,6 @@ class ExecutionManager(AppProcess):
f"{prefix} ⏳ Still waiting for {len(self.active_graph_runs)} executions: {ids}"
)
for graph_exec_id in self.active_graph_runs:
if lock := self._execution_locks.get(graph_exec_id):
lock.refresh()
time.sleep(wait_interval)
waited += wait_interval
@@ -1690,15 +1563,6 @@ class ExecutionManager(AppProcess):
except Exception as e:
logger.error(f"{prefix} ⚠️ Error during executor shutdown: {type(e)} {e}")
# Release remaining execution locks
try:
for lock in self._execution_locks.values():
lock.release()
self._execution_locks.clear()
logger.info(f"{prefix} ✅ Released execution locks")
except Exception as e:
logger.warning(f"{prefix} ⚠️ Failed to release all locks: {e}")
# Disconnect the run execution consumer
self._stop_message_consumers(
self.run_thread,
@@ -1804,18 +1668,15 @@ def update_graph_execution_state(
@asynccontextmanager
async def synchronized(key: str, timeout: int = settings.config.cluster_lock_timeout):
async def synchronized(key: str, timeout: int = 60):
r = await redis.get_redis_async()
lock: AsyncRedisLock = r.lock(f"lock:{key}", timeout=timeout)
lock: RedisLock = r.lock(f"lock:{key}", timeout=timeout)
try:
await lock.acquire()
yield
finally:
if await lock.locked() and await lock.owned():
try:
await lock.release()
except Exception as e:
logger.warning(f"Failed to release lock for key {key}: {e}")
await lock.release()
def increment_execution_count(user_id: str) -> int:
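A hypothetical caller of the synchronized() helper above, for context (the lock key is illustrative):
async def refresh_user_credentials(user_id: str) -> None:
    # Only one worker across the cluster enters this section per user at a time.
    async with synchronized(f"credentials:{user_id}", timeout=60):
        ...  # read-modify-write that must not interleave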

View File

@@ -191,22 +191,15 @@ class GraphExecutionJobInfo(GraphExecutionJobArgs):
id: str
name: str
next_run_time: str
timezone: str = Field(default="UTC", description="Timezone used for scheduling")
@staticmethod
def from_db(
job_args: GraphExecutionJobArgs, job_obj: JobObj
) -> "GraphExecutionJobInfo":
# Extract timezone from the trigger if it's a CronTrigger
timezone_str = "UTC"
if hasattr(job_obj.trigger, "timezone"):
timezone_str = str(job_obj.trigger.timezone)
return GraphExecutionJobInfo(
id=job_obj.id,
name=job_obj.name,
next_run_time=job_obj.next_run_time.isoformat(),
timezone=timezone_str,
**job_args.model_dump(),
)
@@ -402,7 +395,6 @@ class Scheduler(AppService):
input_data: BlockInput,
input_credentials: dict[str, CredentialsMetaInput],
name: Optional[str] = None,
user_timezone: str | None = None,
) -> GraphExecutionJobInfo:
# Validate the graph before scheduling to prevent runtime failures
# We don't need the return value, just want the validation to run
@@ -416,18 +408,7 @@ class Scheduler(AppService):
)
)
# Use provided timezone or default to UTC
# Note: Timezone should be passed from the client to avoid database lookups
if not user_timezone:
user_timezone = "UTC"
logger.warning(
f"No timezone provided for user {user_id}, using UTC for scheduling. "
f"Client should pass user's timezone for correct scheduling."
)
logger.info(
f"Scheduling job for user {user_id} with timezone {user_timezone} (cron: {cron})"
)
logger.info(f"Scheduling job for user {user_id} in UTC (cron: {cron})")
job_args = GraphExecutionJobArgs(
user_id=user_id,
@@ -441,12 +422,12 @@ class Scheduler(AppService):
execute_graph,
kwargs=job_args.model_dump(),
name=name,
trigger=CronTrigger.from_crontab(cron, timezone=user_timezone),
trigger=CronTrigger.from_crontab(cron, timezone="UTC"),
jobstore=Jobstores.EXECUTION.value,
replace_existing=True,
)
logger.info(
f"Added job {job.id} with cron schedule '{cron}' in timezone {user_timezone}, input data: {input_data}"
f"Added job {job.id} with cron schedule '{cron}' in UTC, input data: {input_data}"
)
return GraphExecutionJobInfo.from_db(job_args, job)
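The timezone argument is what moves the actual fire times; a minimal APScheduler sketch (assuming apscheduler is available) of the behavior this hunk reverts:
from apscheduler.triggers.cron import CronTrigger
# "Every day at 09:00" resolves to different UTC instants per zone:
utc_9am = CronTrigger.from_crontab("0 9 * * *", timezone="UTC")
nyc_9am = CronTrigger.from_crontab("0 9 * * *", timezone="America/New_York")
# After the revert above, every schedule uses the UTC variant regardless of the user.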

View File

@@ -4,7 +4,7 @@ import threading
import time
from collections import defaultdict
from concurrent.futures import Future
from typing import Mapping, Optional, cast
from typing import Any, Mapping, Optional, cast
from pydantic import BaseModel, JsonValue, ValidationError
@@ -20,9 +20,6 @@ from backend.data.block import (
)
from backend.data.block_cost_config import BLOCK_COSTS
from backend.data.db import prisma
# Import dynamic field utilities from centralized location
from backend.data.dynamic_fields import merge_execution_input
from backend.data.execution import (
ExecutionStatus,
GraphExecutionStats,
@@ -42,6 +39,7 @@ from backend.util.clients import (
)
from backend.util.exceptions import GraphValidationError, NotFoundError
from backend.util.logging import TruncatedLogger
from backend.util.mock import MockObject
from backend.util.settings import Config
from backend.util.type import convert
@@ -188,7 +186,195 @@ def _is_cost_filter_match(cost_filter: BlockInput, input_data: BlockInput) -> bo
# ============ Execution Input Helpers ============ #
# Dynamic field utilities are now imported from backend.data.dynamic_fields
# --------------------------------------------------------------------------- #
# Delimiters
# --------------------------------------------------------------------------- #
LIST_SPLIT = "_$_"
DICT_SPLIT = "_#_"
OBJC_SPLIT = "_@_"
_DELIMS = (LIST_SPLIT, DICT_SPLIT, OBJC_SPLIT)
# --------------------------------------------------------------------------- #
# Tokenisation utilities
# --------------------------------------------------------------------------- #
def _next_delim(s: str) -> tuple[str | None, int]:
"""
Return the *earliest* delimiter appearing in `s` and its index.
If none present → (None, -1).
"""
first: str | None = None
pos = len(s) # sentinel: larger than any real index
for d in _DELIMS:
i = s.find(d)
if 0 <= i < pos:
first, pos = d, i
return first, (pos if first else -1)
def _tokenise(path: str) -> list[tuple[str, str]] | None:
"""
Convert the raw path string (starting with a delimiter) into
[ (delimiter, identifier), … ] or None if the syntax is malformed.
"""
tokens: list[tuple[str, str]] = []
while path:
# 1. Which delimiter starts this chunk?
delim = next((d for d in _DELIMS if path.startswith(d)), None)
if delim is None:
return None # invalid syntax
# 2. Slice off the delimiter, then up to the next delimiter (or EOS)
path = path[len(delim) :]
nxt_delim, pos = _next_delim(path)
token, path = (
path[: pos if pos != -1 else len(path)],
path[pos if pos != -1 else len(path) :],
)
if token == "":
return None # empty identifier is invalid
tokens.append((delim, token))
return tokens
# --------------------------------------------------------------------------- #
# Public API parsing (flattened ➜ concrete)
# --------------------------------------------------------------------------- #
def parse_execution_output(output: BlockOutputEntry, name: str) -> JsonValue | None:
"""
Retrieve a nested value out of `output` using the flattened *name*.
On any failure (wrong name, wrong type, out-of-range, bad path)
returns **None**.
"""
base_name, data = output
# Exact match → whole object
if name == base_name:
return data
# Must start with the expected name
if not name.startswith(base_name):
return None
path = name[len(base_name) :]
if not path:
return None # nothing left to parse
tokens = _tokenise(path)
if tokens is None:
return None
cur: JsonValue = data
for delim, ident in tokens:
if delim == LIST_SPLIT:
# list[index]
try:
idx = int(ident)
except ValueError:
return None
if not isinstance(cur, list) or idx >= len(cur):
return None
cur = cur[idx]
elif delim == DICT_SPLIT:
if not isinstance(cur, dict) or ident not in cur:
return None
cur = cur[ident]
elif delim == OBJC_SPLIT:
if not hasattr(cur, ident):
return None
cur = getattr(cur, ident)
else:
return None # unreachable
return cur
def _assign(container: Any, tokens: list[tuple[str, str]], value: Any) -> Any:
"""
Recursive helper that *returns* the (possibly new) container with
`value` assigned along the remaining `tokens` path.
"""
if not tokens:
return value # leaf reached
delim, ident = tokens[0]
rest = tokens[1:]
# ---------- list ----------
if delim == LIST_SPLIT:
try:
idx = int(ident)
except ValueError:
raise ValueError("index must be an integer")
if container is None:
container = []
elif not isinstance(container, list):
container = list(container) if hasattr(container, "__iter__") else []
while len(container) <= idx:
container.append(None)
container[idx] = _assign(container[idx], rest, value)
return container
# ---------- dict ----------
if delim == DICT_SPLIT:
if container is None:
container = {}
elif not isinstance(container, dict):
container = dict(container) if hasattr(container, "items") else {}
container[ident] = _assign(container.get(ident), rest, value)
return container
# ---------- object ----------
if delim == OBJC_SPLIT:
if container is None or not isinstance(container, MockObject):
container = MockObject()
setattr(
container,
ident,
_assign(getattr(container, ident, None), rest, value),
)
return container
return value # unreachable
def merge_execution_input(data: BlockInput) -> BlockInput:
"""
Reconstruct nested objects from a *flattened* dict of key → value.
Raises ValueError on syntactically invalid list indices.
"""
merged: BlockInput = {}
for key, value in data.items():
# Split off the base name (before the first delimiter, if any)
delim, pos = _next_delim(key)
if delim is None:
merged[key] = value
continue
base, path = key[:pos], key[pos:]
tokens = _tokenise(path)
if tokens is None:
# Invalid key; treat as scalar under the raw name
merged[key] = value
continue
merged[base] = _assign(merged.get(base), tokens, value)
data.update(merged)
return data
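A round-trip sketch (flattened keys use the assumed delimiter values; note the returned dict keeps the original flattened keys alongside the merged base keys, because the function updates data in place):
flat = {
"input_#_items_$_0": "first",
"input_#_items_$_1": "second",
"input_#_meta_#_source": "api",
}
merged = merge_execution_input(flat)
# merged["input"] == {"items": ["first", "second"], "meta": {"source": "api"}}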
def validate_exec(
@@ -728,30 +914,29 @@ async def add_graph_execution(
preset_id=preset_id,
)
# Fetch user context for the graph execution
user_context = await get_user_context(user_id)
queue = await get_async_execution_queue()
graph_exec_entry = graph_exec.to_graph_execution_entry(
user_context=await get_user_context(user_id),
compiled_nodes_input_masks=compiled_nodes_input_masks,
user_context, compiled_nodes_input_masks
)
logger.info(
f"Created graph execution #{graph_exec.id} for graph "
f"#{graph_id} with {len(starting_nodes_input)} starting nodes. "
f"Now publishing to execution queue."
)
exec_queue = await get_async_execution_queue()
await exec_queue.publish_message(
await queue.publish_message(
routing_key=GRAPH_EXECUTION_ROUTING_KEY,
message=graph_exec_entry.model_dump_json(),
exchange=GRAPH_EXECUTION_EXCHANGE,
)
logger.info(f"Published execution {graph_exec.id} to RabbitMQ queue")
graph_exec.status = ExecutionStatus.QUEUED
await edb.update_graph_execution_stats(
graph_exec_id=graph_exec.id,
status=graph_exec.status,
)
await get_async_execution_event_bus().publish(graph_exec)
bus = get_async_execution_event_bus()
await bus.publish(graph_exec)
return graph_exec
except BaseException as e:

View File

@@ -3,7 +3,7 @@ from typing import cast
import pytest
from pytest_mock import MockerFixture
from backend.data.dynamic_fields import merge_execution_input, parse_execution_output
from backend.executor.utils import merge_execution_input, parse_execution_output
from backend.util.mock import MockObject
@@ -316,7 +316,6 @@ async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
# Mock the graph execution object
mock_graph_exec = mocker.MagicMock(spec=GraphExecutionWithNodes)
mock_graph_exec.id = "execution-id-123"
mock_graph_exec.node_executions = [] # Add this to avoid AttributeError
mock_graph_exec.to_graph_execution_entry.return_value = mocker.MagicMock()
# Mock user context
@@ -347,10 +346,6 @@ async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
)
mock_prisma.is_connected.return_value = True
mock_edb.create_graph_execution = mocker.AsyncMock(return_value=mock_graph_exec)
mock_edb.update_graph_execution_stats = mocker.AsyncMock(
return_value=mock_graph_exec
)
mock_edb.update_node_execution_status_batch = mocker.AsyncMock()
mock_get_user_context.return_value = mock_user_context
mock_get_queue.return_value = mock_queue
mock_get_event_bus.return_value = mock_event_bus

View File

@@ -151,10 +151,7 @@ class IntegrationCredentialsManager:
fresh_credentials = await oauth_handler.refresh_tokens(credentials)
await self.store.update_creds(user_id, fresh_credentials)
if _lock and (await _lock.locked()) and (await _lock.owned()):
try:
await _lock.release()
except Exception as e:
logger.warning(f"Failed to release OAuth refresh lock: {e}")
await _lock.release()
credentials = fresh_credentials
return credentials
@@ -187,10 +184,7 @@ class IntegrationCredentialsManager:
yield
finally:
if (await lock.locked()) and (await lock.owned()):
try:
await lock.release()
except Exception as e:
logger.warning(f"Failed to release credentials lock: {e}")
await lock.release()
async def release_all_locks(self):
"""Call this on process termination to ensure all locks are released"""

View File

@@ -1,14 +1,13 @@
import functools
from typing import TYPE_CHECKING
from autogpt_libs.utils.cache import cached
if TYPE_CHECKING:
from ..providers import ProviderName
from ._base import BaseWebhooksManager
# --8<-- [start:load_webhook_managers]
@cached()
@functools.cache
def load_webhook_managers() -> dict["ProviderName", type["BaseWebhooksManager"]]:
webhook_managers = {}

View File

@@ -7,9 +7,10 @@ from backend.data.graph import set_node_webhook
from backend.integrations.creds_manager import IntegrationCredentialsManager
from . import get_webhook_manager, supports_webhooks
from .utils import setup_webhook_for_block
if TYPE_CHECKING:
from backend.data.graph import BaseGraph, GraphModel, NodeModel
from backend.data.graph import BaseGraph, GraphModel, Node, NodeModel
from backend.data.model import Credentials
from ._base import BaseWebhooksManager
@@ -42,19 +43,32 @@ async def _on_graph_activate(graph: "BaseGraph", user_id: str) -> "BaseGraph":
async def _on_graph_activate(graph: "BaseGraph | GraphModel", user_id: str):
get_credentials = credentials_manager.cached_getter(user_id)
updated_nodes = []
for new_node in graph.nodes:
block_input_schema = cast(BlockSchema, new_node.block.input_schema)
for creds_field_name in block_input_schema.get_credentials_fields().keys():
# Prevent saving graph with non-existent credentials
if (
creds_meta := new_node.input_default.get(creds_field_name)
) and not await get_credentials(creds_meta["id"]):
raise ValueError(
f"Node #{new_node.id} input '{creds_field_name}' updated with "
f"non-existent credentials #{creds_meta['id']}"
node_credentials = None
if (
# Webhook-triggered blocks are only allowed to have 1 credentials input
(
creds_field_name := next(
iter(block_input_schema.get_credentials_fields()), None
)
)
and (creds_meta := new_node.input_default.get(creds_field_name))
and not (node_credentials := await get_credentials(creds_meta["id"]))
):
raise ValueError(
f"Node #{new_node.id} input '{creds_field_name}' updated with "
f"non-existent credentials #{creds_meta['id']}"
)
updated_node = await on_node_activate(
user_id, graph.id, new_node, credentials=node_credentials
)
updated_nodes.append(updated_node)
graph.nodes = updated_nodes
return graph
@@ -71,14 +85,20 @@ async def on_graph_deactivate(graph: "GraphModel", user_id: str):
block_input_schema = cast(BlockSchema, node.block.input_schema)
node_credentials = None
for creds_field_name in block_input_schema.get_credentials_fields().keys():
if (creds_meta := node.input_default.get(creds_field_name)) and not (
node_credentials := await get_credentials(creds_meta["id"])
):
logger.warning(
f"Node #{node.id} input '{creds_field_name}' referenced "
f"non-existent credentials #{creds_meta['id']}"
if (
# Webhook-triggered blocks are only allowed to have 1 credentials input
(
creds_field_name := next(
iter(block_input_schema.get_credentials_fields()), None
)
)
and (creds_meta := node.input_default.get(creds_field_name))
and not (node_credentials := await get_credentials(creds_meta["id"]))
):
logger.error(
f"Node #{node.id} input '{creds_field_name}' referenced non-existent "
f"credentials #{creds_meta['id']}"
)
updated_node = await on_node_deactivate(
user_id, node, credentials=node_credentials
@@ -89,6 +109,32 @@ async def on_graph_deactivate(graph: "GraphModel", user_id: str):
return graph
async def on_node_activate(
user_id: str,
graph_id: str,
node: "Node",
*,
credentials: Optional["Credentials"] = None,
) -> "Node":
"""Hook to be called when the node is activated/created"""
if node.block.webhook_config:
new_webhook, feedback = await setup_webhook_for_block(
user_id=user_id,
trigger_block=node.block,
trigger_config=node.input_default,
for_graph_id=graph_id,
)
if new_webhook:
node = await set_node_webhook(node.id, new_webhook.id)
else:
logger.debug(
f"Node #{node.id} does not have everything for a webhook: {feedback}"
)
return node
async def on_node_deactivate(
user_id: str,
node: "NodeModel",

View File

@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Optional, cast
from pydantic import JsonValue
from backend.integrations.creds_manager import IntegrationCredentialsManager
from backend.integrations.providers import ProviderName
from backend.util.settings import Config
from . import get_webhook_manager, supports_webhooks
@@ -12,7 +13,6 @@ if TYPE_CHECKING:
from backend.data.block import Block, BlockSchema
from backend.data.integrations import Webhook
from backend.data.model import Credentials
from backend.integrations.providers import ProviderName
logger = logging.getLogger(__name__)
app_config = Config()
@@ -20,7 +20,7 @@ credentials_manager = IntegrationCredentialsManager()
# TODO: add test to assert this matches the actual API route
def webhook_ingress_url(provider_name: "ProviderName", webhook_id: str) -> str:
def webhook_ingress_url(provider_name: ProviderName, webhook_id: str) -> str:
return (
f"{app_config.platform_base_url}/api/integrations/{provider_name.value}"
f"/webhooks/{webhook_id}/ingress"
@@ -144,69 +144,3 @@ async def setup_webhook_for_block(
)
logger.debug(f"Acquired webhook: {webhook}")
return webhook, None
async def migrate_legacy_triggered_graphs():
from prisma.models import AgentGraph
from backend.data.graph import AGENT_GRAPH_INCLUDE, GraphModel, set_node_webhook
from backend.data.model import is_credentials_field_name
from backend.server.v2.library.db import create_preset
from backend.server.v2.library.model import LibraryAgentPresetCreatable
triggered_graphs = [
GraphModel.from_db(_graph)
for _graph in await AgentGraph.prisma().find_many(
where={
"isActive": True,
"Nodes": {"some": {"NOT": [{"webhookId": None}]}},
},
include=AGENT_GRAPH_INCLUDE,
)
]
n_migrated_webhooks = 0
for graph in triggered_graphs:
try:
if not (
(trigger_node := graph.webhook_input_node) and trigger_node.webhook_id
):
continue
# Use trigger node's inputs for the preset
preset_credentials = {
field_name: creds_meta
for field_name, creds_meta in trigger_node.input_default.items()
if is_credentials_field_name(field_name)
}
preset_inputs = {
field_name: value
for field_name, value in trigger_node.input_default.items()
if not is_credentials_field_name(field_name)
}
# Create a triggered preset for the graph
await create_preset(
graph.user_id,
LibraryAgentPresetCreatable(
graph_id=graph.id,
graph_version=graph.version,
inputs=preset_inputs,
credentials=preset_credentials,
name=graph.name,
description=graph.description,
webhook_id=trigger_node.webhook_id,
is_active=True,
),
)
# Detach webhook from the graph node
await set_node_webhook(trigger_node.id, None)
n_migrated_webhooks += 1
except Exception as e:
logger.error(f"Failed to migrate graph #{graph.id} trigger to preset: {e}")
continue
logger.info(f"Migrated {n_migrated_webhooks} node triggers to triggered presets")

View File

@@ -1,287 +0,0 @@
"""
Prometheus instrumentation for FastAPI services.
This module provides centralized metrics collection and instrumentation
for all FastAPI services in the AutoGPT platform.
"""
import logging
from typing import Optional
from fastapi import FastAPI
from prometheus_client import Counter, Gauge, Histogram, Info
from prometheus_fastapi_instrumentator import Instrumentator, metrics
logger = logging.getLogger(__name__)
# Custom business metrics with controlled cardinality
GRAPH_EXECUTIONS = Counter(
"autogpt_graph_executions_total",
"Total number of graph executions",
labelnames=[
"status"
], # Removed graph_id and user_id to prevent cardinality explosion
)
GRAPH_EXECUTIONS_BY_USER = Counter(
"autogpt_graph_executions_by_user_total",
"Total number of graph executions by user (sampled)",
labelnames=["status"], # Only status, user_id tracked separately when needed
)
BLOCK_EXECUTIONS = Counter(
"autogpt_block_executions_total",
"Total number of block executions",
labelnames=["block_type", "status"], # block_type is bounded
)
BLOCK_DURATION = Histogram(
"autogpt_block_duration_seconds",
"Duration of block executions in seconds",
labelnames=["block_type"],
buckets=[0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60],
)
WEBSOCKET_CONNECTIONS = Gauge(
"autogpt_websocket_connections_total",
"Total number of active WebSocket connections",
# Removed user_id label - track total only to prevent cardinality explosion
)
SCHEDULER_JOBS = Gauge(
"autogpt_scheduler_jobs",
"Current number of scheduled jobs",
labelnames=["job_type", "status"],
)
DATABASE_QUERIES = Histogram(
"autogpt_database_query_duration_seconds",
"Duration of database queries in seconds",
labelnames=["operation", "table"],
buckets=[0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5],
)
RABBITMQ_MESSAGES = Counter(
"autogpt_rabbitmq_messages_total",
"Total number of RabbitMQ messages",
labelnames=["queue", "status"],
)
AUTHENTICATION_ATTEMPTS = Counter(
"autogpt_auth_attempts_total",
"Total number of authentication attempts",
labelnames=["method", "status"],
)
API_KEY_USAGE = Counter(
"autogpt_api_key_usage_total",
"API key usage by provider",
labelnames=["provider", "block_type", "status"],
)
# Function/operation level metrics with controlled cardinality
GRAPH_OPERATIONS = Counter(
"autogpt_graph_operations_total",
"Graph operations by type",
labelnames=["operation", "status"], # create, update, delete, execute, etc.
)
USER_OPERATIONS = Counter(
"autogpt_user_operations_total",
"User operations by type",
labelnames=["operation", "status"], # login, register, update_profile, etc.
)
RATE_LIMIT_HITS = Counter(
"autogpt_rate_limit_hits_total",
"Number of rate limit hits",
labelnames=["endpoint"], # Removed user_id to prevent cardinality explosion
)
SERVICE_INFO = Info(
"autogpt_service",
"Service information",
)
def instrument_fastapi(
app: FastAPI,
service_name: str,
expose_endpoint: bool = True,
endpoint: str = "/metrics",
include_in_schema: bool = False,
excluded_handlers: Optional[list] = None,
) -> Instrumentator:
"""
Instrument a FastAPI application with Prometheus metrics.
Args:
app: FastAPI application instance
service_name: Name of the service for metrics labeling
expose_endpoint: Whether to expose /metrics endpoint
endpoint: Path for metrics endpoint
include_in_schema: Whether to include metrics endpoint in OpenAPI schema
excluded_handlers: List of paths to exclude from metrics
Returns:
Configured Instrumentator instance
"""
# Set service info
try:
from importlib.metadata import version
service_version = version("autogpt-platform-backend")
except Exception:
service_version = "unknown"
SERVICE_INFO.info(
{
"service": service_name,
"version": service_version,
}
)
# Create instrumentator with default metrics
instrumentator = Instrumentator(
should_group_status_codes=True,
should_ignore_untemplated=True,
should_respect_env_var=True,
should_instrument_requests_inprogress=True,
excluded_handlers=excluded_handlers or ["/health", "/readiness"],
env_var_name="ENABLE_METRICS",
inprogress_name="autogpt_http_requests_inprogress",
inprogress_labels=True,
)
# Add default HTTP metrics
instrumentator.add(
metrics.default(
metric_namespace="autogpt",
metric_subsystem=service_name.replace("-", "_"),
)
)
# Add request size metrics
instrumentator.add(
metrics.request_size(
metric_namespace="autogpt",
metric_subsystem=service_name.replace("-", "_"),
)
)
# Add response size metrics
instrumentator.add(
metrics.response_size(
metric_namespace="autogpt",
metric_subsystem=service_name.replace("-", "_"),
)
)
# Add latency metrics with custom buckets for better granularity
instrumentator.add(
metrics.latency(
metric_namespace="autogpt",
metric_subsystem=service_name.replace("-", "_"),
buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60],
)
)
# Add combined metrics (requests by method and status)
instrumentator.add(
metrics.combined_size(
metric_namespace="autogpt",
metric_subsystem=service_name.replace("-", "_"),
)
)
# Instrument the app
instrumentator.instrument(app)
# Expose metrics endpoint if requested
if expose_endpoint:
instrumentator.expose(
app,
endpoint=endpoint,
include_in_schema=include_in_schema,
tags=["monitoring"] if include_in_schema else None,
)
logger.info(f"Metrics endpoint exposed at {endpoint} for {service_name}")
return instrumentator
def record_graph_execution(graph_id: str, status: str, user_id: str):
"""Record a graph execution event.
Args:
graph_id: Graph identifier (kept for future sampling/debugging)
status: Execution status (success/error/validation_error)
user_id: User identifier (kept for future sampling/debugging)
"""
# Track overall executions without high-cardinality labels
GRAPH_EXECUTIONS.labels(status=status).inc()
# Optionally track per-user executions (implement sampling if needed)
# For now, just track status to avoid cardinality explosion
GRAPH_EXECUTIONS_BY_USER.labels(status=status).inc()
def record_block_execution(block_type: str, status: str, duration: float):
"""Record a block execution event with duration."""
BLOCK_EXECUTIONS.labels(block_type=block_type, status=status).inc()
BLOCK_DURATION.labels(block_type=block_type).observe(duration)
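A hypothetical call site, e.g. from the executor once a block run completes (names and values illustrative only):
# record_block_execution(block_type="HttpRequestBlock", status="success", duration=0.42)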
def update_websocket_connections(user_id: str, delta: int):
"""Update the number of active WebSocket connections.
Args:
user_id: User identifier (kept for future sampling/debugging)
delta: Change in connection count (+1 for connect, -1 for disconnect)
"""
# Track total connections without user_id to prevent cardinality explosion
if delta > 0:
WEBSOCKET_CONNECTIONS.inc(delta)
else:
WEBSOCKET_CONNECTIONS.dec(abs(delta))
def record_database_query(operation: str, table: str, duration: float):
"""Record a database query with duration."""
DATABASE_QUERIES.labels(operation=operation, table=table).observe(duration)
def record_rabbitmq_message(queue: str, status: str):
"""Record a RabbitMQ message event."""
RABBITMQ_MESSAGES.labels(queue=queue, status=status).inc()
def record_authentication_attempt(method: str, status: str):
"""Record an authentication attempt."""
AUTHENTICATION_ATTEMPTS.labels(method=method, status=status).inc()
def record_api_key_usage(provider: str, block_type: str, status: str):
"""Record API key usage by provider and block."""
API_KEY_USAGE.labels(provider=provider, block_type=block_type, status=status).inc()
def record_rate_limit_hit(endpoint: str, user_id: str):
"""Record a rate limit hit.
Args:
endpoint: API endpoint that was rate limited
user_id: User identifier (kept for future sampling/debugging)
"""
RATE_LIMIT_HITS.labels(endpoint=endpoint).inc()
def record_graph_operation(operation: str, status: str):
"""Record a graph operation (create, update, delete, execute, etc.)."""
GRAPH_OPERATIONS.labels(operation=operation, status=status).inc()
def record_user_operation(operation: str, status: str):
"""Record a user operation (login, register, etc.)."""
USER_OPERATIONS.labels(operation=operation, status=status).inc()
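A minimal wiring sketch for one service (service name hypothetical; relies only on the defaults configured above):
from fastapi import FastAPI
app = FastAPI()
# Instruments the default HTTP metrics and exposes /metrics; emission can
# still be toggled at runtime via the ENABLE_METRICS environment variable.
instrument_fastapi(app, service_name="rest-api")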

View File

@@ -25,11 +25,7 @@ from backend.data.notifications import (
get_summary_params_type,
)
from backend.data.rabbitmq import Exchange, ExchangeType, Queue, RabbitMQConfig
from backend.data.user import (
disable_all_user_notifications,
generate_unsubscribe_link,
set_user_email_verification,
)
from backend.data.user import generate_unsubscribe_link
from backend.notifications.email import EmailSender
from backend.util.clients import get_database_manager_async_client
from backend.util.logging import TruncatedLogger
@@ -42,7 +38,7 @@ from backend.util.service import (
endpoint_to_sync,
expose,
)
from backend.util.settings import AppEnvironment, Settings
from backend.util.settings import Settings
logger = TruncatedLogger(logging.getLogger(__name__), "[NotificationManager]")
settings = Settings()
@@ -128,12 +124,6 @@ def get_routing_key(event_type: NotificationType) -> str:
def queue_notification(event: NotificationEventModel) -> NotificationResult:
"""Queue a notification - exposed method for other services to call"""
# Disable in production
if settings.config.app_env == AppEnvironment.PRODUCTION:
return NotificationResult(
success=True,
message="Queueing notifications is disabled in production",
)
try:
logger.debug(f"Received Request to queue {event=}")
@@ -161,12 +151,6 @@ def queue_notification(event: NotificationEventModel) -> NotificationResult:
async def queue_notification_async(event: NotificationEventModel) -> NotificationResult:
"""Queue a notification - exposed method for other services to call"""
# Disable in production
if settings.config.app_env == AppEnvironment.PRODUCTION:
return NotificationResult(
success=True,
message="Queueing notifications is disabled in production",
)
try:
logger.debug(f"Received Request to queue {event=}")
@@ -229,9 +213,6 @@ class NotificationManager(AppService):
@expose
async def queue_weekly_summary(self):
# disable in prod
if settings.config.app_env == AppEnvironment.PRODUCTION:
return
# Use the existing event loop instead of creating a new one with asyncio.run()
asyncio.create_task(self._queue_weekly_summary())
@@ -245,9 +226,7 @@ class NotificationManager(AppService):
logger.info(
f"Querying for active users between {start_time} and {current_time}"
)
users = await get_database_manager_async_client(
should_retry=False
).get_active_user_ids_in_timerange(
users = await get_database_manager_async_client().get_active_user_ids_in_timerange(
end_time=current_time.isoformat(),
start_time=start_time.isoformat(),
)
@@ -274,9 +253,6 @@ class NotificationManager(AppService):
async def process_existing_batches(
self, notification_types: list[NotificationType]
):
# disable in prod
if settings.config.app_env == AppEnvironment.PRODUCTION:
return
# Use the existing event loop instead of creating a new process
asyncio.create_task(self._process_existing_batches(notification_types))
@@ -290,15 +266,15 @@ class NotificationManager(AppService):
for notification_type in notification_types:
# Get all batches for this notification type
batches = await get_database_manager_async_client(
should_retry=False
).get_all_batches_by_type(notification_type)
batches = (
await get_database_manager_async_client().get_all_batches_by_type(
notification_type
)
)
for batch in batches:
# Check if batch has aged out
oldest_message = await get_database_manager_async_client(
should_retry=False
).get_user_notification_oldest_message_in_batch(
oldest_message = await get_database_manager_async_client().get_user_notification_oldest_message_in_batch(
batch.user_id, notification_type
)
@@ -313,9 +289,9 @@ class NotificationManager(AppService):
# If batch has aged out, process it
if oldest_message.created_at + max_delay < current_time:
recipient_email = await get_database_manager_async_client(
should_retry=False
).get_user_email_by_id(batch.user_id)
recipient_email = await get_database_manager_async_client().get_user_email_by_id(
batch.user_id
)
if not recipient_email:
logger.error(
@@ -332,25 +308,21 @@ class NotificationManager(AppService):
f"User {batch.user_id} does not want to receive {notification_type} notifications"
)
# Clear the batch
await get_database_manager_async_client(
should_retry=False
).empty_user_notification_batch(
await get_database_manager_async_client().empty_user_notification_batch(
batch.user_id, notification_type
)
continue
batch_data = await get_database_manager_async_client(
should_retry=False
).get_user_notification_batch(batch.user_id, notification_type)
batch_data = await get_database_manager_async_client().get_user_notification_batch(
batch.user_id, notification_type
)
if not batch_data or not batch_data.notifications:
logger.error(
f"Batch data not found for user {batch.user_id}"
)
# Clear the batch
await get_database_manager_async_client(
should_retry=False
).empty_user_notification_batch(
await get_database_manager_async_client().empty_user_notification_batch(
batch.user_id, notification_type
)
continue
@@ -386,9 +358,7 @@ class NotificationManager(AppService):
)
# Clear the batch
await get_database_manager_async_client(
should_retry=False
).empty_user_notification_batch(
await get_database_manager_async_client().empty_user_notification_batch(
batch.user_id, notification_type
)
@@ -443,13 +413,15 @@ class NotificationManager(AppService):
self, user_id: str, event_type: NotificationType
) -> bool:
"""Check if a user wants to receive a notification based on their preferences and email verification status"""
validated_email = await get_database_manager_async_client(
should_retry=False
).get_user_email_verification(user_id)
validated_email = (
await get_database_manager_async_client().get_user_email_verification(
user_id
)
)
preference = (
await get_database_manager_async_client(
should_retry=False
).get_user_notification_preference(user_id)
await get_database_manager_async_client().get_user_notification_preference(
user_id
)
).preferences.get(event_type, True)
# Only email this user if both flags are true
return validated_email and preference
@@ -465,9 +437,7 @@ class NotificationManager(AppService):
try:
# Get summary data from the database
summary_data = await get_database_manager_async_client(
should_retry=False
).get_user_execution_summary_data(
summary_data = await get_database_manager_async_client().get_user_execution_summary_data(
user_id=user_id,
start_time=params.start_date,
end_time=params.end_date,
@@ -554,13 +524,13 @@ class NotificationManager(AppService):
self, user_id: str, event_type: NotificationType, event: NotificationEventModel
) -> bool:
await get_database_manager_async_client(
should_retry=False
).create_or_add_to_user_notification_batch(user_id, event_type, event)
await get_database_manager_async_client().create_or_add_to_user_notification_batch(
user_id, event_type, event
)
oldest_message = await get_database_manager_async_client(
should_retry=False
).get_user_notification_oldest_message_in_batch(user_id, event_type)
oldest_message = await get_database_manager_async_client().get_user_notification_oldest_message_in_batch(
user_id, event_type
)
if not oldest_message:
logger.error(
f"Batch for user {user_id} and type {event_type} has no oldest message whichshould never happen!!!!!!!!!!!!!!!!"
@@ -610,9 +580,11 @@ class NotificationManager(AppService):
return False
logger.debug(f"Processing immediate notification: {event}")
recipient_email = await get_database_manager_async_client(
should_retry=False
).get_user_email_by_id(event.user_id)
recipient_email = (
await get_database_manager_async_client().get_user_email_by_id(
event.user_id
)
)
if not recipient_email:
logger.error(f"User email not found for user {event.user_id}")
return False
@@ -647,9 +619,11 @@ class NotificationManager(AppService):
return False
logger.info(f"Processing batch notification: {event}")
recipient_email = await get_database_manager_async_client(
should_retry=False
).get_user_email_by_id(event.user_id)
recipient_email = (
await get_database_manager_async_client().get_user_email_by_id(
event.user_id
)
)
if not recipient_email:
logger.error(f"User email not found for user {event.user_id}")
return False
@@ -668,9 +642,11 @@ class NotificationManager(AppService):
if not should_send:
logger.info("Batch not old enough to send")
return False
batch = await get_database_manager_async_client(
should_retry=False
).get_user_notification_batch(event.user_id, event.type)
batch = (
await get_database_manager_async_client().get_user_notification_batch(
event.user_id, event.type
)
)
if not batch or not batch.notifications:
logger.error(f"Batch not found for user {event.user_id}")
return False
@@ -681,7 +657,6 @@ class NotificationManager(AppService):
get_notif_data_type(db_event.type)
].model_validate(
{
"id": db_event.id, # Include ID from database
"user_id": event.user_id,
"type": db_event.type,
"data": db_event.data,
@@ -704,9 +679,6 @@ class NotificationManager(AppService):
chunk_sent = False
for attempt_size in [chunk_size, 50, 25, 10, 5, 1]:
chunk = batch_messages[i : i + attempt_size]
chunk_ids = [
msg.id for msg in chunk if msg.id
] # Extract IDs for removal
try:
# Try to render the email to check its size
@@ -733,23 +705,6 @@ class NotificationManager(AppService):
user_unsub_link=unsub_link,
)
# Remove successfully sent notifications immediately
if chunk_ids:
try:
await get_database_manager_async_client(
should_retry=False
).remove_notifications_from_batch(
event.user_id, event.type, chunk_ids
)
logger.info(
f"Removed {len(chunk_ids)} sent notifications from batch"
)
except Exception as e:
logger.error(
f"Failed to remove sent notifications: {e}"
)
# Continue anyway - better to risk duplicates than lose emails
# Track successful sends
successfully_sent_count += len(chunk)
@@ -767,137 +722,13 @@ class NotificationManager(AppService):
i += len(chunk)
chunk_sent = True
break
else:
# Message is too large even after size reduction
if attempt_size == 1:
logger.error(
f"Failed to send notification at index {i}: "
f"Single notification exceeds email size limit "
f"({len(test_message):,} chars > {MAX_EMAIL_SIZE:,} chars). "
f"Removing permanently from batch - will not retry."
)
# Remove the oversized notification permanently - it will NEVER fit
if chunk_ids:
try:
await get_database_manager_async_client(
should_retry=False
).remove_notifications_from_batch(
event.user_id, event.type, chunk_ids
)
logger.info(
f"Removed oversized notification {chunk_ids[0]} from batch permanently"
)
except Exception as e:
logger.error(
f"Failed to remove oversized notification: {e}"
)
failed_indices.append(i)
i += 1
chunk_sent = True
break
# Try smaller chunk size
continue
except Exception as e:
# Check if it's a Postmark API error
if attempt_size == 1:
# Single notification failed - determine the actual cause
error_message = str(e).lower()
error_type = type(e).__name__
# Check for HTTP 406 - Inactive recipient (common in Postmark errors)
if "406" in error_message or "inactive" in error_message:
logger.warning(
f"Failed to send notification at index {i}: "
f"Recipient marked as inactive by Postmark. "
f"Error: {e}. Disabling ALL notifications for this user."
)
# 1. Mark email as unverified
try:
await set_user_email_verification(
event.user_id, False
)
logger.info(
f"Set email verification to false for user {event.user_id}"
)
except Exception as deactivation_error:
logger.error(
f"Failed to deactivate email for user {event.user_id}: "
f"{deactivation_error}"
)
# 2. Disable all notification preferences
try:
await disable_all_user_notifications(event.user_id)
logger.info(
f"Disabled all notification preferences for user {event.user_id}"
)
except Exception as disable_error:
logger.error(
f"Failed to disable notification preferences: {disable_error}"
)
# 3. Clear ALL notification batches for this user
try:
await get_database_manager_async_client(
should_retry=False
).clear_all_user_notification_batches(event.user_id)
logger.info(
f"Cleared ALL notification batches for user {event.user_id}"
)
except Exception as remove_error:
logger.error(
f"Failed to clear batches for inactive recipient: {remove_error}"
)
# Stop processing - we've nuked everything for this user
return True
# Check for HTTP 422 - Malformed data
elif (
"422" in error_message
or "unprocessable" in error_message
):
logger.error(
f"Failed to send notification at index {i}: "
f"Malformed notification data rejected by Postmark. "
f"Error: {e}. Removing from batch permanently."
)
# Remove from batch - 422 means bad data that won't fix itself
if chunk_ids:
try:
await get_database_manager_async_client(
should_retry=False
).remove_notifications_from_batch(
event.user_id, event.type, chunk_ids
)
logger.info(
"Removed malformed notification from batch permanently"
)
except Exception as remove_error:
logger.error(
f"Failed to remove malformed notification: {remove_error}"
)
# Check if it's a ValueError for size limit
elif (
isinstance(e, ValueError)
and "too large" in error_message
):
logger.error(
f"Failed to send notification at index {i}: "
f"Notification size exceeds email limit. "
f"Error: {e}. Skipping this notification."
)
# Other API errors
else:
logger.error(
f"Failed to send notification at index {i}: "
f"Email API error ({error_type}): {e}. "
f"Skipping this notification."
)
# Even single notification is too large
logger.error(
f"Single notification too large to send: {e}. "
f"Skipping notification at index {i}"
)
failed_indices.append(i)
i += 1
chunk_sent = True
@@ -911,20 +742,18 @@ class NotificationManager(AppService):
failed_indices.append(i)
i += 1
# Check what remains in the batch (notifications are removed as sent)
remaining_batch = await get_database_manager_async_client(
should_retry=False
).get_user_notification_batch(event.user_id, event.type)
if not remaining_batch or not remaining_batch.notifications:
# Only empty the batch if ALL notifications were sent successfully
if successfully_sent_count == len(batch_messages):
logger.info(
f"All {successfully_sent_count} notifications sent and removed from batch"
f"Successfully sent all {successfully_sent_count} notifications, clearing batch"
)
await get_database_manager_async_client().empty_user_notification_batch(
event.user_id, event.type
)
else:
remaining_count = len(remaining_batch.notifications)
logger.warning(
f"Sent {successfully_sent_count} notifications. "
f"{remaining_count} remain in batch for retry due to errors."
f"Only sent {successfully_sent_count} of {len(batch_messages)} notifications. "
f"Failed indices: {failed_indices}. Batch will be retained for retry."
)
return True
except Exception as e:
@@ -942,9 +771,11 @@ class NotificationManager(AppService):
logger.info(f"Processing summary notification: {model}")
recipient_email = await get_database_manager_async_client(
should_retry=False
).get_user_email_by_id(event.user_id)
recipient_email = (
await get_database_manager_async_client().get_user_email_by_id(
event.user_id
)
)
if not recipient_email:
logger.error(f"User email not found for user {event.user_id}")
return False

View File

@@ -1,598 +0,0 @@
"""Tests for notification error handling in NotificationManager."""
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, Mock, patch
import pytest
from prisma.enums import NotificationType
from backend.data.notifications import AgentRunData, NotificationEventModel
from backend.notifications.notifications import NotificationManager
class TestNotificationErrorHandling:
"""Test cases for notification error handling in NotificationManager."""
@pytest.fixture
def notification_manager(self):
"""Create a NotificationManager instance for testing."""
with patch("backend.notifications.notifications.AppService.__init__"):
manager = NotificationManager()
manager.email_sender = MagicMock()
# Mock the _get_template method used by _process_batch
template_mock = Mock()
template_mock.base_template = "base"
template_mock.subject_template = "subject"
template_mock.body_template = "body"
manager.email_sender._get_template = Mock(return_value=template_mock)
# Mock the formatter
manager.email_sender.formatter = Mock()
manager.email_sender.formatter.format_email = Mock(
return_value=("subject", "body content")
)
manager.email_sender.formatter.env = Mock()
manager.email_sender.formatter.env.globals = {
"base_url": "http://example.com"
}
return manager
@pytest.fixture
def sample_batch_event(self):
"""Create a sample batch event for testing."""
return NotificationEventModel(
type=NotificationType.AGENT_RUN,
user_id="user_1",
created_at=datetime.now(timezone.utc),
data=AgentRunData(
agent_name="Test Agent",
credits_used=10.0,
execution_time=5.0,
node_count=3,
graph_id="graph_1",
outputs=[],
),
)
@pytest.fixture
def sample_batch_notifications(self):
"""Create sample batch notifications for testing."""
notifications = []
for i in range(3):
notification = Mock()
notification.type = NotificationType.AGENT_RUN
notification.data = {
"agent_name": f"Test Agent {i}",
"credits_used": 10.0 * (i + 1),
"execution_time": 5.0 * (i + 1),
"node_count": 3 + i,
"graph_id": f"graph_{i}",
"outputs": [],
}
notification.created_at = datetime.now(timezone.utc)
notifications.append(notification)
return notifications
@pytest.mark.asyncio
async def test_406_stops_all_processing_for_user(
self, notification_manager, sample_batch_event
):
"""Test that 406 inactive recipient error stops ALL processing for that user."""
with patch("backend.notifications.notifications.logger"), patch(
"backend.notifications.notifications.set_user_email_verification",
new_callable=AsyncMock,
) as mock_set_verification, patch(
"backend.notifications.notifications.disable_all_user_notifications",
new_callable=AsyncMock,
) as mock_disable_all, patch(
"backend.notifications.notifications.get_database_manager_async_client"
) as mock_db_client, patch(
"backend.notifications.notifications.generate_unsubscribe_link"
) as mock_unsub_link:
# Create batch of 5 notifications
notifications = []
for i in range(5):
notification = Mock()
notification.id = f"notif_{i}"
notification.type = NotificationType.AGENT_RUN
notification.data = {
"agent_name": f"Test Agent {i}",
"credits_used": 10.0 * (i + 1),
"execution_time": 5.0 * (i + 1),
"node_count": 3 + i,
"graph_id": f"graph_{i}",
"outputs": [],
}
notification.created_at = datetime.now(timezone.utc)
notifications.append(notification)
# Setup mocks
mock_db = mock_db_client.return_value
mock_db.get_user_email_by_id = AsyncMock(return_value="test@example.com")
mock_db.get_user_notification_batch = AsyncMock(
return_value=Mock(notifications=notifications)
)
mock_db.clear_all_user_notification_batches = AsyncMock()
mock_db.remove_notifications_from_batch = AsyncMock()
mock_unsub_link.return_value = "http://example.com/unsub"
# Mock internal methods
notification_manager._should_email_user_based_on_preference = AsyncMock(
return_value=True
)
notification_manager._should_batch = AsyncMock(return_value=True)
notification_manager._parse_message = Mock(return_value=sample_batch_event)
# Track calls
call_count = [0]
def send_side_effect(*args, **kwargs):
data = kwargs.get("data", [])
if isinstance(data, list) and len(data) == 1:
current_call = call_count[0]
call_count[0] += 1
# First two succeed, third hits 406
if current_call < 2:
return None
else:
raise Exception("Recipient marked as inactive (406)")
# Force single processing
raise Exception("Force single processing")
notification_manager.email_sender.send_templated.side_effect = (
send_side_effect
)
# Act
result = await notification_manager._process_batch(
sample_batch_event.model_dump_json()
)
# Assert
assert result is True
# Only 3 calls should have been made (2 successful, 1 failed with 406)
assert call_count[0] == 3
# User should be deactivated
mock_set_verification.assert_called_once_with("user_1", False)
mock_disable_all.assert_called_once_with("user_1")
mock_db.clear_all_user_notification_batches.assert_called_once_with(
"user_1"
)
# No further processing should occur after 406
@pytest.mark.asyncio
async def test_422_permanently_removes_malformed_notification(
self, notification_manager, sample_batch_event
):
"""Test that 422 error permanently removes the malformed notification from batch and continues with others."""
with patch("backend.notifications.notifications.logger") as mock_logger, patch(
"backend.notifications.notifications.get_database_manager_async_client"
) as mock_db_client, patch(
"backend.notifications.notifications.generate_unsubscribe_link"
) as mock_unsub_link:
# Create batch of 5 notifications
notifications = []
for i in range(5):
notification = Mock()
notification.id = f"notif_{i}"
notification.type = NotificationType.AGENT_RUN
notification.data = {
"agent_name": f"Test Agent {i}",
"credits_used": 10.0 * (i + 1),
"execution_time": 5.0 * (i + 1),
"node_count": 3 + i,
"graph_id": f"graph_{i}",
"outputs": [],
}
notification.created_at = datetime.now(timezone.utc)
notifications.append(notification)
# Setup mocks
mock_db = mock_db_client.return_value
mock_db.get_user_email_by_id = AsyncMock(return_value="test@example.com")
mock_db.get_user_notification_batch = AsyncMock(
side_effect=[
Mock(notifications=notifications),
Mock(notifications=[]), # Empty after processing
]
)
mock_db.remove_notifications_from_batch = AsyncMock()
mock_unsub_link.return_value = "http://example.com/unsub"
# Mock internal methods
notification_manager._should_email_user_based_on_preference = AsyncMock(
return_value=True
)
notification_manager._should_batch = AsyncMock(return_value=True)
notification_manager._parse_message = Mock(return_value=sample_batch_event)
# Track calls
call_count = [0]
successful_indices = []
removed_notification_ids = []
# Capture what gets removed
def remove_side_effect(user_id, notif_type, notif_ids):
removed_notification_ids.extend(notif_ids)
return None
mock_db.remove_notifications_from_batch.side_effect = remove_side_effect
def send_side_effect(*args, **kwargs):
data = kwargs.get("data", [])
if isinstance(data, list) and len(data) == 1:
current_call = call_count[0]
call_count[0] += 1
# Index 2 has malformed data (422)
if current_call == 2:
raise Exception(
"Unprocessable entity (422): Malformed email data"
)
else:
successful_indices.append(current_call)
return None
# Force single processing
raise Exception("Force single processing")
notification_manager.email_sender.send_templated.side_effect = (
send_side_effect
)
# Act
result = await notification_manager._process_batch(
sample_batch_event.model_dump_json()
)
# Assert
assert result is True
assert call_count[0] == 5 # All 5 attempted
assert len(successful_indices) == 4 # 4 succeeded (all except index 2)
assert 2 not in successful_indices # Index 2 failed
# Verify 422 error was logged
error_calls = [call[0][0] for call in mock_logger.error.call_args_list]
assert any(
"422" in call or "malformed" in call.lower() for call in error_calls
)
# Verify all notifications were removed (4 successful + 1 malformed)
assert mock_db.remove_notifications_from_batch.call_count == 5
assert (
"notif_2" in removed_notification_ids
) # Malformed one was removed permanently
@pytest.mark.asyncio
async def test_oversized_notification_permanently_removed(
self, notification_manager, sample_batch_event
):
"""Test that oversized notifications are permanently removed from batch but others continue."""
with patch("backend.notifications.notifications.logger") as mock_logger, patch(
"backend.notifications.notifications.get_database_manager_async_client"
) as mock_db_client, patch(
"backend.notifications.notifications.generate_unsubscribe_link"
) as mock_unsub_link:
# Create batch of 5 notifications
notifications = []
for i in range(5):
notification = Mock()
notification.id = f"notif_{i}"
notification.type = NotificationType.AGENT_RUN
notification.data = {
"agent_name": f"Test Agent {i}",
"credits_used": 10.0 * (i + 1),
"execution_time": 5.0 * (i + 1),
"node_count": 3 + i,
"graph_id": f"graph_{i}",
"outputs": [],
}
notification.created_at = datetime.now(timezone.utc)
notifications.append(notification)
# Setup mocks
mock_db = mock_db_client.return_value
mock_db.get_user_email_by_id = AsyncMock(return_value="test@example.com")
mock_db.get_user_notification_batch = AsyncMock(
side_effect=[
Mock(notifications=notifications),
Mock(notifications=[]), # Empty after processing
]
)
mock_db.remove_notifications_from_batch = AsyncMock()
mock_unsub_link.return_value = "http://example.com/unsub"
# Mock internal methods
notification_manager._should_email_user_based_on_preference = AsyncMock(
return_value=True
)
notification_manager._should_batch = AsyncMock(return_value=True)
notification_manager._parse_message = Mock(return_value=sample_batch_event)
# Override formatter to simulate oversized on index 3
# original_format = notification_manager.email_sender.formatter.format_email
def format_side_effect(*args, **kwargs):
# Check if we're formatting index 3
data = kwargs.get("data", {}).get("notifications", [])
if data and len(data) == 1:
# Check notification content to identify index 3
if any(
"Test Agent 3" in str(n.data)
for n in data
if hasattr(n, "data")
):
# Return oversized message for index 3
return ("subject", "x" * 5_000_000) # Over 4.5MB limit
return ("subject", "normal sized content")
notification_manager.email_sender.formatter.format_email = Mock(
side_effect=format_side_effect
)
# Track calls
successful_indices = []
def send_side_effect(*args, **kwargs):
data = kwargs.get("data", [])
if isinstance(data, list) and len(data) == 1:
# Track which notification was sent based on content
for i, notif in enumerate(notifications):
if any(
f"Test Agent {i}" in str(n.data)
for n in data
if hasattr(n, "data")
):
successful_indices.append(i)
return None
return None
# Force single processing
raise Exception("Force single processing")
notification_manager.email_sender.send_templated.side_effect = (
send_side_effect
)
# Act
result = await notification_manager._process_batch(
sample_batch_event.model_dump_json()
)
# Assert
assert result is True
assert (
len(successful_indices) == 4
) # Only 4 sent (index 3 skipped due to size)
assert 3 not in successful_indices # Index 3 was not sent
# Verify oversized error was logged
error_calls = [call[0][0] for call in mock_logger.error.call_args_list]
assert any(
"exceeds email size limit" in call or "oversized" in call.lower()
for call in error_calls
)
@pytest.mark.asyncio
async def test_generic_api_error_keeps_notification_for_retry(
self, notification_manager, sample_batch_event
):
"""Test that generic API errors keep notifications in batch for retry while others continue."""
with patch("backend.notifications.notifications.logger") as mock_logger, patch(
"backend.notifications.notifications.get_database_manager_async_client"
) as mock_db_client, patch(
"backend.notifications.notifications.generate_unsubscribe_link"
) as mock_unsub_link:
# Create batch of 5 notifications
notifications = []
for i in range(5):
notification = Mock()
notification.id = f"notif_{i}"
notification.type = NotificationType.AGENT_RUN
notification.data = {
"agent_name": f"Test Agent {i}",
"credits_used": 10.0 * (i + 1),
"execution_time": 5.0 * (i + 1),
"node_count": 3 + i,
"graph_id": f"graph_{i}",
"outputs": [],
}
notification.created_at = datetime.now(timezone.utc)
notifications.append(notification)
# Notification that failed with generic error
failed_notifications = [notifications[1]] # Only index 1 remains for retry
# Setup mocks
mock_db = mock_db_client.return_value
mock_db.get_user_email_by_id = AsyncMock(return_value="test@example.com")
mock_db.get_user_notification_batch = AsyncMock(
side_effect=[
Mock(notifications=notifications),
Mock(
notifications=failed_notifications
), # Failed ones remain for retry
]
)
mock_db.remove_notifications_from_batch = AsyncMock()
mock_unsub_link.return_value = "http://example.com/unsub"
# Mock internal methods
notification_manager._should_email_user_based_on_preference = AsyncMock(
return_value=True
)
notification_manager._should_batch = AsyncMock(return_value=True)
notification_manager._parse_message = Mock(return_value=sample_batch_event)
# Track calls
successful_indices = []
failed_indices = []
removed_notification_ids = []
# Capture what gets removed
def remove_side_effect(user_id, notif_type, notif_ids):
removed_notification_ids.extend(notif_ids)
return None
mock_db.remove_notifications_from_batch.side_effect = remove_side_effect
def send_side_effect(*args, **kwargs):
data = kwargs.get("data", [])
if isinstance(data, list) and len(data) == 1:
# Track which notification based on content
for i, notif in enumerate(notifications):
if any(
f"Test Agent {i}" in str(n.data)
for n in data
if hasattr(n, "data")
):
# Index 1 has generic API error
if i == 1:
failed_indices.append(i)
raise Exception("Network timeout - temporary failure")
else:
successful_indices.append(i)
return None
return None
# Force single processing
raise Exception("Force single processing")
notification_manager.email_sender.send_templated.side_effect = (
send_side_effect
)
# Act
result = await notification_manager._process_batch(
sample_batch_event.model_dump_json()
)
# Assert
assert result is True
assert len(successful_indices) == 4 # 4 succeeded (0, 2, 3, 4)
assert len(failed_indices) == 1 # 1 failed
assert 1 in failed_indices # Index 1 failed
# Verify generic error was logged
error_calls = [call[0][0] for call in mock_logger.error.call_args_list]
assert any(
"api error" in call.lower() or "skipping" in call.lower()
for call in error_calls
)
# Only successful ones should be removed from batch (failed one stays for retry)
assert mock_db.remove_notifications_from_batch.call_count == 4
assert (
"notif_1" not in removed_notification_ids
) # Failed one NOT removed (stays for retry)
assert "notif_0" in removed_notification_ids # Successful one removed
assert "notif_2" in removed_notification_ids # Successful one removed
assert "notif_3" in removed_notification_ids # Successful one removed
assert "notif_4" in removed_notification_ids # Successful one removed
@pytest.mark.asyncio
async def test_batch_all_notifications_sent_successfully(
self, notification_manager, sample_batch_event
):
"""Test successful batch processing where all notifications are sent without errors."""
with patch("backend.notifications.notifications.logger") as mock_logger, patch(
"backend.notifications.notifications.get_database_manager_async_client"
) as mock_db_client, patch(
"backend.notifications.notifications.generate_unsubscribe_link"
) as mock_unsub_link:
# Create batch of 5 notifications
notifications = []
for i in range(5):
notification = Mock()
notification.id = f"notif_{i}"
notification.type = NotificationType.AGENT_RUN
notification.data = {
"agent_name": f"Test Agent {i}",
"credits_used": 10.0 * (i + 1),
"execution_time": 5.0 * (i + 1),
"node_count": 3 + i,
"graph_id": f"graph_{i}",
"outputs": [],
}
notification.created_at = datetime.now(timezone.utc)
notifications.append(notification)
# Setup mocks
mock_db = mock_db_client.return_value
mock_db.get_user_email_by_id = AsyncMock(return_value="test@example.com")
mock_db.get_user_notification_batch = AsyncMock(
side_effect=[
Mock(notifications=notifications),
Mock(notifications=[]), # Empty after all sent successfully
]
)
mock_db.remove_notifications_from_batch = AsyncMock()
mock_unsub_link.return_value = "http://example.com/unsub"
# Mock internal methods
notification_manager._should_email_user_based_on_preference = AsyncMock(
return_value=True
)
notification_manager._should_batch = AsyncMock(return_value=True)
notification_manager._parse_message = Mock(return_value=sample_batch_event)
# Track successful sends
successful_indices = []
removed_notification_ids = []
# Capture what gets removed
def remove_side_effect(user_id, notif_type, notif_ids):
removed_notification_ids.extend(notif_ids)
return None
mock_db.remove_notifications_from_batch.side_effect = remove_side_effect
def send_side_effect(*args, **kwargs):
data = kwargs.get("data", [])
if isinstance(data, list) and len(data) == 1:
# Track which notification was sent
for i, notif in enumerate(notifications):
if any(
f"Test Agent {i}" in str(n.data)
for n in data
if hasattr(n, "data")
):
successful_indices.append(i)
return None
return None # Success
# Force single processing
raise Exception("Force single processing")
notification_manager.email_sender.send_templated.side_effect = (
send_side_effect
)
# Act
result = await notification_manager._process_batch(
sample_batch_event.model_dump_json()
)
# Assert
assert result is True
# All 5 notifications should be sent successfully
assert len(successful_indices) == 5
assert successful_indices == [0, 1, 2, 3, 4]
# All notifications should be removed from batch
assert mock_db.remove_notifications_from_batch.call_count == 5
assert len(removed_notification_ids) == 5
for i in range(5):
assert f"notif_{i}" in removed_notification_ids
# No errors should be logged
assert mock_logger.error.call_count == 0
# Info message about successful sends should be logged
info_calls = [call[0][0] for call in mock_logger.info.call_args_list]
assert any("sent and removed" in call.lower() for call in info_calls)

View File

@@ -6,10 +6,10 @@ import logging
import threading
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
from pydantic import BaseModel
from pydantic import BaseModel, SecretStr
from backend.blocks.basic import Block
from backend.data.model import Credentials
from backend.data.model import APIKeyCredentials, Credentials
from backend.integrations.oauth.base import BaseOAuthHandler
from backend.integrations.providers import ProviderName
from backend.integrations.webhooks._base import BaseWebhooksManager
@@ -17,8 +17,6 @@ from backend.integrations.webhooks._base import BaseWebhooksManager
if TYPE_CHECKING:
from backend.sdk.provider import Provider
logger = logging.getLogger(__name__)
class SDKOAuthCredentials(BaseModel):
"""OAuth credentials configuration for SDK providers."""
@@ -104,8 +102,21 @@ class AutoRegistry:
"""Register an environment variable as an API key for a provider."""
with cls._lock:
cls._api_key_mappings[provider] = env_var_name
# Note: The credential itself is created by ProviderBuilder.with_api_key()
# We only store the mapping here to avoid duplication
# Dynamically check if the env var exists and create credential
import os
api_key = os.getenv(env_var_name)
if api_key:
credential = APIKeyCredentials(
id=f"{provider}-default",
provider=provider,
api_key=SecretStr(api_key),
title=f"Default {provider} credentials",
)
# Check if credential already exists to avoid duplicates
if not any(c.id == credential.id for c in cls._default_credentials):
cls._default_credentials.append(credential)
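Sketch of the resulting behaviour, assuming a provider registered its key via e.g. AutoRegistry.register_api_key("my_provider", "MY_PROVIDER_API_KEY") (provider and env var names hypothetical):
# With MY_PROVIDER_API_KEY present in the environment, the call creates:
# APIKeyCredentials(
#     id="my_provider-default",
#     provider="my_provider",
#     api_key=SecretStr("<env value>"),
#     title="Default my_provider credentials",
# )
# and appends it to _default_credentials exactly once (duplicate ids are skipped).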
@classmethod
def get_all_credentials(cls) -> List[Credentials]:
@@ -199,43 +210,3 @@ class AutoRegistry:
webhooks.load_webhook_managers = patched_load
except Exception as e:
logging.warning(f"Failed to patch webhook managers: {e}")
# Patch credentials store to include SDK-registered credentials
try:
import sys
from typing import Any
# Get the module from sys.modules to respect mocking
if "backend.integrations.credentials_store" in sys.modules:
creds_store: Any = sys.modules["backend.integrations.credentials_store"]
else:
import backend.integrations.credentials_store
creds_store: Any = backend.integrations.credentials_store
if hasattr(creds_store, "IntegrationCredentialsStore"):
store_class = creds_store.IntegrationCredentialsStore
if hasattr(store_class, "get_all_creds"):
original_get_all_creds = store_class.get_all_creds
async def patched_get_all_creds(self, user_id: str):
# Get original credentials
original_creds = await original_get_all_creds(self, user_id)
# Add SDK-registered credentials
sdk_creds = cls.get_all_credentials()
# Combine credentials, avoiding duplicates by ID
existing_ids = {c.id for c in original_creds}
for cred in sdk_creds:
if cred.id not in existing_ids:
original_creds.append(cred)
return original_creds
store_class.get_all_creds = patched_get_all_creds
logger.info(
"Successfully patched IntegrationCredentialsStore.get_all_creds"
)
except Exception as e:
logging.warning(f"Failed to patch credentials store: {e}")

View File

@@ -1,6 +1,5 @@
from fastapi import FastAPI
from backend.monitoring.instrumentation import instrument_fastapi
from backend.server.middleware.security import SecurityHeadersMiddleware
from .routes.v1 import v1_router
@@ -14,12 +13,3 @@ external_app = FastAPI(
external_app.add_middleware(SecurityHeadersMiddleware)
external_app.include_router(v1_router, prefix="/v1")
# Add Prometheus instrumentation
instrument_fastapi(
external_app,
service_name="external-api",
expose_endpoint=True,
endpoint="/metrics",
include_in_schema=True,
)

View File

@@ -49,7 +49,7 @@ class GraphExecutionResult(TypedDict):
tags=["blocks"],
dependencies=[Security(require_permission(APIKeyPermission.READ_BLOCK))],
)
async def get_graph_blocks() -> Sequence[dict[Any, Any]]:
def get_graph_blocks() -> Sequence[dict[Any, Any]]:
blocks = [block() for block in backend.data.block.get_blocks().values()]
return [b.to_dict() for b in blocks if not b.disabled]

View File

@@ -32,7 +32,6 @@ from backend.data.model import (
OAuth2Credentials,
UserIntegrations,
)
from backend.data.onboarding import complete_webhook_trigger_step
from backend.data.user import get_user_integrations
from backend.executor.utils import add_graph_execution
from backend.integrations.ayrshare import AyrshareClient, SocialPlatform
@@ -64,7 +63,7 @@ class LoginResponse(BaseModel):
    state_token: str


@router.get("/{provider}/login", summary="Initiate OAuth flow")
@router.get("/{provider}/login")
async def login(
    provider: Annotated[
        ProviderName, Path(title="The provider to initiate an OAuth flow for")
@@ -102,7 +101,7 @@ class CredentialsMetaResponse(BaseModel):
    )


@router.post("/{provider}/callback", summary="Exchange OAuth code for tokens")
@router.post("/{provider}/callback")
async def callback(
    provider: Annotated[
        ProviderName, Path(title="The target provider for this OAuth exchange")
@@ -180,7 +179,7 @@ async def callback(
    )


@router.get("/credentials", summary="List Credentials")
@router.get("/credentials")
async def list_credentials(
    user_id: Annotated[str, Security(get_user_id)],
) -> list[CredentialsMetaResponse]:
@@ -221,9 +220,7 @@ async def list_credentials_by_provider(
    ]


@router.get(
    "/{provider}/credentials/{cred_id}", summary="Get Specific Credential By ID"
)
@router.get("/{provider}/credentials/{cred_id}")
async def get_credential(
    provider: Annotated[
        ProviderName, Path(title="The provider to retrieve credentials for")
@@ -244,7 +241,7 @@ async def get_credential(
    return credential


@router.post("/{provider}/credentials", status_code=201, summary="Create Credentials")
@router.post("/{provider}/credentials", status_code=201)
async def create_credentials(
    user_id: Annotated[str, Security(get_user_id)],
    provider: Annotated[
@@ -370,8 +367,6 @@ async def webhook_ingress_generic(
        return

    executions: list[Awaitable] = []
    await complete_webhook_trigger_step(user_id)
    for node in webhook.triggered_nodes:
        logger.debug(f"Webhook-attached node: {node}")
        if not node.is_triggered_by_event_type(event_type):

View File

@@ -1,10 +1,12 @@
import re
from typing import Set
from starlette.types import ASGIApp, Message, Receive, Scope, Send
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.types import ASGIApp
class SecurityHeadersMiddleware:
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """
    Middleware to add security headers to responses, with cache control
    disabled by default for all endpoints except those explicitly allowed.
@@ -23,8 +25,6 @@ class SecurityHeadersMiddleware:
        "/api/health",
        "/api/v1/health",
        "/api/status",
        "/api/blocks",
        "/api/v1/blocks",
        # Public store/marketplace pages (read-only)
        "/api/store/agents",
        "/api/v1/store/agents",
@@ -49,7 +49,7 @@ class SecurityHeadersMiddleware:
    }

    def __init__(self, app: ASGIApp):
        self.app = app
        super().__init__(app)
        # Compile regex patterns for wildcard matching
        self.cacheable_patterns = [
            re.compile(pattern.replace("*", "[^/]+"))
@@ -72,42 +72,22 @@ class SecurityHeadersMiddleware:
        return False

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Pure ASGI middleware implementation for better performance than BaseHTTPMiddleware."""
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return
    async def dispatch(self, request: Request, call_next):
        response: Response = await call_next(request)

        # Extract path from scope
        path = scope["path"]

        # Add general security headers
        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["X-Frame-Options"] = "DENY"
        response.headers["X-XSS-Protection"] = "1; mode=block"
        response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"

        async def send_wrapper(message: Message) -> None:
            if message["type"] == "http.response.start":
                # Add security headers to the response
                headers = dict(message.get("headers", []))

        # Default: Disable caching for all endpoints
        # Only allow caching for explicitly permitted paths
        if not self.is_cacheable_path(request.url.path):
            response.headers["Cache-Control"] = (
                "no-store, no-cache, must-revalidate, private"
            )
            response.headers["Pragma"] = "no-cache"
            response.headers["Expires"] = "0"

                # Add general security headers (HTTP spec requires proper capitalization)
                headers[b"X-Content-Type-Options"] = b"nosniff"
                headers[b"X-Frame-Options"] = b"DENY"
                headers[b"X-XSS-Protection"] = b"1; mode=block"
                headers[b"Referrer-Policy"] = b"strict-origin-when-cross-origin"

                # Add noindex header for shared execution pages
                if "/public/shared" in path:
                    headers[b"X-Robots-Tag"] = b"noindex, nofollow"

                # Default: Disable caching for all endpoints
                # Only allow caching for explicitly permitted paths
                if not self.is_cacheable_path(path):
                    headers[b"Cache-Control"] = (
                        b"no-store, no-cache, must-revalidate, private"
                    )
                    headers[b"Pragma"] = b"no-cache"
                    headers[b"Expires"] = b"0"

                # Convert headers back to list format
                message["headers"] = list(headers.items())

            await send(message)

        await self.app(scope, receive, send_wrapper)
        return response
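The rewrite above moves from BaseHTTPMiddleware to a pure-ASGI implementation: instead of materializing a Response object, it wraps `send` and mutates the `http.response.start` message in flight. A toy sketch of that wrapping pattern, with an illustrative app and driver:

```python
import asyncio

async def app(scope, receive, send):
    # Toy ASGI app: a single plain-text response.
    await send({"type": "http.response.start", "status": 200, "headers": []})
    await send({"type": "http.response.body", "body": b"ok"})

def with_header(inner_app, name: bytes, value: bytes):
    # Same shape as the middleware above: wrap `send` and edit the
    # http.response.start message before it reaches the server.
    async def wrapped(scope, receive, send):
        async def send_wrapper(message):
            if message["type"] == "http.response.start":
                headers = dict(message.get("headers", []))
                headers[name] = value
                message["headers"] = list(headers.items())
            await send(message)

        await inner_app(scope, receive, send_wrapper)

    return wrapped

async def main():
    sent = []

    async def collect(message):
        sent.append(message)

    wrapped = with_header(app, b"x-frame-options", b"DENY")
    await wrapped({"type": "http", "path": "/"}, None, collect)
    print(sent[0]["headers"])  # [(b'x-frame-options', b'DENY')]

asyncio.run(main())
```

The gain over BaseHTTPMiddleware is that no Response object or wrapper task is created per request; headers are edited directly on the raw ASGI message.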

View File

@@ -1,6 +1,5 @@
import contextlib
import logging
import platform
from enum import Enum
from typing import Any, Optional
@@ -12,7 +11,6 @@ import uvicorn
from autogpt_libs.auth import add_auth_responses_to_openapi
from autogpt_libs.auth import verify_settings as verify_auth_settings
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.routing import APIRoute
from prisma.errors import PrismaError
@@ -20,7 +18,6 @@ import backend.data.block
import backend.data.db
import backend.data.graph
import backend.data.user
import backend.integrations.webhooks.utils
import backend.server.routers.postmark.postmark
import backend.server.routers.v1
import backend.server.v2.admin.credit_admin_routes
@@ -39,7 +36,6 @@ import backend.util.settings
from backend.blocks.llm import LlmModel
from backend.data.model import Credentials
from backend.integrations.providers import ProviderName
from backend.monitoring.instrumentation import instrument_fastapi
from backend.server.external.api import external_app
from backend.server.middleware.security import SecurityHeadersMiddleware
from backend.util import json
@@ -72,26 +68,6 @@ async def lifespan_context(app: fastapi.FastAPI):
    await backend.data.db.connect()

    # Configure thread pool for FastAPI sync operation performance
    # CRITICAL: FastAPI automatically runs ALL sync functions in this thread pool:
    #   - Any endpoint defined with 'def' (not async def)
    #   - Any dependency function defined with 'def' (not async def)
    #   - Manual run_in_threadpool() calls (like JWT decoding)
    # Default pool size is only 40 threads, causing bottlenecks under high concurrency
    config = backend.util.settings.Config()
    try:
        import anyio.to_thread

        anyio.to_thread.current_default_thread_limiter().total_tokens = (
            config.fastapi_thread_pool_size
        )
        logger.info(
            f"Thread pool size set to {config.fastapi_thread_pool_size} for sync endpoint/dependency performance"
        )
    except (ImportError, AttributeError) as e:
        logger.warning(f"Could not configure thread pool size: {e}")
        # Continue without thread pool configuration

    # Ensure SDK auto-registration is patched before initializing blocks
    from backend.sdk.registry import AutoRegistry
@@ -102,8 +78,6 @@ async def lifespan_context(app: fastapi.FastAPI):
    await backend.data.user.migrate_and_encrypt_user_integrations()
    await backend.data.graph.fix_llm_provider_credentials()
    await backend.data.graph.migrate_llm_models(LlmModel.GPT4O)
    await backend.integrations.webhooks.utils.migrate_legacy_triggered_graphs()

    with launch_darkly_context():
        yield
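For reference, the removed thread-pool tuning is a one-liner against anyio's default capacity limiter, which FastAPI uses for all sync endpoints and dependencies. A standalone sketch (the capacity value here is arbitrary):

```python
import anyio
import anyio.to_thread

async def main():
    limiter = anyio.to_thread.current_default_thread_limiter()
    print("default capacity:", limiter.total_tokens)  # 40 by default
    limiter.total_tokens = 100  # widen the pool, as the removed block did
    # Any run_sync call (and FastAPI 'def' endpoints) now shares 100 threads.
    result = await anyio.to_thread.run_sync(sum, [1, 2, 3])
    print(result)  # 6

anyio.run(main)
```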
@@ -162,22 +136,9 @@ app = fastapi.FastAPI(
app.add_middleware(SecurityHeadersMiddleware)

# Add GZip compression middleware for large responses (like /api/blocks)
app.add_middleware(GZipMiddleware, minimum_size=50_000)  # 50KB threshold

# Add 401 responses to authenticated endpoints in OpenAPI spec
add_auth_responses_to_openapi(app)

# Add Prometheus instrumentation
instrument_fastapi(
    app,
    service_name="rest-api",
    expose_endpoint=True,
    endpoint="/metrics",
    include_in_schema=settings.config.app_env
    == backend.util.settings.AppEnvironment.LOCAL,
)


def handle_internal_http_error(status_code: int = 500, log_error: bool = True):
    def handler(request: fastapi.Request, exc: Exception):
@@ -291,35 +252,25 @@ async def health():
class AgentServer(backend.util.service.AppProcess):
    def run(self):
        server_app = starlette.middleware.cors.CORSMiddleware(
            app=app,
            allow_origins=settings.config.backend_cors_allow_origins,
            allow_credentials=True,
            allow_methods=["*"],  # Allows all methods
            allow_headers=["*"],  # Allows all headers
        if settings.config.enable_cors_all_origins:
            server_app = starlette.middleware.cors.CORSMiddleware(
                app=app,
                allow_origins=settings.config.backend_cors_allow_origins,
                allow_credentials=True,
                allow_methods=["*"],  # Allows all methods
                allow_headers=["*"],  # Allows all headers
            )
        else:
            logger.info("CORS is disabled")
            server_app = app
        uvicorn.run(
            server_app,
            host=backend.util.settings.Config().agent_api_host,
            port=backend.util.settings.Config().agent_api_port,
            log_config=None,
        )
        config = backend.util.settings.Config()
        # Configure uvicorn with performance optimizations from Kludex FastAPI tips
        uvicorn_config = {
            "app": server_app,
            "host": config.agent_api_host,
            "port": config.agent_api_port,
            "log_config": None,
            # Use httptools for HTTP parsing (if available)
            "http": "httptools",
            # Only use uvloop on Unix-like systems (not supported on Windows)
            "loop": "uvloop" if platform.system() != "Windows" else "auto",
        }
        # Only add debug in local environment (not supported in all uvicorn versions)
        if config.app_env == backend.util.settings.AppEnvironment.LOCAL:
            import os

            # Enable asyncio debug mode via environment variable
            os.environ["PYTHONASYNCIODEBUG"] = "1"
        uvicorn.run(**uvicorn_config)

    def cleanup(self):
        super().cleanup()
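The new conditional is the heart of this commit: CORS middleware is only applied when a settings flag is on. A minimal sketch of the same wiring, where the flag and origins list are stand-ins for values that the real codebase reads from backend.util.settings:

```python
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware

app = FastAPI()

# Assumed settings; the real values come from backend.util.settings.Config.
ENABLE_CORS = False
ALLOWED_ORIGINS = ["http://localhost:3000"]

if ENABLE_CORS:
    # Wrap the app so CORS headers are applied to every response.
    server_app = CORSMiddleware(
        app=app,
        allow_origins=ALLOWED_ORIGINS,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
else:
    # CORS disabled: serve the bare app, e.g. for same-origin deployments
    # or when a reverse proxy in front handles CORS itself.
    server_app = app

# uvicorn.run(server_app, host="0.0.0.0", port=8006) serves either variant.
```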

View File

@@ -1,17 +1,14 @@
import asyncio
import base64
import logging
import time
import uuid
from collections import defaultdict
from datetime import datetime, timezone
from datetime import datetime
from typing import Annotated, Any, Sequence
import pydantic
import stripe
from autogpt_libs.auth import get_user_id, requires_user
from autogpt_libs.auth.jwt_utils import get_jwt_payload
from autogpt_libs.utils.cache import cached
from fastapi import (
APIRouter,
Body,
@@ -24,8 +21,6 @@ from fastapi import (
Security,
UploadFile,
)
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from starlette.status import HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND
from typing_extensions import Optional, TypedDict
@@ -41,10 +36,10 @@ from backend.data.credit import (
RefundRequest,
TransactionHistory,
get_auto_top_up,
get_block_costs,
get_user_credit_model,
set_auto_top_up,
)
from backend.data.execution import UserContext
from backend.data.model import CredentialsMetaInput
from backend.data.notifications import NotificationPreference, NotificationPreferenceDTO
from backend.data.onboarding import (
@@ -68,11 +63,6 @@ from backend.integrations.webhooks.graph_lifecycle_hooks import (
on_graph_activate,
on_graph_deactivate,
)
from backend.monitoring.instrumentation import (
record_block_execution,
record_graph_execution,
record_graph_operation,
)
from backend.server.model import (
CreateAPIKeyRequest,
CreateAPIKeyResponse,
@@ -87,9 +77,9 @@ from backend.server.model import (
from backend.util.clients import get_scheduler_client
from backend.util.cloud_storage import get_cloud_storage_handler
from backend.util.exceptions import GraphValidationError, NotFoundError
from backend.util.json import dumps
from backend.util.settings import Settings
from backend.util.timezone_utils import (
convert_cron_to_utc,
convert_utc_time_to_user_timezone,
get_user_timezone_or_utc,
)
@@ -107,7 +97,6 @@ def _create_file_size_error(size_bytes: int, max_size_mb: int) -> HTTPException:
settings = Settings()
logger = logging.getLogger(__name__)
_user_credit_model = get_user_credit_model()
# Define the API routes
@@ -266,69 +255,18 @@ async def is_onboarding_enabled():
########################################################
def _compute_blocks_sync() -> str:
    """
    Synchronous function to compute blocks data.
    This does the heavy lifting: instantiate 226+ blocks, compute costs, serialize.
    """
    from backend.data.credit import get_block_cost

    block_classes = get_blocks()
    result = []
    for block_class in block_classes.values():
        block_instance = block_class()
        if not block_instance.disabled:
            costs = get_block_cost(block_instance)
            # Convert BlockCost BaseModel objects to dictionaries for JSON serialization
            costs_dict = [
                cost.model_dump() if isinstance(cost, BaseModel) else cost
                for cost in costs
            ]
            result.append({**block_instance.to_dict(), "costs": costs_dict})
    # Use our JSON utility which properly handles complex types through to_dict conversion
    return dumps(result)


@cached()
async def _get_cached_blocks() -> str:
    """
    Async cached function with thundering herd protection.
    On cache miss: runs heavy work in thread pool
    On cache hit: returns cached string immediately (no thread pool needed)
    """
    # Only run in thread pool on cache miss - cache hits return immediately
    return await run_in_threadpool(_compute_blocks_sync)


@v1_router.get(
    path="/blocks",
    summary="List available blocks",
    tags=["blocks"],
    dependencies=[Security(requires_user)],
    responses={
        200: {
            "description": "Successful Response",
            "content": {
                "application/json": {
                    "schema": {
                        "items": {"additionalProperties": True, "type": "object"},
                        "type": "array",
                        "title": "Response Getv1List Available Blocks",
                    }
                }
            },
        }
    },
)
async def get_graph_blocks() -> Response:
    # Cache hit: returns immediately, Cache miss: runs in thread pool
    content = await _get_cached_blocks()
    return Response(
        content=content,
        media_type="application/json",
    )
def get_graph_blocks() -> Sequence[dict[Any, Any]]:
    blocks = [block() for block in get_blocks().values()]
    costs = get_block_costs()
    return [
        {**b.to_dict(), "costs": costs.get(b.id, [])} for b in blocks if not b.disabled
    ]
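The internals of the removed `@cached()` decorator are not shown in this diff; one plausible shape of its thundering-herd protection is an async single-flight cache, sketched here with illustrative names only:

```python
import asyncio
import functools

def cached_once(func):
    """Cache a zero-arg async function; a lock ensures only the first
    caller does the expensive work while concurrent callers wait."""
    sentinel = object()
    value = sentinel
    lock = asyncio.Lock()

    @functools.wraps(func)
    async def wrapper():
        nonlocal value
        if value is not sentinel:  # fast path: cache hit
            return value
        async with lock:           # slow path: single flight
            if value is sentinel:
                value = await func()
            return value

    return wrapper

@cached_once
async def expensive():
    await asyncio.sleep(0.1)  # stands in for serializing 226+ blocks
    return "blocks-json"

async def main():
    results = await asyncio.gather(*[expensive() for _ in range(5)])
    print(results)  # one computation, five identical results

asyncio.run(main())
```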
@v1_router.post(
@@ -337,45 +275,15 @@ async def get_graph_blocks() -> Response:
    tags=["blocks"],
    dependencies=[Security(requires_user)],
)
async def execute_graph_block(
    block_id: str, data: BlockInput, user_id: Annotated[str, Security(get_user_id)]
) -> CompletedBlockOutput:
async def execute_graph_block(block_id: str, data: BlockInput) -> CompletedBlockOutput:
    obj = get_block(block_id)
    if not obj:
        raise HTTPException(status_code=404, detail=f"Block #{block_id} not found.")

    # Get user context for block execution
    user = await get_user_by_id(user_id)
    if not user:
        raise HTTPException(status_code=404, detail="User not found.")
    user_context = UserContext(timezone=user.timezone)

    start_time = time.time()
    try:
        output = defaultdict(list)
        async for name, data in obj.execute(
            data,
            user_context=user_context,
            user_id=user_id,
            # Note: graph_exec_id and graph_id are not available for direct block execution
        ):
            output[name].append(data)

        # Record successful block execution with duration
        duration = time.time() - start_time
        block_type = obj.__class__.__name__
        record_block_execution(
            block_type=block_type, status="success", duration=duration
        )
        return output
    except Exception:
        # Record failed block execution
        duration = time.time() - start_time
        block_type = obj.__class__.__name__
        record_block_execution(block_type=block_type, status="error", duration=duration)
        raise
    output = defaultdict(list)
    async for name, data in obj.execute(data):
        output[name].append(data)
    return output
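The removed metrics code repeats one pattern: time the call, record success or error with the duration, re-raise on failure. That pattern factors into a small decorator; a sketch with a hypothetical `record` callback (not the codebase's API):

```python
import asyncio
import functools
import time

def record_duration(record):
    """Time an async call and report status plus duration, whether it
    succeeds or raises, mirroring the removed metrics code."""
    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start = time.time()
            try:
                result = await func(*args, **kwargs)
                record(status="success", duration=time.time() - start)
                return result
            except Exception:
                record(status="error", duration=time.time() - start)
                raise
        return wrapper
    return decorator

@record_duration(lambda **kw: print(kw))
async def work():
    await asyncio.sleep(0.05)
    return 42

print(asyncio.run(work()))  # prints the status dict, then 42
```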
@v1_router.post(
@@ -668,13 +576,7 @@ class DeleteGraphResponse(TypedDict):
async def list_graphs(
    user_id: Annotated[str, Security(get_user_id)],
) -> Sequence[graph_db.GraphMeta]:
    paginated_result = await graph_db.list_graphs_paginated(
        user_id=user_id,
        page=1,
        page_size=250,
        filter_by="active",
    )
    return paginated_result.graphs
    return await graph_db.list_graphs(filter_by="active", user_id=user_id)
@v1_router.get(
@@ -877,7 +779,7 @@ async def execute_graph(
    )

    try:
        result = await execution_utils.add_graph_execution(
        return await execution_utils.add_graph_execution(
            graph_id=graph_id,
            user_id=user_id,
            inputs=inputs,
@@ -885,16 +787,7 @@ async def execute_graph(
            graph_version=graph_version,
            graph_credentials_inputs=credentials_inputs,
        )

        # Record successful graph execution
        record_graph_execution(graph_id=graph_id, status="success", user_id=user_id)
        record_graph_operation(operation="execute", status="success")

        return result
    except GraphValidationError as e:
        # Record failed graph execution
        record_graph_execution(
            graph_id=graph_id, status="validation_error", user_id=user_id
        )
        record_graph_operation(operation="execute", status="validation_error")
        # Return structured validation errors that the frontend can parse
        raise HTTPException(
            status_code=400,
@@ -905,11 +798,6 @@ async def execute_graph(
                "node_errors": e.node_errors,
            },
        )
    except Exception:
        # Record any other failures
        record_graph_execution(graph_id=graph_id, status="error", user_id=user_id)
        record_graph_operation(operation="execute", status="error")
        raise
@v1_router.post(
@@ -963,12 +851,7 @@ async def _stop_graph_run(
async def list_graphs_executions(
    user_id: Annotated[str, Security(get_user_id)],
) -> list[execution_db.GraphExecutionMeta]:
    paginated_result = await execution_db.get_graph_executions_paginated(
        user_id=user_id,
        page=1,
        page_size=250,
    )
    return paginated_result.executions
    return await execution_db.get_graph_executions(user_id=user_id)
@v1_router.get(
@@ -1039,99 +922,6 @@ async def delete_graph_execution(
)
class ShareRequest(pydantic.BaseModel):
    """Optional request body for share endpoint."""

    pass  # Empty body is fine


class ShareResponse(pydantic.BaseModel):
    """Response from share endpoints."""

    share_url: str
    share_token: str


@v1_router.post(
    "/graphs/{graph_id}/executions/{graph_exec_id}/share",
    dependencies=[Security(requires_user)],
)
async def enable_execution_sharing(
    graph_id: Annotated[str, Path],
    graph_exec_id: Annotated[str, Path],
    user_id: Annotated[str, Security(get_user_id)],
    _body: ShareRequest = Body(default=ShareRequest()),
) -> ShareResponse:
    """Enable sharing for a graph execution."""
    # Verify the execution belongs to the user
    execution = await execution_db.get_graph_execution(
        user_id=user_id, execution_id=graph_exec_id
    )
    if not execution:
        raise HTTPException(status_code=404, detail="Execution not found")

    # Generate a unique share token
    share_token = str(uuid.uuid4())

    # Update the execution with share info
    await execution_db.update_graph_execution_share_status(
        execution_id=graph_exec_id,
        user_id=user_id,
        is_shared=True,
        share_token=share_token,
        shared_at=datetime.now(timezone.utc),
    )

    # Return the share URL
    frontend_url = Settings().config.frontend_base_url or "http://localhost:3000"
    share_url = f"{frontend_url}/share/{share_token}"
    return ShareResponse(share_url=share_url, share_token=share_token)


@v1_router.delete(
    "/graphs/{graph_id}/executions/{graph_exec_id}/share",
    status_code=HTTP_204_NO_CONTENT,
    dependencies=[Security(requires_user)],
)
async def disable_execution_sharing(
    graph_id: Annotated[str, Path],
    graph_exec_id: Annotated[str, Path],
    user_id: Annotated[str, Security(get_user_id)],
) -> None:
    """Disable sharing for a graph execution."""
    # Verify the execution belongs to the user
    execution = await execution_db.get_graph_execution(
        user_id=user_id, execution_id=graph_exec_id
    )
    if not execution:
        raise HTTPException(status_code=404, detail="Execution not found")

    # Remove share info
    await execution_db.update_graph_execution_share_status(
        execution_id=graph_exec_id,
        user_id=user_id,
        is_shared=False,
        share_token=None,
        shared_at=None,
    )


@v1_router.get("/public/shared/{share_token}")
async def get_shared_execution(
    share_token: Annotated[
        str,
        Path(regex=r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"),
    ],
) -> execution_db.SharedExecutionResponse:
    """Get a shared graph execution by share token (no auth required)."""
    execution = await execution_db.get_graph_execution_by_share_token(share_token)
    if not execution:
        raise HTTPException(status_code=404, detail="Shared execution not found")
    return execution
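The share_token path regex above only admits lowercase hyphenated hex, which is exactly what `str(uuid.uuid4())` produces, so tokens minted by the share endpoint always round-trip through the public route. A quick check:

```python
import re
import uuid

UUID_RE = re.compile(
    r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
)

token = str(uuid.uuid4())  # uuid4 stringifies to lowercase hex
assert UUID_RE.match(token)
assert not UUID_RE.match("not-a-token")
print(token)
```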
########################################################
##################### Schedules ########################
########################################################
@@ -1143,10 +933,6 @@ class ScheduleCreationRequest(pydantic.BaseModel):
    cron: str
    inputs: dict[str, Any]
    credentials: dict[str, CredentialsMetaInput] = pydantic.Field(default_factory=dict)
    timezone: Optional[str] = pydantic.Field(
        default=None,
        description="User's timezone for scheduling (e.g., 'America/New_York'). If not provided, will use user's saved timezone or UTC.",
    )
@v1_router.post(
@@ -1171,22 +957,26 @@ async def create_graph_execution_schedule(
detail=f"Graph #{graph_id} v{schedule_params.graph_version} not found.",
)
# Use timezone from request if provided, otherwise fetch from user profile
if schedule_params.timezone:
user_timezone = schedule_params.timezone
else:
user = await get_user_by_id(user_id)
user_timezone = get_user_timezone_or_utc(user.timezone if user else None)
user = await get_user_by_id(user_id)
user_timezone = get_user_timezone_or_utc(user.timezone if user else None)
# Convert cron expression from user timezone to UTC
try:
utc_cron = convert_cron_to_utc(schedule_params.cron, user_timezone)
except ValueError as e:
raise HTTPException(
status_code=400,
detail=f"Invalid cron expression for timezone {user_timezone}: {e}",
)
result = await get_scheduler_client().add_execution_schedule(
user_id=user_id,
graph_id=graph_id,
graph_version=graph.version,
name=schedule_params.name,
cron=schedule_params.cron,
cron=utc_cron, # Send UTC cron to scheduler
input_data=schedule_params.inputs,
input_credentials=schedule_params.credentials,
user_timezone=user_timezone,
)
# Convert the next_run_time back to user timezone for display
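The implementation of `convert_cron_to_utc` is not part of this diff; conceptually it shifts the cron's time fields by the user zone's UTC offset. A deliberately naive sketch that only handles the hour field and ignores DST transitions and day wrap-around, both of which a real converter must handle:

```python
from datetime import datetime
from zoneinfo import ZoneInfo

def naive_cron_to_utc(cron: str, tz_name: str) -> str:
    # Illustrative only: shift the hour field by the zone's *current*
    # UTC offset; day/month wrap-around and DST changes are not handled.
    minute, hour, dom, month, dow = cron.split()
    offset_hours = int(
        datetime.now(ZoneInfo(tz_name)).utcoffset().total_seconds() // 3600
    )
    utc_hour = (int(hour) - offset_hours) % 24
    return " ".join([minute, str(utc_hour), dom, month, dow])

# "9am New York" becomes 13:00 or 14:00 UTC depending on DST
print(naive_cron_to_utc("0 9 * * *", "America/New_York"))
```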
@@ -1208,11 +998,24 @@ async def list_graph_execution_schedules(
    user_id: Annotated[str, Security(get_user_id)],
    graph_id: str = Path(),
) -> list[scheduler.GraphExecutionJobInfo]:
    return await get_scheduler_client().get_execution_schedules(
    schedules = await get_scheduler_client().get_execution_schedules(
        user_id=user_id,
        graph_id=graph_id,
    )

    # Get user timezone for conversion
    user = await get_user_by_id(user_id)
    user_timezone = get_user_timezone_or_utc(user.timezone if user else None)

    # Convert next_run_time to user timezone for display
    for schedule in schedules:
        if schedule.next_run_time:
            schedule.next_run_time = convert_utc_time_to_user_timezone(
                schedule.next_run_time, user_timezone
            )

    return schedules
@v1_router.get(
    path="/schedules",
@@ -1223,7 +1026,20 @@ async def list_graph_execution_schedules(
async def list_all_graphs_execution_schedules(
    user_id: Annotated[str, Security(get_user_id)],
) -> list[scheduler.GraphExecutionJobInfo]:
    return await get_scheduler_client().get_execution_schedules(user_id=user_id)
    schedules = await get_scheduler_client().get_execution_schedules(user_id=user_id)

    # Get user timezone for conversion
    user = await get_user_by_id(user_id)
    user_timezone = get_user_timezone_or_utc(user.timezone if user else None)

    # Convert UTC next_run_time to user timezone for display
    for schedule in schedules:
        if schedule.next_run_time:
            schedule.next_run_time = convert_utc_time_to_user_timezone(
                schedule.next_run_time, user_timezone
            )

    return schedules
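Both schedule-listing endpoints finish with the same display conversion; a helper like `convert_utc_time_to_user_timezone` presumably reduces to an `astimezone` call. A sketch where the helper's body is an assumption, not the codebase's implementation:

```python
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

def to_user_timezone(dt: datetime, tz_name: str) -> datetime:
    # Assumed behavior: treat naive datetimes as UTC, then convert.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(ZoneInfo(tz_name))

utc_run = datetime(2025, 9, 11, 14, 0, tzinfo=timezone.utc)
print(to_user_timezone(utc_run, "America/New_York"))  # 2025-09-11 10:00-04:00
```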
@v1_router.delete(

Some files were not shown because too many files have changed in this diff.