diff --git a/.branchlet.json b/.branchlet.json index cc13ff9f74..d02cd60e20 100644 --- a/.branchlet.json +++ b/.branchlet.json @@ -29,8 +29,7 @@ "postCreateCmd": [ "cd autogpt_platform/autogpt_libs && poetry install", "cd autogpt_platform/backend && poetry install && poetry run prisma generate", - "cd autogpt_platform/frontend && pnpm install", - "cd docs && pip install -r requirements.txt" + "cd autogpt_platform/frontend && pnpm install" ], "terminalCommand": "code .", "deleteBranchWithWorktree": false diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 870e6b4b0a..3c72eaae18 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -160,7 +160,7 @@ pnpm storybook # Start component development server **Backend Entry Points:** -- `backend/backend/server/server.py` - FastAPI application setup +- `backend/backend/api/rest_api.py` - FastAPI application setup - `backend/backend/data/` - Database models and user management - `backend/blocks/` - Agent execution blocks and logic @@ -219,7 +219,7 @@ Agents are built using a visual block-based system where each block performs a s ### API Development -1. Update routes in `/backend/backend/server/routers/` +1. Update routes in `/backend/backend/api/features/` 2. Add/update Pydantic models in same directory 3. Write tests alongside route files 4. For `data/*.py` changes, validate user ID checks @@ -285,7 +285,7 @@ Agents are built using a visual block-based system where each block performs a s ### Security Guidelines -**Cache Protection Middleware** (`/backend/backend/server/middleware/security.py`): +**Cache Protection Middleware** (`/backend/backend/api/middleware/security.py`): - Default: Disables caching for ALL endpoints with `Cache-Control: no-store, no-cache, must-revalidate, private` - Uses allow list approach for cacheable paths (static assets, health checks, public pages) diff --git a/.gitignore b/.gitignore index dfce8ba810..1a2291b516 100644 --- a/.gitignore +++ b/.gitignore @@ -178,4 +178,5 @@ autogpt_platform/backend/settings.py *.ign.* .test-contents .claude/settings.local.json +CLAUDE.local.md /autogpt_platform/backend/logs diff --git a/AGENTS.md b/AGENTS.md index cd176f8a2d..202c4c6e02 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,7 +16,6 @@ See `docs/content/platform/getting-started.md` for setup instructions. - Format Python code with `poetry run format`. - Format frontend code using `pnpm format`. - ## Frontend guidelines: See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: @@ -33,14 +32,17 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: 4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only 5. **Testing**: Add Storybook stories for new components, Playwright for E2E 6. **Code conventions**: Function declarations (not arrow functions) for components/handlers + - Component props should be `interface Props { ... 
}` (not exported) unless the interface needs to be used outside the component - Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts) - Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible - Avoid large hooks, abstract logic into `helpers.ts` files when sensible - Use function declarations for components, arrow functions only for callbacks - No barrel files or `index.ts` re-exports -- Do not use `useCallback` or `useMemo` unless strictly needed - Avoid comments at all times unless the code is very complex +- Do not use `useCallback` or `useMemo` unless asked to optimise a given function +- Do not type hook returns, let Typescript infer as much as possible +- Never type with `any`, if not types available use `unknown` ## Testing @@ -49,22 +51,8 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: Always run the relevant linters and tests before committing. Use conventional commit messages for all commits (e.g. `feat(backend): add API`). - Types: - - feat - - fix - - refactor - - ci - - dx (developer experience) - Scopes: - - platform - - platform/library - - platform/marketplace - - backend - - backend/executor - - frontend - - frontend/library - - frontend/marketplace - - blocks +Types: - feat - fix - refactor - ci - dx (developer experience) +Scopes: - platform - platform/library - platform/marketplace - backend - backend/executor - frontend - frontend/library - frontend/marketplace - blocks ## Pull requests diff --git a/README.md b/README.md index 3572fe318b..349d8818ef 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Before proceeding with the installation, ensure your system meets the following ### Updated Setup Instructions: We've moved to a fully maintained and regularly updated documentation site. -👉 [Follow the official self-hosting guide here](https://docs.agpt.co/platform/getting-started/) +👉 [Follow the official self-hosting guide here](https://agpt.co/docs/platform/getting-started/getting-started) This tutorial assumes you have Docker, VSCode, git and npm installed. 
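As an illustration of the frontend conventions listed above, a minimal sketch of the `component.tsx` + `useComponent.ts` split might look like the following. `GreetingCard` and `useGreetingCard` are hypothetical names used only for this example, not components that exist in the codebase.

```tsx
// useGreetingCard.ts — business logic, colocated next to the component.
// Return type is intentionally not annotated; TypeScript infers it.
import { useState } from "react";

export function useGreetingCard(initialName: string) {
  const [name, setName] = useState(initialName);

  function handleNameChange(value: unknown) {
    // Narrow `unknown` instead of reaching for `any`.
    if (typeof value === "string") setName(value);
  }

  return { name, handleNameChange };
}

// GreetingCard.tsx — render logic only.
// Props interface stays non-exported because nothing outside uses it.
interface Props {
  initialName: string;
}

export function GreetingCard({ initialName }: Props) {
  const { name, handleNameChange } = useGreetingCard(initialName);

  return (
    <input
      value={name}
      onChange={(e) => handleNameChange(e.target.value)}
    />
  );
}
```

Leaving the hook's return untyped lets TypeScript infer `{ name, handleNameChange }`, and narrowing `unknown` inside the handler keeps `any` out of the call site; the component itself is a function declaration, with an arrow function used only for the inline callback.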
diff --git a/autogpt_platform/CLAUDE.md b/autogpt_platform/CLAUDE.md index 9690178587..62adbdaefa 100644 --- a/autogpt_platform/CLAUDE.md +++ b/autogpt_platform/CLAUDE.md @@ -6,152 +6,30 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co AutoGPT Platform is a monorepo containing: -- **Backend** (`/backend`): Python FastAPI server with async support -- **Frontend** (`/frontend`): Next.js React application -- **Shared Libraries** (`/autogpt_libs`): Common Python utilities +- **Backend** (`backend`): Python FastAPI server with async support +- **Frontend** (`frontend`): Next.js React application +- **Shared Libraries** (`autogpt_libs`): Common Python utilities -## Essential Commands +## Component Documentation -### Backend Development +- **Backend**: See @backend/CLAUDE.md for backend-specific commands, architecture, and development tasks +- **Frontend**: See @frontend/CLAUDE.md for frontend-specific commands, architecture, and development patterns -```bash -# Install dependencies -cd backend && poetry install - -# Run database migrations -poetry run prisma migrate dev - -# Start all services (database, redis, rabbitmq, clamav) -docker compose up -d - -# Run the backend server -poetry run serve - -# Run tests -poetry run test - -# Run specific test -poetry run pytest path/to/test_file.py::test_function_name - -# Run block tests (tests that validate all blocks work correctly) -poetry run pytest backend/blocks/test/test_block.py -xvs - -# Run tests for a specific block (e.g., GetCurrentTimeBlock) -poetry run pytest 'backend/blocks/test/test_block.py::test_available_blocks[GetCurrentTimeBlock]' -xvs - -# Lint and format -# prefer format if you want to just "fix" it and only get the errors that can't be autofixed -poetry run format # Black + isort -poetry run lint # ruff -``` - -More details can be found in TESTING.md - -#### Creating/Updating Snapshots - -When you first write a test or when the expected output changes: - -```bash -poetry run pytest path/to/test.py --snapshot-update -``` - -⚠️ **Important**: Always review snapshot changes before committing! Use `git diff` to verify the changes are expected. - -### Frontend Development - -```bash -# Install dependencies -cd frontend && pnpm i - -# Generate API client from OpenAPI spec -pnpm generate:api - -# Start development server -pnpm dev - -# Run E2E tests -pnpm test - -# Run Storybook for component development -pnpm storybook - -# Build production -pnpm build - -# Format and lint -pnpm format - -# Type checking -pnpm types -``` - -**📖 Complete Guide**: See `/frontend/CONTRIBUTING.md` and `/frontend/.cursorrules` for comprehensive frontend patterns. 
- -**Key Frontend Conventions:** - -- Separate render logic from data/behavior in components -- Use generated API hooks from `@/app/api/__generated__/endpoints/` -- Use function declarations (not arrow functions) for components/handlers -- Use design system components from `src/components/` (atoms, molecules, organisms) -- Only use Phosphor Icons -- Never use `src/components/__legacy__/*` or deprecated `BackendAPI` - -## Architecture Overview - -### Backend Architecture - -- **API Layer**: FastAPI with REST and WebSocket endpoints -- **Database**: PostgreSQL with Prisma ORM, includes pgvector for embeddings -- **Queue System**: RabbitMQ for async task processing -- **Execution Engine**: Separate executor service processes agent workflows -- **Authentication**: JWT-based with Supabase integration -- **Security**: Cache protection middleware prevents sensitive data caching in browsers/proxies - -### Frontend Architecture - -- **Framework**: Next.js 15 App Router (client-first approach) -- **Data Fetching**: Type-safe generated API hooks via Orval + React Query -- **State Management**: React Query for server state, co-located UI state in components/hooks -- **Component Structure**: Separate render logic (`.tsx`) from business logic (`use*.ts` hooks) -- **Workflow Builder**: Visual graph editor using @xyflow/react -- **UI Components**: shadcn/ui (Radix UI primitives) with Tailwind CSS styling -- **Icons**: Phosphor Icons only -- **Feature Flags**: LaunchDarkly integration -- **Error Handling**: ErrorCard for render errors, toast for mutations, Sentry for exceptions -- **Testing**: Playwright for E2E, Storybook for component development - -### Key Concepts +## Key Concepts 1. **Agent Graphs**: Workflow definitions stored as JSON, executed by the backend -2. **Blocks**: Reusable components in `/backend/blocks/` that perform specific tasks +2. **Blocks**: Reusable components in `backend/backend/blocks/` that perform specific tasks 3. **Integrations**: OAuth and API connections stored per user 4. **Store**: Marketplace for sharing agent templates 5. 
**Virus Scanning**: ClamAV integration for file upload security -### Testing Approach - -- Backend uses pytest with snapshot testing for API responses -- Test files are colocated with source files (`*_test.py`) -- Frontend uses Playwright for E2E tests -- Component testing via Storybook - -### Database Schema - -Key models (defined in `/backend/schema.prisma`): - -- `User`: Authentication and profile data -- `AgentGraph`: Workflow definitions with version control -- `AgentGraphExecution`: Execution history and results -- `AgentNode`: Individual nodes in a workflow -- `StoreListing`: Marketplace listings for sharing agents - ### Environment Configuration #### Configuration Files -- **Backend**: `/backend/.env.default` (defaults) → `/backend/.env` (user overrides) -- **Frontend**: `/frontend/.env.default` (defaults) → `/frontend/.env` (user overrides) -- **Platform**: `/.env.default` (Supabase/shared defaults) → `/.env` (user overrides) +- **Backend**: `backend/.env.default` (defaults) → `backend/.env` (user overrides) +- **Frontend**: `frontend/.env.default` (defaults) → `frontend/.env` (user overrides) +- **Platform**: `.env.default` (Supabase/shared defaults) → `.env` (user overrides) #### Docker Environment Loading Order @@ -167,127 +45,12 @@ Key models (defined in `/backend/schema.prisma`): - Backend/Frontend services use YAML anchors for consistent configuration - Supabase services (`db/docker/docker-compose.yml`) follow the same pattern -### Common Development Tasks - -**Adding a new block:** - -Follow the comprehensive [Block SDK Guide](../../../docs/content/platform/block-sdk-guide.md) which covers: - -- Provider configuration with `ProviderBuilder` -- Block schema definition -- Authentication (API keys, OAuth, webhooks) -- Testing and validation -- File organization - -Quick steps: - -1. Create new file in `/backend/backend/blocks/` -2. Configure provider using `ProviderBuilder` in `_config.py` -3. Inherit from `Block` base class -4. Define input/output schemas using `BlockSchema` -5. Implement async `run` method -6. Generate unique block ID using `uuid.uuid4()` -7. Test with `poetry run pytest backend/blocks/test/test_block.py` - -Note: when making many new blocks analyze the interfaces for each of these blocks and picture if they would go well together in a graph based editor or would they struggle to connect productively? -ex: do the inputs and outputs tie well together? - -If you get any pushback or hit complex block conditions check the new_blocks guide in the docs. - -**Handling files in blocks with `store_media_file()`:** - -When blocks need to work with files (images, videos, documents), use `store_media_file()` from `backend.util.file`. 
The `return_format` parameter determines what you get back: - -| Format | Use When | Returns | -|--------|----------|---------| -| `"for_local_processing"` | Processing with local tools (ffmpeg, MoviePy, PIL) | Local file path (e.g., `"image.png"`) | -| `"for_external_api"` | Sending content to external APIs (Replicate, OpenAI) | Data URI (e.g., `"data:image/png;base64,..."`) | -| `"for_block_output"` | Returning output from your block | Smart: `workspace://` in CoPilot, data URI in graphs | - -**Examples:** -```python -# INPUT: Need to process file locally with ffmpeg -local_path = await store_media_file( - file=input_data.video, - execution_context=execution_context, - return_format="for_local_processing", -) -# local_path = "video.mp4" - use with Path/ffmpeg/etc - -# INPUT: Need to send to external API like Replicate -image_b64 = await store_media_file( - file=input_data.image, - execution_context=execution_context, - return_format="for_external_api", -) -# image_b64 = "data:image/png;base64,iVBORw0..." - send to API - -# OUTPUT: Returning result from block -result_url = await store_media_file( - file=generated_image_url, - execution_context=execution_context, - return_format="for_block_output", -) -yield "image_url", result_url -# In CoPilot: result_url = "workspace://abc123" -# In graphs: result_url = "data:image/png;base64,..." -``` - -**Key points:** -- `for_block_output` is the ONLY format that auto-adapts to execution context -- Always use `for_block_output` for block outputs unless you have a specific reason not to -- Never hardcode workspace checks - let `for_block_output` handle it - -**Modifying the API:** - -1. Update route in `/backend/backend/server/routers/` -2. Add/update Pydantic models in same directory -3. Write tests alongside the route file -4. Run `poetry run test` to verify - -### Frontend guidelines: - -See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: - -1. **Pages**: Create in `src/app/(platform)/feature-name/page.tsx` - - Add `usePageName.ts` hook for logic - - Put sub-components in local `components/` folder -2. **Components**: Structure as `ComponentName/ComponentName.tsx` + `useComponentName.ts` + `helpers.ts` - - Use design system components from `src/components/` (atoms, molecules, organisms) - - Never use `src/components/__legacy__/*` -3. **Data fetching**: Use generated API hooks from `@/app/api/__generated__/endpoints/` - - Regenerate with `pnpm generate:api` - - Pattern: `use{Method}{Version}{OperationName}` -4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only -5. **Testing**: Add Storybook stories for new components, Playwright for E2E -6. **Code conventions**: Function declarations (not arrow functions) for components/handlers -- Component props should be `interface Props { ... 
}` (not exported) unless the interface needs to be used outside the component -- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts) -- Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible -- Avoid large hooks, abstract logic into `helpers.ts` files when sensible -- Use function declarations for components, arrow functions only for callbacks -- No barrel files or `index.ts` re-exports -- Do not use `useCallback` or `useMemo` unless strictly needed -- Avoid comments at all times unless the code is very complex - -### Security Implementation - -**Cache Protection Middleware:** - -- Located in `/backend/backend/server/middleware/security.py` -- Default behavior: Disables caching for ALL endpoints with `Cache-Control: no-store, no-cache, must-revalidate, private` -- Uses an allow list approach - only explicitly permitted paths can be cached -- Cacheable paths include: static assets (`/static/*`, `/_next/static/*`), health checks, public store pages, documentation -- Prevents sensitive data (auth tokens, API keys, user data) from being cached by browsers/proxies -- To allow caching for a new endpoint, add it to `CACHEABLE_PATHS` in the middleware -- Applied to both main API server and external API applications - ### Creating Pull Requests -- Create the PR aginst the `dev` branch of the repository. -- Ensure the branch name is descriptive (e.g., `feature/add-new-block`)/ -- Use conventional commit messages (see below)/ -- Fill out the .github/PULL_REQUEST_TEMPLATE.md template as the PR description/ +- Create the PR against the `dev` branch of the repository. +- Ensure the branch name is descriptive (e.g., `feature/add-new-block`) +- Use conventional commit messages (see below) +- Fill out the .github/PULL_REQUEST_TEMPLATE.md template as the PR description - Run the github pre-commit hooks to ensure code quality. ### Reviewing/Revising Pull Requests diff --git a/autogpt_platform/backend/CLAUDE.md b/autogpt_platform/backend/CLAUDE.md new file mode 100644 index 0000000000..53d52bb4d3 --- /dev/null +++ b/autogpt_platform/backend/CLAUDE.md @@ -0,0 +1,170 @@ +# CLAUDE.md - Backend + +This file provides guidance to Claude Code when working with the backend. + +## Essential Commands + +To run something with Python package dependencies you MUST use `poetry run ...`. + +```bash +# Install dependencies +poetry install + +# Run database migrations +poetry run prisma migrate dev + +# Start all services (database, redis, rabbitmq, clamav) +docker compose up -d + +# Run the backend as a whole +poetry run app + +# Run tests +poetry run test + +# Run specific test +poetry run pytest path/to/test_file.py::test_function_name + +# Run block tests (tests that validate all blocks work correctly) +poetry run pytest backend/blocks/test/test_block.py -xvs + +# Run tests for a specific block (e.g., GetCurrentTimeBlock) +poetry run pytest 'backend/blocks/test/test_block.py::test_available_blocks[GetCurrentTimeBlock]' -xvs + +# Lint and format +# prefer format if you want to just "fix" it and only get the errors that can't be autofixed +poetry run format # Black + isort +poetry run lint # ruff +``` + +More details can be found in @TESTING.md + +### Creating/Updating Snapshots + +When you first write a test or when the expected output changes: + +```bash +poetry run pytest path/to/test.py --snapshot-update +``` + +⚠️ **Important**: Always review snapshot changes before committing! 
Use `git diff` to verify the changes are expected. + +## Architecture + +- **API Layer**: FastAPI with REST and WebSocket endpoints +- **Database**: PostgreSQL with Prisma ORM, includes pgvector for embeddings +- **Queue System**: RabbitMQ for async task processing +- **Execution Engine**: Separate executor service processes agent workflows +- **Authentication**: JWT-based with Supabase integration +- **Security**: Cache protection middleware prevents sensitive data caching in browsers/proxies + +## Testing Approach + +- Uses pytest with snapshot testing for API responses +- Test files are colocated with source files (`*_test.py`) + +## Database Schema + +Key models (defined in `schema.prisma`): + +- `User`: Authentication and profile data +- `AgentGraph`: Workflow definitions with version control +- `AgentGraphExecution`: Execution history and results +- `AgentNode`: Individual nodes in a workflow +- `StoreListing`: Marketplace listings for sharing agents + +## Environment Configuration + +- **Backend**: `.env.default` (defaults) → `.env` (user overrides) + +## Common Development Tasks + +### Adding a new block + +Follow the comprehensive [Block SDK Guide](@../../docs/content/platform/block-sdk-guide.md) which covers: + +- Provider configuration with `ProviderBuilder` +- Block schema definition +- Authentication (API keys, OAuth, webhooks) +- Testing and validation +- File organization + +Quick steps: + +1. Create new file in `backend/blocks/` +2. Configure provider using `ProviderBuilder` in `_config.py` +3. Inherit from `Block` base class +4. Define input/output schemas using `BlockSchema` +5. Implement async `run` method +6. Generate unique block ID using `uuid.uuid4()` +7. Test with `poetry run pytest backend/blocks/test/test_block.py` + +Note: when making many new blocks analyze the interfaces for each of these blocks and picture if they would go well together in a graph-based editor or would they struggle to connect productively? +ex: do the inputs and outputs tie well together? + +If you get any pushback or hit complex block conditions check the new_blocks guide in the docs. + +#### Handling files in blocks with `store_media_file()` + +When blocks need to work with files (images, videos, documents), use `store_media_file()` from `backend.util.file`. The `return_format` parameter determines what you get back: + +| Format | Use When | Returns | +|--------|----------|---------| +| `"for_local_processing"` | Processing with local tools (ffmpeg, MoviePy, PIL) | Local file path (e.g., `"image.png"`) | +| `"for_external_api"` | Sending content to external APIs (Replicate, OpenAI) | Data URI (e.g., `"data:image/png;base64,..."`) | +| `"for_block_output"` | Returning output from your block | Smart: `workspace://` in CoPilot, data URI in graphs | + +**Examples:** + +```python +# INPUT: Need to process file locally with ffmpeg +local_path = await store_media_file( + file=input_data.video, + execution_context=execution_context, + return_format="for_local_processing", +) +# local_path = "video.mp4" - use with Path/ffmpeg/etc + +# INPUT: Need to send to external API like Replicate +image_b64 = await store_media_file( + file=input_data.image, + execution_context=execution_context, + return_format="for_external_api", +) +# image_b64 = "data:image/png;base64,iVBORw0..." 
- send to API + +# OUTPUT: Returning result from block +result_url = await store_media_file( + file=generated_image_url, + execution_context=execution_context, + return_format="for_block_output", +) +yield "image_url", result_url +# In CoPilot: result_url = "workspace://abc123" +# In graphs: result_url = "data:image/png;base64,..." +``` + +**Key points:** + +- `for_block_output` is the ONLY format that auto-adapts to execution context +- Always use `for_block_output` for block outputs unless you have a specific reason not to +- Never hardcode workspace checks - let `for_block_output` handle it + +### Modifying the API + +1. Update route in `backend/api/features/` +2. Add/update Pydantic models in same directory +3. Write tests alongside the route file +4. Run `poetry run test` to verify + +## Security Implementation + +### Cache Protection Middleware + +- Located in `backend/api/middleware/security.py` +- Default behavior: Disables caching for ALL endpoints with `Cache-Control: no-store, no-cache, must-revalidate, private` +- Uses an allow list approach - only explicitly permitted paths can be cached +- Cacheable paths include: static assets (`static/*`, `_next/static/*`), health checks, public store pages, documentation +- Prevents sensitive data (auth tokens, API keys, user data) from being cached by browsers/proxies +- To allow caching for a new endpoint, add it to `CACHEABLE_PATHS` in the middleware +- Applied to both main API server and external API applications diff --git a/autogpt_platform/backend/TESTING.md b/autogpt_platform/backend/TESTING.md index a3a5db68ef..2e09144485 100644 --- a/autogpt_platform/backend/TESTING.md +++ b/autogpt_platform/backend/TESTING.md @@ -138,7 +138,7 @@ If the test doesn't need the `user_id` specifically, mocking is not necessary as #### Using Global Auth Fixtures -Two global auth fixtures are provided by `backend/server/conftest.py`: +Two global auth fixtures are provided by `backend/api/conftest.py`: - `mock_jwt_user` - Regular user with `test_user_id` ("test-user-id") - `mock_jwt_admin` - Admin user with `admin_user_id` ("admin-user-id") diff --git a/autogpt_platform/backend/backend/api/features/builder/routes.py b/autogpt_platform/backend/backend/api/features/builder/routes.py index 1e78a1d599..5db1b5bfaa 100644 --- a/autogpt_platform/backend/backend/api/features/builder/routes.py +++ b/autogpt_platform/backend/backend/api/features/builder/routes.py @@ -19,7 +19,7 @@ router = fastapi.APIRouter( ) -# Taken from backend/server/v2/store/db.py +# Taken from backend/api/features/store/db.py def sanitize_query(query: str | None) -> str | None: if query is None: return query diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index 20216162b5..f1f3156713 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -3,7 +3,8 @@ import logging import time from asyncio import CancelledError from collections.abc import AsyncGenerator -from typing import Any +from dataclasses import dataclass +from typing import Any, cast import openai import orjson @@ -15,7 +16,14 @@ from openai import ( PermissionDeniedError, RateLimitError, ) -from openai.types.chat import ChatCompletionChunk, ChatCompletionToolParam +from openai.types.chat import ( + ChatCompletionAssistantMessageParam, + ChatCompletionChunk, + ChatCompletionMessageParam, + ChatCompletionStreamOptionsParam, + 
ChatCompletionSystemMessageParam, + ChatCompletionToolParam, +) from backend.data.redis_client import get_redis_async from backend.data.understanding import ( @@ -23,6 +31,7 @@ from backend.data.understanding import ( get_business_understanding, ) from backend.util.exceptions import NotFoundError +from backend.util.prompt import estimate_token_count from backend.util.settings import Settings from . import db as chat_db @@ -794,6 +803,201 @@ def _is_region_blocked_error(error: Exception) -> bool: return "not available in your region" in str(error).lower() +# Context window management constants +TOKEN_THRESHOLD = 120_000 +KEEP_RECENT_MESSAGES = 15 + + +@dataclass +class ContextWindowResult: + """Result of context window management.""" + + messages: list[dict[str, Any]] + token_count: int + was_compacted: bool + error: str | None = None + + +def _messages_to_dicts(messages: list) -> list[dict[str, Any]]: + """Convert message objects to dicts, filtering None values. + + Handles both TypedDict (dict-like) and other message formats. + """ + result = [] + for msg in messages: + if msg is None: + continue + if isinstance(msg, dict): + msg_dict = {k: v for k, v in msg.items() if v is not None} + else: + msg_dict = dict(msg) + result.append(msg_dict) + return result + + +async def _manage_context_window( + messages: list, + model: str, + api_key: str | None = None, + base_url: str | None = None, +) -> ContextWindowResult: + """ + Manage context window by summarizing old messages if token count exceeds threshold. + + This function handles context compaction for LLM calls by: + 1. Counting tokens in the message list + 2. If over threshold, summarizing old messages while keeping recent ones + 3. Ensuring tool_call/tool_response pairs stay intact + 4. Progressively reducing message count if still over limit + + Args: + messages: List of messages in OpenAI format (with system prompt if present) + model: Model name for token counting + api_key: API key for summarization calls + base_url: Base URL for summarization calls + + Returns: + ContextWindowResult with compacted messages and metadata + """ + if not messages: + return ContextWindowResult([], 0, False, "No messages to compact") + + messages_dict = _messages_to_dicts(messages) + + # Normalize model name for token counting (tiktoken only supports OpenAI models) + token_count_model = model.split("/")[-1] if "/" in model else model + if "claude" in token_count_model.lower() or not any( + known in token_count_model.lower() + for known in ["gpt", "o1", "chatgpt", "text-"] + ): + token_count_model = "gpt-4o" + + try: + token_count = estimate_token_count(messages_dict, model=token_count_model) + except Exception as e: + logger.warning(f"Token counting failed: {e}. 
Using gpt-4o approximation.") + token_count_model = "gpt-4o" + token_count = estimate_token_count(messages_dict, model=token_count_model) + + if token_count <= TOKEN_THRESHOLD: + return ContextWindowResult(messages, token_count, False) + + has_system_prompt = messages[0].get("role") == "system" + slice_start = max(0, len(messages_dict) - KEEP_RECENT_MESSAGES) + recent_messages = _ensure_tool_pairs_intact( + messages_dict[-KEEP_RECENT_MESSAGES:], messages_dict, slice_start + ) + + # Determine old messages to summarize (explicit bounds to avoid slice edge cases) + system_msg = messages[0] if has_system_prompt else None + if has_system_prompt: + old_messages_dict = ( + messages_dict[1:-KEEP_RECENT_MESSAGES] + if len(messages_dict) > KEEP_RECENT_MESSAGES + 1 + else [] + ) + else: + old_messages_dict = ( + messages_dict[:-KEEP_RECENT_MESSAGES] + if len(messages_dict) > KEEP_RECENT_MESSAGES + else [] + ) + + # Try to summarize old messages, fall back to truncation on failure + summary_msg = None + if old_messages_dict: + try: + summary_text = await _summarize_messages( + old_messages_dict, model=model, api_key=api_key, base_url=base_url + ) + summary_msg = ChatCompletionAssistantMessageParam( + role="assistant", + content=f"[Previous conversation summary — for context only]: {summary_text}", + ) + base = [system_msg, summary_msg] if has_system_prompt else [summary_msg] + messages = base + recent_messages + logger.info( + f"Context summarized: {token_count} tokens, " + f"summarized {len(old_messages_dict)} msgs, kept {KEEP_RECENT_MESSAGES}" + ) + except Exception as e: + logger.warning(f"Summarization failed, falling back to truncation: {e}") + messages = ( + [system_msg] + recent_messages if has_system_prompt else recent_messages + ) + else: + logger.warning( + f"Token count {token_count} exceeds threshold but no old messages to summarize" + ) + + new_token_count = estimate_token_count( + _messages_to_dicts(messages), model=token_count_model + ) + + # Progressive truncation if still over limit + if new_token_count > TOKEN_THRESHOLD: + logger.warning( + f"Still over limit: {new_token_count} tokens. Reducing messages." + ) + base_msgs = ( + recent_messages + if old_messages_dict + else (messages_dict[1:] if has_system_prompt else messages_dict) + ) + + def build_messages(recent: list) -> list: + """Build message list with optional system prompt and summary.""" + prefix = [] + if has_system_prompt and system_msg: + prefix.append(system_msg) + if summary_msg: + prefix.append(summary_msg) + return prefix + recent + + for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]: + if keep_count == 0: + messages = build_messages([]) + if not messages: + continue + elif len(base_msgs) < keep_count: + continue + else: + reduced = _ensure_tool_pairs_intact( + base_msgs[-keep_count:], + base_msgs, + max(0, len(base_msgs) - keep_count), + ) + messages = build_messages(reduced) + + new_token_count = estimate_token_count( + _messages_to_dicts(messages), model=token_count_model + ) + if new_token_count <= TOKEN_THRESHOLD: + logger.info( + f"Reduced to {keep_count} messages, {new_token_count} tokens" + ) + break + else: + logger.error( + f"Cannot reduce below threshold. 
Final: {new_token_count} tokens" + ) + if has_system_prompt and len(messages) > 1: + messages = messages[1:] + logger.critical("Dropped system prompt as last resort") + return ContextWindowResult( + messages, new_token_count, True, "System prompt dropped" + ) + # No system prompt to drop - return error so callers don't proceed with oversized context + return ContextWindowResult( + messages, + new_token_count, + True, + "Unable to reduce context below token limit", + ) + + return ContextWindowResult(messages, new_token_count, True) + + async def _summarize_messages( messages: list, model: str, @@ -1022,11 +1226,8 @@ async def _stream_chat_chunks( logger.info("Starting pure chat stream") - # Build messages with system prompt prepended messages = session.to_openai_messages() if system_prompt: - from openai.types.chat import ChatCompletionSystemMessageParam - system_message = ChatCompletionSystemMessageParam( role="system", content=system_prompt, @@ -1034,314 +1235,38 @@ async def _stream_chat_chunks( messages = [system_message] + messages # Apply context window management - token_count = 0 # Initialize for exception handler - try: - from backend.util.prompt import estimate_token_count + context_result = await _manage_context_window( + messages=messages, + model=model, + api_key=config.api_key, + base_url=config.base_url, + ) - # Convert to dict for token counting - # OpenAI message types are TypedDicts, so they're already dict-like - messages_dict = [] - for msg in messages: - # TypedDict objects are already dicts, just filter None values - if isinstance(msg, dict): - msg_dict = {k: v for k, v in msg.items() if v is not None} - else: - # Fallback for unexpected types - msg_dict = dict(msg) - messages_dict.append(msg_dict) - - # Estimate tokens using appropriate tokenizer - # Normalize model name for token counting (tiktoken only supports OpenAI models) - token_count_model = model - if "/" in model: - # Strip provider prefix (e.g., "anthropic/claude-opus-4.5" -> "claude-opus-4.5") - token_count_model = model.split("/")[-1] - - # For Claude and other non-OpenAI models, approximate with gpt-4o tokenizer - # Most modern LLMs have similar tokenization (~1 token per 4 chars) - if "claude" in token_count_model.lower() or not any( - known in token_count_model.lower() - for known in ["gpt", "o1", "chatgpt", "text-"] - ): - token_count_model = "gpt-4o" - - # Attempt token counting with error handling - try: - token_count = estimate_token_count(messages_dict, model=token_count_model) - except Exception as token_error: - # If token counting fails, use gpt-4o as fallback approximation - logger.warning( - f"Token counting failed for model {token_count_model}: {token_error}. " - "Using gpt-4o approximation." 
- ) - token_count = estimate_token_count(messages_dict, model="gpt-4o") - - # If over threshold, summarize old messages - if token_count > 120_000: - KEEP_RECENT = 15 - - # Check if we have a system prompt at the start - has_system_prompt = ( - len(messages) > 0 and messages[0].get("role") == "system" - ) - - # Always attempt mitigation when over limit, even with few messages - if messages: - # Split messages based on whether system prompt exists - # Calculate start index for the slice - slice_start = max(0, len(messages_dict) - KEEP_RECENT) - recent_messages = messages_dict[-KEEP_RECENT:] - - # Ensure tool_call/tool_response pairs stay together - # This prevents API errors from orphan tool responses - recent_messages = _ensure_tool_pairs_intact( - recent_messages, messages_dict, slice_start - ) - - if has_system_prompt: - # Keep system prompt separate, summarize everything between system and recent - system_msg = messages[0] - old_messages_dict = messages_dict[1:-KEEP_RECENT] - else: - # No system prompt, summarize everything except recent - system_msg = None - old_messages_dict = messages_dict[:-KEEP_RECENT] - - # Summarize any non-empty old messages (no minimum threshold) - # If we're over the token limit, we need to compress whatever we can - if old_messages_dict: - # Summarize old messages using the same model as chat - summary_text = await _summarize_messages( - old_messages_dict, - model=model, - api_key=config.api_key, - base_url=config.base_url, - ) - - # Build new message list - # Use assistant role (not system) to prevent privilege escalation - # of user-influenced content to instruction-level authority - from openai.types.chat import ChatCompletionAssistantMessageParam - - summary_msg = ChatCompletionAssistantMessageParam( - role="assistant", - content=( - "[Previous conversation summary — for context only]: " - f"{summary_text}" - ), - ) - - # Rebuild messages based on whether we have a system prompt - if has_system_prompt: - # system_prompt + summary + recent_messages - messages = [system_msg, summary_msg] + recent_messages - else: - # summary + recent_messages (no original system prompt) - messages = [summary_msg] + recent_messages - - logger.info( - f"Context summarized: {token_count} tokens, " - f"summarized {len(old_messages_dict)} old messages, " - f"kept last {KEEP_RECENT} messages" - ) - - # Fallback: If still over limit after summarization, progressively drop recent messages - # This handles edge cases where recent messages are extremely large - new_messages_dict = [] - for msg in messages: - if isinstance(msg, dict): - msg_dict = {k: v for k, v in msg.items() if v is not None} - else: - msg_dict = dict(msg) - new_messages_dict.append(msg_dict) - - new_token_count = estimate_token_count( - new_messages_dict, model=token_count_model - ) - - if new_token_count > 120_000: - # Still over limit - progressively reduce KEEP_RECENT - logger.warning( - f"Still over limit after summarization: {new_token_count} tokens. " - "Reducing number of recent messages kept." 
- ) - - for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]: - if keep_count == 0: - # Try with just system prompt + summary (no recent messages) - if has_system_prompt: - messages = [system_msg, summary_msg] - else: - messages = [summary_msg] - logger.info( - "Trying with 0 recent messages (system + summary only)" - ) - else: - # Slice from ORIGINAL recent_messages to avoid duplicating summary - reduced_recent = ( - recent_messages[-keep_count:] - if len(recent_messages) >= keep_count - else recent_messages - ) - # Ensure tool pairs stay intact in the reduced slice - reduced_slice_start = max( - 0, len(recent_messages) - keep_count - ) - reduced_recent = _ensure_tool_pairs_intact( - reduced_recent, recent_messages, reduced_slice_start - ) - if has_system_prompt: - messages = [ - system_msg, - summary_msg, - ] + reduced_recent - else: - messages = [summary_msg] + reduced_recent - - new_messages_dict = [] - for msg in messages: - if isinstance(msg, dict): - msg_dict = { - k: v for k, v in msg.items() if v is not None - } - else: - msg_dict = dict(msg) - new_messages_dict.append(msg_dict) - - new_token_count = estimate_token_count( - new_messages_dict, model=token_count_model - ) - - if new_token_count <= 120_000: - logger.info( - f"Reduced to {keep_count} recent messages, " - f"now {new_token_count} tokens" - ) - break - else: - logger.error( - f"Unable to reduce token count below threshold even with 0 messages. " - f"Final count: {new_token_count} tokens" - ) - # ABSOLUTE LAST RESORT: Drop system prompt - # This should only happen if summary itself is massive - if has_system_prompt and len(messages) > 1: - messages = messages[1:] # Drop system prompt - logger.critical( - "CRITICAL: Dropped system prompt as absolute last resort. " - "Behavioral consistency may be affected." - ) - # Yield error to user - yield StreamError( - errorText=( - "Warning: System prompt dropped due to size constraints. " - "Assistant behavior may be affected." - ) - ) - else: - # No old messages to summarize - all messages are "recent" - # Apply progressive truncation to reduce token count - logger.warning( - f"Token count {token_count} exceeds threshold but no old messages to summarize. " - f"Applying progressive truncation to recent messages." 
- ) - - # Create a base list excluding system prompt to avoid duplication - # This is the pool of messages we'll slice from in the loop - # Use messages_dict for type consistency with _ensure_tool_pairs_intact - base_msgs = ( - messages_dict[1:] if has_system_prompt else messages_dict - ) - - # Try progressively smaller keep counts - new_token_count = token_count # Initialize with current count - for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]: - if keep_count == 0: - # Try with just system prompt (no recent messages) - if has_system_prompt: - messages = [system_msg] - logger.info( - "Trying with 0 recent messages (system prompt only)" - ) - else: - # No system prompt and no recent messages = empty messages list - # This is invalid, skip this iteration - continue - else: - if len(base_msgs) < keep_count: - continue # Skip if we don't have enough messages - - # Slice from base_msgs to get recent messages (without system prompt) - recent_messages = base_msgs[-keep_count:] - - # Ensure tool pairs stay intact in the reduced slice - reduced_slice_start = max(0, len(base_msgs) - keep_count) - recent_messages = _ensure_tool_pairs_intact( - recent_messages, base_msgs, reduced_slice_start - ) - - if has_system_prompt: - messages = [system_msg] + recent_messages - else: - messages = recent_messages - - new_messages_dict = [] - for msg in messages: - if msg is None: - continue # Skip None messages (type safety) - if isinstance(msg, dict): - msg_dict = { - k: v for k, v in msg.items() if v is not None - } - else: - msg_dict = dict(msg) - new_messages_dict.append(msg_dict) - - new_token_count = estimate_token_count( - new_messages_dict, model=token_count_model - ) - - if new_token_count <= 120_000: - logger.info( - f"Reduced to {keep_count} recent messages, " - f"now {new_token_count} tokens" - ) - break - else: - # Even with 0 messages still over limit - logger.error( - f"Unable to reduce token count below threshold even with 0 messages. " - f"Final count: {new_token_count} tokens. Messages may be extremely large." - ) - # ABSOLUTE LAST RESORT: Drop system prompt - if has_system_prompt and len(messages) > 1: - messages = messages[1:] # Drop system prompt - logger.critical( - "CRITICAL: Dropped system prompt as absolute last resort. " - "Behavioral consistency may be affected." - ) - # Yield error to user - yield StreamError( - errorText=( - "Warning: System prompt dropped due to size constraints. " - "Assistant behavior may be affected." - ) - ) - - except Exception as e: - logger.error(f"Context summarization failed: {e}", exc_info=True) - # If we were over the token limit, yield error to user - # Don't silently continue with oversized messages that will fail - if token_count > 120_000: + if context_result.error: + if "System prompt dropped" in context_result.error: + # Warning only - continue with reduced context yield StreamError( errorText=( - f"Unable to manage context window (token limit exceeded: {token_count} tokens). " - "Context summarization failed. Please start a new conversation." + "Warning: System prompt dropped due to size constraints. " + "Assistant behavior may be affected." + ) + ) + else: + # Any other error - abort to prevent failed LLM calls + yield StreamError( + errorText=( + f"Context window management failed: {context_result.error}. " + "Please start a new conversation." 
) ) yield StreamFinish() return - # Otherwise, continue with original messages (under limit) + + messages = context_result.messages + if context_result.was_compacted: + logger.info( + f"Context compacted for streaming: {context_result.token_count} tokens" + ) # Loop to handle tool calls and continue conversation while True: @@ -1369,14 +1294,6 @@ async def _stream_chat_chunks( :128 ] # OpenRouter limit - # Create the stream with proper types - from typing import cast - - from openai.types.chat import ( - ChatCompletionMessageParam, - ChatCompletionStreamOptionsParam, - ) - stream = await client.chat.completions.create( model=model, messages=cast(list[ChatCompletionMessageParam], messages), @@ -1834,6 +1751,11 @@ async def _execute_long_running_tool( tool_call_id=tool_call_id, result=error_response.model_dump_json(), ) + # Generate LLM continuation so user sees explanation even for errors + try: + await _generate_llm_continuation(session_id=session_id, user_id=user_id) + except Exception as llm_err: + logger.warning(f"Failed to generate LLM continuation for error: {llm_err}") finally: await _mark_operation_completed(tool_call_id) @@ -1895,17 +1817,36 @@ async def _generate_llm_continuation( # Build system prompt system_prompt, _ = await _build_system_prompt(user_id) - # Build messages in OpenAI format messages = session.to_openai_messages() if system_prompt: - from openai.types.chat import ChatCompletionSystemMessageParam - system_message = ChatCompletionSystemMessageParam( role="system", content=system_prompt, ) messages = [system_message] + messages + # Apply context window management to prevent oversized requests + context_result = await _manage_context_window( + messages=messages, + model=config.model, + api_key=config.api_key, + base_url=config.base_url, + ) + + if context_result.error and "System prompt dropped" not in context_result.error: + logger.error( + f"Context window management failed for session {session_id}: " + f"{context_result.error} (tokens={context_result.token_count})" + ) + return + + messages = context_result.messages + if context_result.was_compacted: + logger.info( + f"Context compacted for LLM continuation: " + f"{context_result.token_count} tokens" + ) + # Build extra_body for tracing extra_body: dict[str, Any] = { "posthogProperties": { @@ -1918,19 +1859,54 @@ async def _generate_llm_continuation( if session_id: extra_body["session_id"] = session_id[:128] - # Make non-streaming LLM call (no tools - just text response) - from typing import cast + retry_count = 0 + last_error: Exception | None = None + response = None - from openai.types.chat import ChatCompletionMessageParam + while retry_count <= MAX_RETRIES: + try: + logger.info( + f"Generating LLM continuation for session {session_id}" + f"{f' (retry {retry_count}/{MAX_RETRIES})' if retry_count > 0 else ''}" + ) - # No tools parameter = text-only response (no tool calls) - response = await client.chat.completions.create( - model=config.model, - messages=cast(list[ChatCompletionMessageParam], messages), - extra_body=extra_body, - ) + response = await client.chat.completions.create( + model=config.model, + messages=cast(list[ChatCompletionMessageParam], messages), + extra_body=extra_body, + ) + last_error = None # Clear any previous error on success + break # Success, exit retry loop + except Exception as e: + last_error = e + if _is_retryable_error(e) and retry_count < MAX_RETRIES: + retry_count += 1 + delay = min( + BASE_DELAY_SECONDS * (2 ** (retry_count - 1)), + MAX_DELAY_SECONDS, + ) + logger.warning( + 
f"Retryable error in LLM continuation: {e!s}. " + f"Retrying in {delay:.1f}s (attempt {retry_count}/{MAX_RETRIES})" + ) + await asyncio.sleep(delay) + continue + else: + # Non-retryable error - log and exit gracefully + logger.error( + f"Non-retryable error in LLM continuation: {e!s}", + exc_info=True, + ) + return - if response.choices and response.choices[0].message.content: + if last_error: + logger.error( + f"Max retries ({MAX_RETRIES}) exceeded for LLM continuation. " + f"Last error: {last_error!s}" + ) + return + + if response and response.choices and response.choices[0].message.content: assistant_content = response.choices[0].message.content # Reload session from DB to avoid race condition with user messages diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/__init__.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/__init__.py index 392f642c41..b7650b3cbd 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/__init__.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/__init__.py @@ -2,27 +2,54 @@ from .core import ( AgentGeneratorNotConfiguredError, + AgentJsonValidationError, + AgentSummary, + DecompositionResult, + DecompositionStep, + LibraryAgentSummary, + MarketplaceAgentSummary, decompose_goal, + enrich_library_agents_from_steps, + extract_search_terms_from_steps, + extract_uuids_from_text, generate_agent, generate_agent_patch, get_agent_as_json, + get_all_relevant_agents_for_generation, + get_library_agent_by_graph_id, + get_library_agent_by_id, + get_library_agents_for_generation, json_to_graph, save_agent_to_library, + search_marketplace_agents_for_generation, ) +from .errors import get_user_message_for_error from .service import health_check as check_external_service_health from .service import is_external_service_configured __all__ = [ - # Core functions + "AgentGeneratorNotConfiguredError", + "AgentJsonValidationError", + "AgentSummary", + "DecompositionResult", + "DecompositionStep", + "LibraryAgentSummary", + "MarketplaceAgentSummary", + "check_external_service_health", "decompose_goal", + "enrich_library_agents_from_steps", + "extract_search_terms_from_steps", + "extract_uuids_from_text", "generate_agent", "generate_agent_patch", - "save_agent_to_library", "get_agent_as_json", - "json_to_graph", - # Exceptions - "AgentGeneratorNotConfiguredError", - # Service + "get_all_relevant_agents_for_generation", + "get_library_agent_by_graph_id", + "get_library_agent_by_id", + "get_library_agents_for_generation", + "get_user_message_for_error", "is_external_service_configured", - "check_external_service_health", + "json_to_graph", + "save_agent_to_library", + "search_marketplace_agents_for_generation", ] diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py index fc15587110..0ddd2aa86b 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py @@ -1,11 +1,22 @@ """Core agent generation functions.""" import logging +import re import uuid -from typing import Any +from typing import Any, NotRequired, TypedDict from backend.api.features.library import db as library_db -from backend.data.graph import Graph, Link, Node, create_graph +from backend.api.features.store import db as store_db +from backend.data.graph import 
( + Graph, + Link, + Node, + create_graph, + get_graph, + get_graph_all_versions, + get_store_listed_graphs, +) +from backend.util.exceptions import DatabaseError, NotFoundError from .service import ( decompose_goal_external, @@ -16,6 +27,74 @@ from .service import ( logger = logging.getLogger(__name__) +AGENT_EXECUTOR_BLOCK_ID = "e189baac-8c20-45a1-94a7-55177ea42565" + + +class ExecutionSummary(TypedDict): + """Summary of a single execution for quality assessment.""" + + status: str + correctness_score: NotRequired[float] + activity_summary: NotRequired[str] + + +class LibraryAgentSummary(TypedDict): + """Summary of a library agent for sub-agent composition. + + Includes recent executions to help the LLM decide whether to use this agent. + Each execution shows status, correctness_score (0-1), and activity_summary. + """ + + graph_id: str + graph_version: int + name: str + description: str + input_schema: dict[str, Any] + output_schema: dict[str, Any] + recent_executions: NotRequired[list[ExecutionSummary]] + + +class MarketplaceAgentSummary(TypedDict): + """Summary of a marketplace agent for sub-agent composition.""" + + name: str + description: str + sub_heading: str + creator: str + is_marketplace_agent: bool + + +class DecompositionStep(TypedDict, total=False): + """A single step in decomposed instructions.""" + + description: str + action: str + block_name: str + tool: str + name: str + + +class DecompositionResult(TypedDict, total=False): + """Result from decompose_goal - can be instructions, questions, or error.""" + + type: str + steps: list[DecompositionStep] + questions: list[dict[str, Any]] + error: str + error_type: str + + +AgentSummary = LibraryAgentSummary | MarketplaceAgentSummary | dict[str, Any] + + +def _to_dict_list( + agents: list[AgentSummary] | list[dict[str, Any]] | None, +) -> list[dict[str, Any]] | None: + """Convert typed agent summaries to plain dicts for external service calls.""" + if agents is None: + return None + return [dict(a) for a in agents] + class AgentGeneratorNotConfiguredError(Exception): """Raised when the external Agent Generator service is not configured.""" @@ -36,15 +115,422 @@ def _check_service_configured() -> None: ) -async def decompose_goal(description: str, context: str = "") -> dict[str, Any] | None: +_UUID_PATTERN = re.compile( + r"[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}", + re.IGNORECASE, +) + + +def extract_uuids_from_text(text: str) -> list[str]: + """Extract all UUID v4 strings from text. + + Args: + text: Text that may contain UUIDs (e.g., user's goal description) + + Returns: + List of unique UUIDs found in the text (lowercase) + """ + matches = _UUID_PATTERN.findall(text) + return list({m.lower() for m in matches}) + + +async def get_library_agent_by_id( + user_id: str, agent_id: str +) -> LibraryAgentSummary | None: + """Fetch a specific library agent by its ID (library agent ID or graph_id). + + This function tries multiple lookup strategies: + 1. First tries to find by graph_id (AgentGraph primary key) + 2. 
If not found, tries to find by library agent ID (LibraryAgent primary key) + + This handles both cases: + - User provides graph_id (e.g., from AgentExecutorBlock) + - User provides library agent ID (e.g., from library URL) + + Args: + user_id: The user ID + agent_id: The ID to look up (can be graph_id or library agent ID) + + Returns: + LibraryAgentSummary if found, None otherwise + """ + try: + agent = await library_db.get_library_agent_by_graph_id(user_id, agent_id) + if agent: + logger.debug(f"Found library agent by graph_id: {agent.name}") + return LibraryAgentSummary( + graph_id=agent.graph_id, + graph_version=agent.graph_version, + name=agent.name, + description=agent.description, + input_schema=agent.input_schema, + output_schema=agent.output_schema, + ) + except DatabaseError: + raise + except Exception as e: + logger.debug(f"Could not fetch library agent by graph_id {agent_id}: {e}") + + try: + agent = await library_db.get_library_agent(agent_id, user_id) + if agent: + logger.debug(f"Found library agent by library_id: {agent.name}") + return LibraryAgentSummary( + graph_id=agent.graph_id, + graph_version=agent.graph_version, + name=agent.name, + description=agent.description, + input_schema=agent.input_schema, + output_schema=agent.output_schema, + ) + except NotFoundError: + logger.debug(f"Library agent not found by library_id: {agent_id}") + except DatabaseError: + raise + except Exception as e: + logger.warning( + f"Could not fetch library agent by library_id {agent_id}: {e}", + exc_info=True, + ) + + return None + + +get_library_agent_by_graph_id = get_library_agent_by_id + + +async def get_library_agents_for_generation( + user_id: str, + search_query: str | None = None, + exclude_graph_id: str | None = None, + max_results: int = 15, +) -> list[LibraryAgentSummary]: + """Fetch user's library agents formatted for Agent Generator. + + Uses search-based fetching to return relevant agents instead of all agents. + This is more scalable for users with large libraries. 
+ + Includes recent_executions list to help the LLM assess agent quality: + - Each execution has status, correctness_score (0-1), and activity_summary + - This gives the LLM concrete examples of recent performance + + Args: + user_id: The user ID + search_query: Optional search term to find relevant agents (user's goal/description) + exclude_graph_id: Optional graph ID to exclude (prevents circular references) + max_results: Maximum number of agents to return (default 15) + + Returns: + List of LibraryAgentSummary with schemas and recent executions for sub-agent composition + """ + try: + response = await library_db.list_library_agents( + user_id=user_id, + search_term=search_query, + page=1, + page_size=max_results, + include_executions=True, + ) + + results: list[LibraryAgentSummary] = [] + for agent in response.agents: + if exclude_graph_id is not None and agent.graph_id == exclude_graph_id: + continue + + summary = LibraryAgentSummary( + graph_id=agent.graph_id, + graph_version=agent.graph_version, + name=agent.name, + description=agent.description, + input_schema=agent.input_schema, + output_schema=agent.output_schema, + ) + if agent.recent_executions: + exec_summaries: list[ExecutionSummary] = [] + for ex in agent.recent_executions: + exec_sum = ExecutionSummary(status=ex.status) + if ex.correctness_score is not None: + exec_sum["correctness_score"] = ex.correctness_score + if ex.activity_summary: + exec_sum["activity_summary"] = ex.activity_summary + exec_summaries.append(exec_sum) + summary["recent_executions"] = exec_summaries + results.append(summary) + return results + except DatabaseError: + raise + except Exception as e: + logger.warning(f"Failed to fetch library agents: {e}") + return [] + + +async def search_marketplace_agents_for_generation( + search_query: str, + max_results: int = 10, +) -> list[LibraryAgentSummary]: + """Search marketplace agents formatted for Agent Generator. + + Fetches marketplace agents and their full schemas so they can be used + as sub-agents in generated workflows. + + Args: + search_query: Search term to find relevant public agents + max_results: Maximum number of agents to return (default 10) + + Returns: + List of LibraryAgentSummary with full input/output schemas + """ + try: + response = await store_db.get_store_agents( + search_query=search_query, + page=1, + page_size=max_results, + ) + + agents_with_graphs = [ + agent for agent in response.agents if agent.agent_graph_id + ] + + if not agents_with_graphs: + return [] + + graph_ids = [agent.agent_graph_id for agent in agents_with_graphs] + graphs = await get_store_listed_graphs(*graph_ids) + + results: list[LibraryAgentSummary] = [] + for agent in agents_with_graphs: + graph_id = agent.agent_graph_id + if graph_id and graph_id in graphs: + graph = graphs[graph_id] + results.append( + LibraryAgentSummary( + graph_id=graph.id, + graph_version=graph.version, + name=agent.agent_name, + description=agent.description, + input_schema=graph.input_schema, + output_schema=graph.output_schema, + ) + ) + return results + except Exception as e: + logger.warning(f"Failed to search marketplace agents: {e}") + return [] + + +async def get_all_relevant_agents_for_generation( + user_id: str, + search_query: str | None = None, + exclude_graph_id: str | None = None, + include_library: bool = True, + include_marketplace: bool = True, + max_library_results: int = 15, + max_marketplace_results: int = 10, +) -> list[AgentSummary]: + """Fetch relevant agents from library and/or marketplace. 
+ + Searches both user's library and marketplace by default. + Explicitly mentioned UUIDs in the search query are always looked up. + + Args: + user_id: The user ID + search_query: Search term to find relevant agents (user's goal/description) + exclude_graph_id: Optional graph ID to exclude (prevents circular references) + include_library: Whether to search user's library (default True) + include_marketplace: Whether to also search marketplace (default True) + max_library_results: Max library agents to return (default 15) + max_marketplace_results: Max marketplace agents to return (default 10) + + Returns: + List of AgentSummary with full schemas (both library and marketplace agents) + """ + agents: list[AgentSummary] = [] + seen_graph_ids: set[str] = set() + + if search_query: + mentioned_uuids = extract_uuids_from_text(search_query) + for graph_id in mentioned_uuids: + if graph_id == exclude_graph_id: + continue + agent = await get_library_agent_by_graph_id(user_id, graph_id) + agent_graph_id = agent.get("graph_id") if agent else None + if agent and agent_graph_id and agent_graph_id not in seen_graph_ids: + agents.append(agent) + seen_graph_ids.add(agent_graph_id) + logger.debug( + f"Found explicitly mentioned agent: {agent.get('name') or 'Unknown'}" + ) + + if include_library: + library_agents = await get_library_agents_for_generation( + user_id=user_id, + search_query=search_query, + exclude_graph_id=exclude_graph_id, + max_results=max_library_results, + ) + for agent in library_agents: + graph_id = agent.get("graph_id") + if graph_id and graph_id not in seen_graph_ids: + agents.append(agent) + seen_graph_ids.add(graph_id) + + if include_marketplace and search_query: + marketplace_agents = await search_marketplace_agents_for_generation( + search_query=search_query, + max_results=max_marketplace_results, + ) + for agent in marketplace_agents: + graph_id = agent.get("graph_id") + if graph_id and graph_id not in seen_graph_ids: + agents.append(agent) + seen_graph_ids.add(graph_id) + + return agents + + +def extract_search_terms_from_steps( + decomposition_result: DecompositionResult | dict[str, Any], +) -> list[str]: + """Extract search terms from decomposed instruction steps. + + Analyzes the decomposition result to extract relevant keywords + for additional library agent searches. 
+ + Args: + decomposition_result: Result from decompose_goal containing steps + + Returns: + List of unique search terms extracted from steps + """ + search_terms: list[str] = [] + + if decomposition_result.get("type") != "instructions": + return search_terms + + steps = decomposition_result.get("steps", []) + if not steps: + return search_terms + + step_keys: list[str] = ["description", "action", "block_name", "tool", "name"] + + for step in steps: + for key in step_keys: + value = step.get(key) # type: ignore[union-attr] + if isinstance(value, str) and len(value) > 3: + search_terms.append(value) + + seen: set[str] = set() + unique_terms: list[str] = [] + for term in search_terms: + term_lower = term.lower() + if term_lower not in seen: + seen.add(term_lower) + unique_terms.append(term) + + return unique_terms + + +async def enrich_library_agents_from_steps( + user_id: str, + decomposition_result: DecompositionResult | dict[str, Any], + existing_agents: list[AgentSummary] | list[dict[str, Any]], + exclude_graph_id: str | None = None, + include_marketplace: bool = True, + max_additional_results: int = 10, +) -> list[AgentSummary] | list[dict[str, Any]]: + """Enrich library agents list with additional searches based on decomposed steps. + + This implements two-phase search: after decomposition, we search for additional + relevant agents based on the specific steps identified. + + Args: + user_id: The user ID + decomposition_result: Result from decompose_goal containing steps + existing_agents: Already fetched library agents from initial search + exclude_graph_id: Optional graph ID to exclude + include_marketplace: Whether to also search marketplace + max_additional_results: Max additional agents per search term (default 10) + + Returns: + Combined list of library agents (existing + newly discovered) + """ + search_terms = extract_search_terms_from_steps(decomposition_result) + + if not search_terms: + return existing_agents + + existing_ids: set[str] = set() + existing_names: set[str] = set() + + for agent in existing_agents: + agent_name = agent.get("name") + if agent_name and isinstance(agent_name, str): + existing_names.add(agent_name.lower()) + graph_id = agent.get("graph_id") # type: ignore[call-overload] + if graph_id and isinstance(graph_id, str): + existing_ids.add(graph_id) + + all_agents: list[AgentSummary] | list[dict[str, Any]] = list(existing_agents) + + for term in search_terms[:3]: + try: + additional_agents = await get_all_relevant_agents_for_generation( + user_id=user_id, + search_query=term, + exclude_graph_id=exclude_graph_id, + include_marketplace=include_marketplace, + max_library_results=max_additional_results, + max_marketplace_results=5, + ) + + for agent in additional_agents: + agent_name = agent.get("name") + if not agent_name or not isinstance(agent_name, str): + continue + agent_name_lower = agent_name.lower() + + if agent_name_lower in existing_names: + continue + + graph_id = agent.get("graph_id") # type: ignore[call-overload] + if graph_id and graph_id in existing_ids: + continue + + all_agents.append(agent) + existing_names.add(agent_name_lower) + if graph_id and isinstance(graph_id, str): + existing_ids.add(graph_id) + + except DatabaseError: + logger.error(f"Database error searching for agents with term '{term}'") + raise + except Exception as e: + logger.warning( + f"Failed to search for additional agents with term '{term}': {e}" + ) + + logger.debug( + f"Enriched library agents: {len(existing_agents)} initial + " + f"{len(all_agents) - 
len(existing_agents)} additional = {len(all_agents)} total" + ) + + return all_agents + + +async def decompose_goal( + description: str, + context: str = "", + library_agents: list[AgentSummary] | None = None, +) -> DecompositionResult | None: """Break down a goal into steps or return clarifying questions. Args: description: Natural language goal description context: Additional context (e.g., answers to previous questions) + library_agents: User's library agents available for sub-agent composition Returns: - Dict with either: + DecompositionResult with either: - {"type": "clarifying_questions", "questions": [...]} - {"type": "instructions", "steps": [...]} Or None on error @@ -54,26 +540,36 @@ async def decompose_goal(description: str, context: str = "") -> dict[str, Any] """ _check_service_configured() logger.info("Calling external Agent Generator service for decompose_goal") - return await decompose_goal_external(description, context) + result = await decompose_goal_external( + description, context, _to_dict_list(library_agents) + ) + return result # type: ignore[return-value] -async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None: +async def generate_agent( + instructions: DecompositionResult | dict[str, Any], + library_agents: list[AgentSummary] | list[dict[str, Any]] | None = None, +) -> dict[str, Any] | None: """Generate agent JSON from instructions. Args: instructions: Structured instructions from decompose_goal + library_agents: User's library agents available for sub-agent composition Returns: - Agent JSON dict or None on error + Agent JSON dict, error dict {"type": "error", ...}, or None on error Raises: AgentGeneratorNotConfiguredError: If the external service is not configured. """ _check_service_configured() logger.info("Calling external Agent Generator service for generate_agent") - result = await generate_agent_external(instructions) + result = await generate_agent_external( + dict(instructions), _to_dict_list(library_agents) + ) if result: - # Ensure required fields + if isinstance(result, dict) and result.get("type") == "error": + return result if "id" not in result: result["id"] = str(uuid.uuid4()) if "version" not in result: @@ -83,6 +579,12 @@ async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None: return result +class AgentJsonValidationError(Exception): + """Raised when agent JSON is invalid or missing required fields.""" + + pass + + def json_to_graph(agent_json: dict[str, Any]) -> Graph: """Convert agent JSON dict to Graph model. 
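# --- Illustrative sketch (not part of the patch) ---
# The wrapper functions above now return one of three shapes: the generated agent
# JSON, an error dict {"type": "error", "error": ..., "error_type": ...} passed
# through from the external service, or None on an unexpected failure. A minimal
# standalone sketch of how a caller might branch on that contract; the payload
# values below are hypothetical, not real service output.

def handle_generation_result(result: dict | None) -> str:
    if result is None:
        return "generation failed unexpectedly"
    if result.get("type") == "error":
        # error_type values such as "timeout" or "rate_limit" come from the service
        return f"generation failed: {result.get('error_type', 'unknown')}"
    # Otherwise the result is the generated agent JSON
    return f"generated '{result.get('name', 'Unnamed')}' with {len(result.get('nodes', []))} nodes"

# Example with a made-up error payload:
print(handle_generation_result(
    {"type": "error", "error": "model overloaded", "error_type": "rate_limit"}
))
# --- end sketch ---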
@@ -91,25 +593,55 @@ def json_to_graph(agent_json: dict[str, Any]) -> Graph: Returns: Graph ready for saving + + Raises: + AgentJsonValidationError: If required fields are missing from nodes or links """ nodes = [] - for n in agent_json.get("nodes", []): + for idx, n in enumerate(agent_json.get("nodes", [])): + block_id = n.get("block_id") + if not block_id: + node_id = n.get("id", f"index_{idx}") + raise AgentJsonValidationError( + f"Node '{node_id}' is missing required field 'block_id'" + ) node = Node( id=n.get("id", str(uuid.uuid4())), - block_id=n["block_id"], + block_id=block_id, input_default=n.get("input_default", {}), metadata=n.get("metadata", {}), ) nodes.append(node) links = [] - for link_data in agent_json.get("links", []): + for idx, link_data in enumerate(agent_json.get("links", [])): + source_id = link_data.get("source_id") + sink_id = link_data.get("sink_id") + source_name = link_data.get("source_name") + sink_name = link_data.get("sink_name") + + missing_fields = [] + if not source_id: + missing_fields.append("source_id") + if not sink_id: + missing_fields.append("sink_id") + if not source_name: + missing_fields.append("source_name") + if not sink_name: + missing_fields.append("sink_name") + + if missing_fields: + link_id = link_data.get("id", f"index_{idx}") + raise AgentJsonValidationError( + f"Link '{link_id}' is missing required fields: {', '.join(missing_fields)}" + ) + link = Link( id=link_data.get("id", str(uuid.uuid4())), - source_id=link_data["source_id"], - sink_id=link_data["sink_id"], - source_name=link_data["source_name"], - sink_name=link_data["sink_name"], + source_id=source_id, + sink_id=sink_id, + source_name=source_name, + sink_name=sink_name, is_static=link_data.get("is_static", False), ) links.append(link) @@ -130,22 +662,40 @@ def _reassign_node_ids(graph: Graph) -> None: This is needed when creating a new version to avoid unique constraint violations. """ - # Create mapping from old node IDs to new UUIDs id_map = {node.id: str(uuid.uuid4()) for node in graph.nodes} - # Reassign node IDs for node in graph.nodes: node.id = id_map[node.id] - # Update link references to use new node IDs for link in graph.links: - link.id = str(uuid.uuid4()) # Also give links new IDs + link.id = str(uuid.uuid4()) if link.source_id in id_map: link.source_id = id_map[link.source_id] if link.sink_id in id_map: link.sink_id = id_map[link.sink_id] +def _populate_agent_executor_user_ids(agent_json: dict[str, Any], user_id: str) -> None: + """Populate user_id in AgentExecutorBlock nodes. + + The external agent generator creates AgentExecutorBlock nodes with empty user_id. + This function fills in the actual user_id so sub-agents run with correct permissions. 
+ + Args: + agent_json: Agent JSON dict (modified in place) + user_id: User ID to set + """ + for node in agent_json.get("nodes", []): + if node.get("block_id") == AGENT_EXECUTOR_BLOCK_ID: + input_default = node.get("input_default") or {} + if not input_default.get("user_id"): + input_default["user_id"] = user_id + node["input_default"] = input_default + logger.debug( + f"Set user_id for AgentExecutorBlock node {node.get('id')}" + ) + + async def save_agent_to_library( agent_json: dict[str, Any], user_id: str, is_update: bool = False ) -> tuple[Graph, Any]: @@ -159,33 +709,27 @@ async def save_agent_to_library( Returns: Tuple of (created Graph, LibraryAgent) """ - from backend.data.graph import get_graph_all_versions + # Populate user_id in AgentExecutorBlock nodes before conversion + _populate_agent_executor_user_ids(agent_json, user_id) graph = json_to_graph(agent_json) if is_update: - # For updates, keep the same graph ID but increment version - # and reassign node/link IDs to avoid conflicts if graph.id: existing_versions = await get_graph_all_versions(graph.id, user_id) if existing_versions: latest_version = max(v.version for v in existing_versions) graph.version = latest_version + 1 - # Reassign node IDs (but keep graph ID the same) _reassign_node_ids(graph) logger.info(f"Updating agent {graph.id} to version {graph.version}") else: - # For new agents, always generate a fresh UUID to avoid collisions graph.id = str(uuid.uuid4()) graph.version = 1 - # Reassign all node IDs as well _reassign_node_ids(graph) logger.info(f"Creating new agent with ID {graph.id}") - # Save to database created_graph = await create_graph(graph, user_id) - # Add to user's library (or update existing library agent) library_agents = await library_db.create_library_agent( graph=created_graph, user_id=user_id, @@ -197,25 +741,31 @@ async def save_agent_to_library( async def get_agent_as_json( - graph_id: str, user_id: str | None + agent_id: str, user_id: str | None ) -> dict[str, Any] | None: """Fetch an agent and convert to JSON format for editing. Args: - graph_id: Graph ID or library agent ID + agent_id: Graph ID or library agent ID user_id: User ID Returns: Agent as JSON dict or None if not found """ - from backend.data.graph import get_graph + graph = await get_graph(agent_id, version=None, user_id=user_id) + + if not graph and user_id: + try: + library_agent = await library_db.get_library_agent(agent_id, user_id) + graph = await get_graph( + library_agent.graph_id, version=None, user_id=user_id + ) + except NotFoundError: + pass - # Try to get the graph (version=None gets the active version) - graph = await get_graph(graph_id, version=None, user_id=user_id) if not graph: return None - # Convert to JSON format nodes = [] for node in graph.nodes: nodes.append( @@ -253,7 +803,9 @@ async def get_agent_as_json( async def generate_agent_patch( - update_request: str, current_agent: dict[str, Any] + update_request: str, + current_agent: dict[str, Any], + library_agents: list[AgentSummary] | None = None, ) -> dict[str, Any] | None: """Update an existing agent using natural language. 
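# --- Illustrative sketch (not part of the patch) ---
# Rough before/after of what _populate_agent_executor_user_ids does: nodes whose
# block_id matches the AgentExecutorBlock get the caller's user_id filled into
# input_default so sub-agents run with the right permissions. The block ID below
# is a placeholder, not the real AGENT_EXECUTOR_BLOCK_ID constant.

HYPOTHETICAL_EXECUTOR_BLOCK_ID = "00000000-0000-0000-0000-000000000000"

def fill_user_ids(agent_json: dict, user_id: str) -> None:
    for node in agent_json.get("nodes", []):
        if node.get("block_id") == HYPOTHETICAL_EXECUTOR_BLOCK_ID:
            defaults = node.get("input_default") or {}
            if not defaults.get("user_id"):
                defaults["user_id"] = user_id
            node["input_default"] = defaults

agent = {"nodes": [{"id": "n1", "block_id": HYPOTHETICAL_EXECUTOR_BLOCK_ID, "input_default": {}}]}
fill_user_ids(agent, "user-123")
assert agent["nodes"][0]["input_default"]["user_id"] == "user-123"
# --- end sketch ---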
@@ -265,13 +817,17 @@ async def generate_agent_patch( Args: update_request: Natural language description of changes current_agent: Current agent JSON + library_agents: User's library agents available for sub-agent composition Returns: - Updated agent JSON, clarifying questions dict, or None on error + Updated agent JSON, clarifying questions dict {"type": "clarifying_questions", ...}, + error dict {"type": "error", ...}, or None on unexpected error Raises: AgentGeneratorNotConfiguredError: If the external service is not configured. """ _check_service_configured() logger.info("Calling external Agent Generator service for generate_agent_patch") - return await generate_agent_patch_external(update_request, current_agent) + return await generate_agent_patch_external( + update_request, current_agent, _to_dict_list(library_agents) + ) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/errors.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/errors.py new file mode 100644 index 0000000000..282d8cf9aa --- /dev/null +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/errors.py @@ -0,0 +1,95 @@ +"""Error handling utilities for agent generator.""" + +import re + + +def _sanitize_error_details(details: str) -> str: + """Sanitize error details to remove sensitive information. + + Strips common patterns that could expose internal system info: + - File paths (Unix and Windows) + - Database connection strings + - URLs with credentials + - Stack trace internals + + Args: + details: Raw error details string + + Returns: + Sanitized error details safe for user display + """ + sanitized = re.sub( + r"/[a-zA-Z0-9_./\-]+\.(py|js|ts|json|yaml|yml)", "[path]", details + ) + sanitized = re.sub(r"[A-Z]:\\[a-zA-Z0-9_\\.\\-]+", "[path]", sanitized) + sanitized = re.sub( + r"(postgres|mysql|mongodb|redis)://[^\s]+", "[database_url]", sanitized + ) + sanitized = re.sub(r"https?://[^:]+:[^@]+@[^\s]+", "[url]", sanitized) + sanitized = re.sub(r", line \d+", "", sanitized) + sanitized = re.sub(r'File "[^"]+",?', "", sanitized) + + return sanitized.strip() + + +def get_user_message_for_error( + error_type: str, + operation: str = "process the request", + llm_parse_message: str | None = None, + validation_message: str | None = None, + error_details: str | None = None, +) -> str: + """Get a user-friendly error message based on error type. + + This function maps internal error types to user-friendly messages, + providing a consistent experience across different agent operations. + + Args: + error_type: The error type from the external service + (e.g., "llm_parse_error", "timeout", "rate_limit") + operation: Description of what operation failed, used in the default + message (e.g., "analyze the goal", "generate the agent") + llm_parse_message: Custom message for llm_parse_error type + validation_message: Custom message for validation_error type + error_details: Optional additional details about the error + + Returns: + User-friendly error message suitable for display to the user + """ + base_message = "" + + if error_type == "llm_parse_error": + base_message = ( + llm_parse_message + or "The AI had trouble processing this request. Please try again." + ) + elif error_type == "validation_error": + base_message = ( + validation_message + or "The generated agent failed validation. " + "This usually happens when the agent structure doesn't match " + "what the platform expects. 
Please try simplifying your goal " + "or breaking it into smaller parts." + ) + elif error_type == "patch_error": + base_message = ( + "Failed to apply the changes. The modification couldn't be " + "validated. Please try a different approach or simplify the change." + ) + elif error_type in ("timeout", "llm_timeout"): + base_message = ( + "The request took too long to process. This can happen with " + "complex agents. Please try again or simplify your goal." + ) + elif error_type in ("rate_limit", "llm_rate_limit"): + base_message = "The service is currently busy. Please try again in a moment." + else: + base_message = f"Failed to {operation}. Please try again." + + if error_details: + details = _sanitize_error_details(error_details) + if len(details) > 200: + details = details[:200] + "..." + base_message += f"\n\nTechnical details: {details}" + + return base_message diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py index a4d2f1af15..c9c960d1ae 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py @@ -14,6 +14,70 @@ from backend.util.settings import Settings logger = logging.getLogger(__name__) + +def _create_error_response( + error_message: str, + error_type: str = "unknown", + details: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Create a standardized error response dict. + + Args: + error_message: Human-readable error message + error_type: Machine-readable error type + details: Optional additional error details + + Returns: + Error dict with type="error" and error details + """ + response: dict[str, Any] = { + "type": "error", + "error": error_message, + "error_type": error_type, + } + if details: + response["details"] = details + return response + + +def _classify_http_error(e: httpx.HTTPStatusError) -> tuple[str, str]: + """Classify an HTTP error into error_type and message. + + Args: + e: The HTTP status error + + Returns: + Tuple of (error_type, error_message) + """ + status = e.response.status_code + if status == 429: + return "rate_limit", f"Agent Generator rate limited: {e}" + elif status == 503: + return "service_unavailable", f"Agent Generator unavailable: {e}" + elif status == 504 or status == 408: + return "timeout", f"Agent Generator timed out: {e}" + else: + return "http_error", f"HTTP error calling Agent Generator: {e}" + + +def _classify_request_error(e: httpx.RequestError) -> tuple[str, str]: + """Classify a request error into error_type and message. + + Args: + e: The request error + + Returns: + Tuple of (error_type, error_message) + """ + error_str = str(e).lower() + if "timeout" in error_str or "timed out" in error_str: + return "timeout", f"Agent Generator request timed out: {e}" + elif "connect" in error_str: + return "connection_error", f"Could not connect to Agent Generator: {e}" + else: + return "request_error", f"Request error calling Agent Generator: {e}" + + _client: httpx.AsyncClient | None = None _settings: Settings | None = None @@ -53,13 +117,16 @@ def _get_client() -> httpx.AsyncClient: async def decompose_goal_external( - description: str, context: str = "" + description: str, + context: str = "", + library_agents: list[dict[str, Any]] | None = None, ) -> dict[str, Any] | None: """Call the external service to decompose a goal. 
Args: description: Natural language goal description context: Additional context (e.g., answers to previous questions) + library_agents: User's library agents available for sub-agent composition Returns: Dict with either: @@ -67,15 +134,17 @@ async def decompose_goal_external( - {"type": "instructions", "steps": [...]} - {"type": "unachievable_goal", ...} - {"type": "vague_goal", ...} - Or None on error + - {"type": "error", "error": "...", "error_type": "..."} on error + Or None on unexpected error """ client = _get_client() - # Build the request payload - payload: dict[str, Any] = {"description": description} if context: - # The external service uses user_instruction for additional context - payload["user_instruction"] = context + description = f"{description}\n\nAdditional context from user:\n{context}" + + payload: dict[str, Any] = {"description": description} + if library_agents: + payload["library_agents"] = library_agents try: response = await client.post("/api/decompose-description", json=payload) @@ -83,8 +152,13 @@ async def decompose_goal_external( data = response.json() if not data.get("success"): - logger.error(f"External service returned error: {data.get('error')}") - return None + error_msg = data.get("error", "Unknown error from Agent Generator") + error_type = data.get("error_type", "unknown") + logger.error( + f"Agent Generator decomposition failed: {error_msg} " + f"(type: {error_type})" + ) + return _create_error_response(error_msg, error_type) # Map the response to the expected format response_type = data.get("type") @@ -106,88 +180,120 @@ async def decompose_goal_external( "type": "vague_goal", "suggested_goal": data.get("suggested_goal"), } + elif response_type == "error": + # Pass through error from the service + return _create_error_response( + data.get("error", "Unknown error"), + data.get("error_type", "unknown"), + ) else: logger.error( f"Unknown response type from external service: {response_type}" ) - return None + return _create_error_response( + f"Unknown response type from Agent Generator: {response_type}", + "invalid_response", + ) except httpx.HTTPStatusError as e: - logger.error(f"HTTP error calling external agent generator: {e}") - return None + error_type, error_msg = _classify_http_error(e) + logger.error(error_msg) + return _create_error_response(error_msg, error_type) except httpx.RequestError as e: - logger.error(f"Request error calling external agent generator: {e}") - return None + error_type, error_msg = _classify_request_error(e) + logger.error(error_msg) + return _create_error_response(error_msg, error_type) except Exception as e: - logger.error(f"Unexpected error calling external agent generator: {e}") - return None + error_msg = f"Unexpected error calling Agent Generator: {e}" + logger.error(error_msg) + return _create_error_response(error_msg, "unexpected_error") async def generate_agent_external( - instructions: dict[str, Any] + instructions: dict[str, Any], + library_agents: list[dict[str, Any]] | None = None, ) -> dict[str, Any] | None: """Call the external service to generate an agent from instructions. 
Args: instructions: Structured instructions from decompose_goal + library_agents: User's library agents available for sub-agent composition Returns: - Agent JSON dict or None on error + Agent JSON dict on success, or error dict {"type": "error", ...} on error """ client = _get_client() + payload: dict[str, Any] = {"instructions": instructions} + if library_agents: + payload["library_agents"] = library_agents + try: - response = await client.post( - "/api/generate-agent", json={"instructions": instructions} - ) + response = await client.post("/api/generate-agent", json=payload) response.raise_for_status() data = response.json() if not data.get("success"): - logger.error(f"External service returned error: {data.get('error')}") - return None + error_msg = data.get("error", "Unknown error from Agent Generator") + error_type = data.get("error_type", "unknown") + logger.error( + f"Agent Generator generation failed: {error_msg} (type: {error_type})" + ) + return _create_error_response(error_msg, error_type) return data.get("agent_json") except httpx.HTTPStatusError as e: - logger.error(f"HTTP error calling external agent generator: {e}") - return None + error_type, error_msg = _classify_http_error(e) + logger.error(error_msg) + return _create_error_response(error_msg, error_type) except httpx.RequestError as e: - logger.error(f"Request error calling external agent generator: {e}") - return None + error_type, error_msg = _classify_request_error(e) + logger.error(error_msg) + return _create_error_response(error_msg, error_type) except Exception as e: - logger.error(f"Unexpected error calling external agent generator: {e}") - return None + error_msg = f"Unexpected error calling Agent Generator: {e}" + logger.error(error_msg) + return _create_error_response(error_msg, "unexpected_error") async def generate_agent_patch_external( - update_request: str, current_agent: dict[str, Any] + update_request: str, + current_agent: dict[str, Any], + library_agents: list[dict[str, Any]] | None = None, ) -> dict[str, Any] | None: """Call the external service to generate a patch for an existing agent. 
Args: update_request: Natural language description of changes current_agent: Current agent JSON + library_agents: User's library agents available for sub-agent composition Returns: - Updated agent JSON, clarifying questions dict, or None on error + Updated agent JSON, clarifying questions dict, or error dict on error """ client = _get_client() + payload: dict[str, Any] = { + "update_request": update_request, + "current_agent_json": current_agent, + } + if library_agents: + payload["library_agents"] = library_agents + try: - response = await client.post( - "/api/update-agent", - json={ - "update_request": update_request, - "current_agent_json": current_agent, - }, - ) + response = await client.post("/api/update-agent", json=payload) response.raise_for_status() data = response.json() if not data.get("success"): - logger.error(f"External service returned error: {data.get('error')}") - return None + error_msg = data.get("error", "Unknown error from Agent Generator") + error_type = data.get("error_type", "unknown") + logger.error( + f"Agent Generator patch generation failed: {error_msg} " + f"(type: {error_type})" + ) + return _create_error_response(error_msg, error_type) # Check if it's clarifying questions if data.get("type") == "clarifying_questions": @@ -196,18 +302,28 @@ async def generate_agent_patch_external( "questions": data.get("questions", []), } + # Check if it's an error passed through + if data.get("type") == "error": + return _create_error_response( + data.get("error", "Unknown error"), + data.get("error_type", "unknown"), + ) + # Otherwise return the updated agent JSON return data.get("agent_json") except httpx.HTTPStatusError as e: - logger.error(f"HTTP error calling external agent generator: {e}") - return None + error_type, error_msg = _classify_http_error(e) + logger.error(error_msg) + return _create_error_response(error_msg, error_type) except httpx.RequestError as e: - logger.error(f"Request error calling external agent generator: {e}") - return None + error_type, error_msg = _classify_request_error(e) + logger.error(error_msg) + return _create_error_response(error_msg, error_type) except Exception as e: - logger.error(f"Unexpected error calling external agent generator: {e}") - return None + error_msg = f"Unexpected error calling Agent Generator: {e}" + logger.error(error_msg) + return _create_error_response(error_msg, "unexpected_error") async def get_blocks_external() -> list[dict[str, Any]] | None: diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py index 5fa74ba04e..62d59c470e 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py @@ -1,6 +1,7 @@ """Shared agent search functionality for find_agent and find_library_agent tools.""" import logging +import re from typing import Literal from backend.api.features.library import db as library_db @@ -19,6 +20,85 @@ logger = logging.getLogger(__name__) SearchSource = Literal["marketplace", "library"] +_UUID_PATTERN = re.compile( + r"^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$", + re.IGNORECASE, +) + + +def _is_uuid(text: str) -> bool: + """Check if text is a valid UUID v4.""" + return bool(_UUID_PATTERN.match(text.strip())) + + +async def _get_library_agent_by_id(user_id: str, agent_id: str) -> AgentInfo | None: + """Fetch a library agent by ID (library agent ID or graph_id). 
+ + Tries multiple lookup strategies: + 1. First by graph_id (AgentGraph primary key) + 2. Then by library agent ID (LibraryAgent primary key) + + Args: + user_id: The user ID + agent_id: The ID to look up (can be graph_id or library agent ID) + + Returns: + AgentInfo if found, None otherwise + """ + try: + agent = await library_db.get_library_agent_by_graph_id(user_id, agent_id) + if agent: + logger.debug(f"Found library agent by graph_id: {agent.name}") + return AgentInfo( + id=agent.id, + name=agent.name, + description=agent.description or "", + source="library", + in_library=True, + creator=agent.creator_name, + status=agent.status.value, + can_access_graph=agent.can_access_graph, + has_external_trigger=agent.has_external_trigger, + new_output=agent.new_output, + graph_id=agent.graph_id, + ) + except DatabaseError: + raise + except Exception as e: + logger.warning( + f"Could not fetch library agent by graph_id {agent_id}: {e}", + exc_info=True, + ) + + try: + agent = await library_db.get_library_agent(agent_id, user_id) + if agent: + logger.debug(f"Found library agent by library_id: {agent.name}") + return AgentInfo( + id=agent.id, + name=agent.name, + description=agent.description or "", + source="library", + in_library=True, + creator=agent.creator_name, + status=agent.status.value, + can_access_graph=agent.can_access_graph, + has_external_trigger=agent.has_external_trigger, + new_output=agent.new_output, + graph_id=agent.graph_id, + ) + except NotFoundError: + logger.debug(f"Library agent not found by library_id: {agent_id}") + except DatabaseError: + raise + except Exception as e: + logger.warning( + f"Could not fetch library agent by library_id {agent_id}: {e}", + exc_info=True, + ) + + return None + async def search_agents( query: str, @@ -69,29 +149,37 @@ async def search_agents( is_featured=False, ) ) - else: # library - logger.info(f"Searching user library for: {query}") - results = await library_db.list_library_agents( - user_id=user_id, # type: ignore[arg-type] - search_term=query, - page_size=10, - ) - for agent in results.agents: - agents.append( - AgentInfo( - id=agent.id, - name=agent.name, - description=agent.description or "", - source="library", - in_library=True, - creator=agent.creator_name, - status=agent.status.value, - can_access_graph=agent.can_access_graph, - has_external_trigger=agent.has_external_trigger, - new_output=agent.new_output, - graph_id=agent.graph_id, - ) + else: + if _is_uuid(query): + logger.info(f"Query looks like UUID, trying direct lookup: {query}") + agent = await _get_library_agent_by_id(user_id, query) # type: ignore[arg-type] + if agent: + agents.append(agent) + logger.info(f"Found agent by direct ID lookup: {agent.name}") + + if not agents: + logger.info(f"Searching user library for: {query}") + results = await library_db.list_library_agents( + user_id=user_id, # type: ignore[arg-type] + search_term=query, + page_size=10, ) + for agent in results.agents: + agents.append( + AgentInfo( + id=agent.id, + name=agent.name, + description=agent.description or "", + source="library", + in_library=True, + creator=agent.creator_name, + status=agent.status.value, + can_access_graph=agent.can_access_graph, + has_external_trigger=agent.has_external_trigger, + new_output=agent.new_output, + graph_id=agent.graph_id, + ) + ) logger.info(f"Found {len(agents)} agents in {source}") except NotFoundError: pass diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py 
b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py index 6b3784e323..adb2c78fce 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py @@ -8,7 +8,10 @@ from backend.api.features.chat.model import ChatSession from .agent_generator import ( AgentGeneratorNotConfiguredError, decompose_goal, + enrich_library_agents_from_steps, generate_agent, + get_all_relevant_agents_for_generation, + get_user_message_for_error, save_agent_to_library, ) from .base import BaseTool @@ -102,9 +105,24 @@ class CreateAgentTool(BaseTool): session_id=session_id, ) - # Step 1: Decompose goal into steps + library_agents = None + if user_id: + try: + library_agents = await get_all_relevant_agents_for_generation( + user_id=user_id, + search_query=description, + include_marketplace=True, + ) + logger.debug( + f"Found {len(library_agents)} relevant agents for sub-agent composition" + ) + except Exception as e: + logger.warning(f"Failed to fetch library agents: {e}") + try: - decomposition_result = await decompose_goal(description, context) + decomposition_result = await decompose_goal( + description, context, library_agents + ) except AgentGeneratorNotConfiguredError: return ErrorResponse( message=( @@ -117,15 +135,31 @@ class CreateAgentTool(BaseTool): if decomposition_result is None: return ErrorResponse( - message="Failed to analyze the goal. The agent generation service may be unavailable or timed out. Please try again.", + message="Failed to analyze the goal. The agent generation service may be unavailable. Please try again.", error="decomposition_failed", - details={ - "description": description[:100] - }, # Include context for debugging + details={"description": description[:100]}, + session_id=session_id, + ) + + if decomposition_result.get("type") == "error": + error_msg = decomposition_result.get("error", "Unknown error") + error_type = decomposition_result.get("error_type", "unknown") + user_message = get_user_message_for_error( + error_type, + operation="analyze the goal", + llm_parse_message="The AI had trouble understanding this request. 
Please try rephrasing your goal.", + ) + return ErrorResponse( + message=user_message, + error=f"decomposition_failed:{error_type}", + details={ + "description": description[:100], + "service_error": error_msg, + "error_type": error_type, + }, session_id=session_id, ) - # Check if LLM returned clarifying questions if decomposition_result.get("type") == "clarifying_questions": questions = decomposition_result.get("questions", []) return ClarificationNeededResponse( @@ -144,7 +178,6 @@ class CreateAgentTool(BaseTool): session_id=session_id, ) - # Check for unachievable/vague goals if decomposition_result.get("type") == "unachievable_goal": suggested = decomposition_result.get("suggested_goal", "") reason = decomposition_result.get("reason", "") @@ -171,9 +204,22 @@ class CreateAgentTool(BaseTool): session_id=session_id, ) - # Step 2: Generate agent JSON (external service handles fixing and validation) + if user_id and library_agents is not None: + try: + library_agents = await enrich_library_agents_from_steps( + user_id=user_id, + decomposition_result=decomposition_result, + existing_agents=library_agents, + include_marketplace=True, + ) + logger.debug( + f"After enrichment: {len(library_agents)} total agents for sub-agent composition" + ) + except Exception as e: + logger.warning(f"Failed to enrich library agents from steps: {e}") + try: - agent_json = await generate_agent(decomposition_result) + agent_json = await generate_agent(decomposition_result, library_agents) except AgentGeneratorNotConfiguredError: return ErrorResponse( message=( @@ -186,11 +232,34 @@ class CreateAgentTool(BaseTool): if agent_json is None: return ErrorResponse( - message="Failed to generate the agent. The agent generation service may be unavailable or timed out. Please try again.", + message="Failed to generate the agent. The agent generation service may be unavailable. Please try again.", error="generation_failed", + details={"description": description[:100]}, + session_id=session_id, + ) + + if isinstance(agent_json, dict) and agent_json.get("type") == "error": + error_msg = agent_json.get("error", "Unknown error") + error_type = agent_json.get("error_type", "unknown") + user_message = get_user_message_for_error( + error_type, + operation="generate the agent", + llm_parse_message="The AI had trouble generating the agent. Please try again or simplify your goal.", + validation_message=( + "I wasn't able to create a valid agent for this request. " + "The generated workflow had some structural issues. " + "Please try simplifying your goal or breaking it into smaller steps." 
+ ), + error_details=error_msg, + ) + return ErrorResponse( + message=user_message, + error=f"generation_failed:{error_type}", details={ - "description": description[:100] - }, # Include context for debugging + "description": description[:100], + "service_error": error_msg, + "error_type": error_type, + }, session_id=session_id, ) @@ -199,7 +268,6 @@ class CreateAgentTool(BaseTool): node_count = len(agent_json.get("nodes", [])) link_count = len(agent_json.get("links", [])) - # Step 3: Preview or save if not save: return AgentPreviewResponse( message=( @@ -214,7 +282,6 @@ class CreateAgentTool(BaseTool): session_id=session_id, ) - # Save to library if not user_id: return ErrorResponse( message="You must be logged in to save agents.", @@ -232,7 +299,7 @@ class CreateAgentTool(BaseTool): agent_id=created_graph.id, agent_name=created_graph.name, library_agent_id=library_agent.id, - library_agent_link=f"/library/{library_agent.id}", + library_agent_link=f"/library/agents/{library_agent.id}", agent_page_link=f"/build?flowID={created_graph.id}", session_id=session_id, ) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py index 7c4da8ad43..2c2c48226b 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py @@ -9,6 +9,8 @@ from .agent_generator import ( AgentGeneratorNotConfiguredError, generate_agent_patch, get_agent_as_json, + get_all_relevant_agents_for_generation, + get_user_message_for_error, save_agent_to_library, ) from .base import BaseTool @@ -116,7 +118,6 @@ class EditAgentTool(BaseTool): session_id=session_id, ) - # Step 1: Fetch current agent current_agent = await get_agent_as_json(agent_id, user_id) if current_agent is None: @@ -126,14 +127,30 @@ class EditAgentTool(BaseTool): session_id=session_id, ) - # Build the update request with context + library_agents = None + if user_id: + try: + graph_id = current_agent.get("id") + library_agents = await get_all_relevant_agents_for_generation( + user_id=user_id, + search_query=changes, + exclude_graph_id=graph_id, + include_marketplace=True, + ) + logger.debug( + f"Found {len(library_agents)} relevant agents for sub-agent composition" + ) + except Exception as e: + logger.warning(f"Failed to fetch library agents: {e}") + update_request = changes if context: update_request = f"{changes}\n\nAdditional context:\n{context}" - # Step 2: Generate updated agent (external service handles fixing and validation) try: - result = await generate_agent_patch(update_request, current_agent) + result = await generate_agent_patch( + update_request, current_agent, library_agents + ) except AgentGeneratorNotConfiguredError: return ErrorResponse( message=( @@ -152,7 +169,28 @@ class EditAgentTool(BaseTool): session_id=session_id, ) - # Check if LLM returned clarifying questions + if isinstance(result, dict) and result.get("type") == "error": + error_msg = result.get("error", "Unknown error") + error_type = result.get("error_type", "unknown") + user_message = get_user_message_for_error( + error_type, + operation="generate the changes", + llm_parse_message="The AI had trouble generating the changes. Please try again or simplify your request.", + validation_message="The generated changes failed validation. 
Please try rephrasing your request.", + error_details=error_msg, + ) + return ErrorResponse( + message=user_message, + error=f"update_generation_failed:{error_type}", + details={ + "agent_id": agent_id, + "changes": changes[:100], + "service_error": error_msg, + "error_type": error_type, + }, + session_id=session_id, + ) + if result.get("type") == "clarifying_questions": questions = result.get("questions", []) return ClarificationNeededResponse( @@ -171,7 +209,6 @@ class EditAgentTool(BaseTool): session_id=session_id, ) - # Result is the updated agent JSON updated_agent = result agent_name = updated_agent.get("name", "Updated Agent") @@ -179,7 +216,6 @@ class EditAgentTool(BaseTool): node_count = len(updated_agent.get("nodes", [])) link_count = len(updated_agent.get("links", [])) - # Step 3: Preview or save if not save: return AgentPreviewResponse( message=( @@ -195,7 +231,6 @@ class EditAgentTool(BaseTool): session_id=session_id, ) - # Save to library (creates a new version) if not user_id: return ErrorResponse( message="You must be logged in to save agents.", @@ -213,7 +248,7 @@ class EditAgentTool(BaseTool): agent_id=created_graph.id, agent_name=created_graph.name, library_agent_id=library_agent.id, - library_agent_link=f"/library/{library_agent.id}", + library_agent_link=f"/library/agents/{library_agent.id}", agent_page_link=f"/build?flowID={created_graph.id}", session_id=session_id, ) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/utils.py b/autogpt_platform/backend/backend/api/features/chat/tools/utils.py index a2ac91dc65..0046d0b249 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/utils.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/utils.py @@ -8,7 +8,7 @@ from backend.api.features.library import model as library_model from backend.api.features.store import db as store_db from backend.data import graph as graph_db from backend.data.graph import GraphModel -from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput +from backend.data.model import Credentials, CredentialsFieldInfo, CredentialsMetaInput from backend.integrations.creds_manager import IntegrationCredentialsManager from backend.util.exceptions import NotFoundError @@ -266,13 +266,14 @@ async def match_user_credentials_to_graph( credential_requirements, _node_fields, ) in aggregated_creds.items(): - # Find first matching credential by provider and type + # Find first matching credential by provider, type, and scopes matching_cred = next( ( cred for cred in available_creds if cred.provider in credential_requirements.provider and cred.type in credential_requirements.supported_types + and _credential_has_required_scopes(cred, credential_requirements) ), None, ) @@ -296,10 +297,17 @@ async def match_user_credentials_to_graph( f"{credential_field_name} (validation failed: {e})" ) else: + # Build a helpful error message including scope requirements + error_parts = [ + f"provider in {list(credential_requirements.provider)}", + f"type in {list(credential_requirements.supported_types)}", + ] + if credential_requirements.required_scopes: + error_parts.append( + f"scopes including {list(credential_requirements.required_scopes)}" + ) missing_creds.append( - f"{credential_field_name} " - f"(requires provider in {list(credential_requirements.provider)}, " - f"type in {list(credential_requirements.supported_types)})" + f"{credential_field_name} (requires {', '.join(error_parts)})" ) logger.info( @@ -309,6 +317,28 @@ async def match_user_credentials_to_graph( 
return graph_credentials_inputs, missing_creds +def _credential_has_required_scopes( + credential: Credentials, + requirements: CredentialsFieldInfo, +) -> bool: + """ + Check if a credential has all the scopes required by the block. + + For OAuth2 credentials, verifies that the credential's scopes are a superset + of the required scopes. For other credential types, returns True (no scope check). + """ + # Only OAuth2 credentials have scopes to check + if credential.type != "oauth2": + return True + + # If no scopes are required, any credential matches + if not requirements.required_scopes: + return True + + # Check that credential scopes are a superset of required scopes + return set(credential.scopes).issuperset(requirements.required_scopes) + + async def check_user_has_required_credentials( user_id: str, required_credentials: list[CredentialsMetaInput], diff --git a/autogpt_platform/backend/backend/api/features/library/db.py b/autogpt_platform/backend/backend/api/features/library/db.py index 872fe66b28..394f959953 100644 --- a/autogpt_platform/backend/backend/api/features/library/db.py +++ b/autogpt_platform/backend/backend/api/features/library/db.py @@ -39,6 +39,7 @@ async def list_library_agents( sort_by: library_model.LibraryAgentSort = library_model.LibraryAgentSort.UPDATED_AT, page: int = 1, page_size: int = 50, + include_executions: bool = False, ) -> library_model.LibraryAgentResponse: """ Retrieves a paginated list of LibraryAgent records for a given user. @@ -49,6 +50,9 @@ async def list_library_agents( sort_by: Sorting field (createdAt, updatedAt, isFavorite, isCreatedByUser). page: Current page (1-indexed). page_size: Number of items per page. + include_executions: Whether to include execution data for status calculation. + Defaults to False for performance (UI fetches status separately). + Set to True when accurate status/metrics are needed (e.g., agent generator). Returns: A LibraryAgentResponse containing the list of agents and pagination details. 
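# --- Illustrative sketch (not part of the patch) ---
# The credential matching added in utils.py above reduces to a set-superset check
# for OAuth2 credentials: a credential matches only if its granted scopes cover
# every required scope, while non-OAuth2 credential types skip the check. A
# minimal standalone sketch with made-up scope values:

def has_required_scopes(cred_type: str, cred_scopes: set[str], required: set[str]) -> bool:
    if cred_type != "oauth2":
        return True  # only OAuth2 credentials carry scopes
    if not required:
        return True  # nothing required, any credential matches
    return cred_scopes.issuperset(required)

assert has_required_scopes("oauth2", {"repo", "read:user"}, {"repo"}) is True
assert has_required_scopes("oauth2", {"read:user"}, {"repo"}) is False
assert has_required_scopes("api_key", set(), {"repo"}) is True
# --- end sketch ---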
@@ -76,7 +80,6 @@ async def list_library_agents( "isArchived": False, } - # Build search filter if applicable if search_term: where_clause["OR"] = [ { @@ -93,7 +96,6 @@ async def list_library_agents( }, ] - # Determine sorting order_by: prisma.types.LibraryAgentOrderByInput | None = None if sort_by == library_model.LibraryAgentSort.CREATED_AT: @@ -105,7 +107,7 @@ async def list_library_agents( library_agents = await prisma.models.LibraryAgent.prisma().find_many( where=where_clause, include=library_agent_include( - user_id, include_nodes=False, include_executions=False + user_id, include_nodes=False, include_executions=include_executions ), order=order_by, skip=(page - 1) * page_size, diff --git a/autogpt_platform/backend/backend/api/features/library/model.py b/autogpt_platform/backend/backend/api/features/library/model.py index 14d7c7be81..c6bc0e0427 100644 --- a/autogpt_platform/backend/backend/api/features/library/model.py +++ b/autogpt_platform/backend/backend/api/features/library/model.py @@ -9,6 +9,7 @@ import pydantic from backend.data.block import BlockInput from backend.data.graph import GraphModel, GraphSettings, GraphTriggerInfo from backend.data.model import CredentialsMetaInput, is_credentials_field_name +from backend.util.json import loads as json_loads from backend.util.models import Pagination if TYPE_CHECKING: @@ -16,10 +17,10 @@ if TYPE_CHECKING: class LibraryAgentStatus(str, Enum): - COMPLETED = "COMPLETED" # All runs completed - HEALTHY = "HEALTHY" # Agent is running (not all runs have completed) - WAITING = "WAITING" # Agent is queued or waiting to start - ERROR = "ERROR" # Agent is in an error state + COMPLETED = "COMPLETED" + HEALTHY = "HEALTHY" + WAITING = "WAITING" + ERROR = "ERROR" class MarketplaceListingCreator(pydantic.BaseModel): @@ -39,6 +40,30 @@ class MarketplaceListing(pydantic.BaseModel): creator: MarketplaceListingCreator +class RecentExecution(pydantic.BaseModel): + """Summary of a recent execution for quality assessment. + + Used by the LLM to understand the agent's recent performance with specific examples + rather than just aggregate statistics. 
+ """ + + status: str + correctness_score: float | None = None + activity_summary: str | None = None + + +def _parse_settings(settings: dict | str | None) -> GraphSettings: + """Parse settings from database, handling both dict and string formats.""" + if settings is None: + return GraphSettings() + try: + if isinstance(settings, str): + settings = json_loads(settings) + return GraphSettings.model_validate(settings) + except Exception: + return GraphSettings() + + class LibraryAgent(pydantic.BaseModel): """ Represents an agent in the library, including metadata for display and @@ -48,7 +73,7 @@ class LibraryAgent(pydantic.BaseModel): id: str graph_id: str graph_version: int - owner_user_id: str # ID of user who owns/created this agent graph + owner_user_id: str image_url: str | None @@ -64,7 +89,7 @@ class LibraryAgent(pydantic.BaseModel): description: str instructions: str | None = None - input_schema: dict[str, Any] # Should be BlockIOObjectSubSchema in frontend + input_schema: dict[str, Any] output_schema: dict[str, Any] credentials_input_schema: dict[str, Any] | None = pydantic.Field( description="Input schema for credentials required by the agent", @@ -81,25 +106,19 @@ class LibraryAgent(pydantic.BaseModel): ) trigger_setup_info: Optional[GraphTriggerInfo] = None - # Indicates whether there's a new output (based on recent runs) new_output: bool - - # Whether the user can access the underlying graph + execution_count: int = 0 + success_rate: float | None = None + avg_correctness_score: float | None = None + recent_executions: list[RecentExecution] = pydantic.Field( + default_factory=list, + description="List of recent executions with status, score, and summary", + ) can_access_graph: bool - - # Indicates if this agent is the latest version is_latest_version: bool - - # Whether the agent is marked as favorite by the user is_favorite: bool - - # Recommended schedule cron (from marketplace agents) recommended_schedule_cron: str | None = None - - # User-specific settings for this library agent settings: GraphSettings = pydantic.Field(default_factory=GraphSettings) - - # Marketplace listing information if the agent has been published marketplace_listing: Optional["MarketplaceListing"] = None @staticmethod @@ -123,7 +142,6 @@ class LibraryAgent(pydantic.BaseModel): agent_updated_at = agent.AgentGraph.updatedAt lib_agent_updated_at = agent.updatedAt - # Compute updated_at as the latest between library agent and graph updated_at = ( max(agent_updated_at, lib_agent_updated_at) if agent_updated_at @@ -136,7 +154,6 @@ class LibraryAgent(pydantic.BaseModel): creator_name = agent.Creator.name or "Unknown" creator_image_url = agent.Creator.avatarUrl or "" - # Logic to calculate status and new_output week_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( days=7 ) @@ -145,13 +162,55 @@ class LibraryAgent(pydantic.BaseModel): status = status_result.status new_output = status_result.new_output - # Check if user can access the graph - can_access_graph = agent.AgentGraph.userId == agent.userId + execution_count = len(executions) + success_rate: float | None = None + avg_correctness_score: float | None = None + if execution_count > 0: + success_count = sum( + 1 + for e in executions + if e.executionStatus == prisma.enums.AgentExecutionStatus.COMPLETED + ) + success_rate = (success_count / execution_count) * 100 - # Hard-coded to True until a method to check is implemented + correctness_scores = [] + for e in executions: + if e.stats and isinstance(e.stats, dict): + score = 
e.stats.get("correctness_score") + if score is not None and isinstance(score, (int, float)): + correctness_scores.append(float(score)) + if correctness_scores: + avg_correctness_score = sum(correctness_scores) / len( + correctness_scores + ) + + recent_executions: list[RecentExecution] = [] + for e in executions: + exec_score: float | None = None + exec_summary: str | None = None + if e.stats and isinstance(e.stats, dict): + score = e.stats.get("correctness_score") + if score is not None and isinstance(score, (int, float)): + exec_score = float(score) + summary = e.stats.get("activity_status") + if summary is not None and isinstance(summary, str): + exec_summary = summary + exec_status = ( + e.executionStatus.value + if hasattr(e.executionStatus, "value") + else str(e.executionStatus) + ) + recent_executions.append( + RecentExecution( + status=exec_status, + correctness_score=exec_score, + activity_summary=exec_summary, + ) + ) + + can_access_graph = agent.AgentGraph.userId == agent.userId is_latest_version = True - # Build marketplace_listing if available marketplace_listing_data = None if store_listing and store_listing.ActiveVersion and profile: creator_data = MarketplaceListingCreator( @@ -190,11 +249,15 @@ class LibraryAgent(pydantic.BaseModel): has_sensitive_action=graph.has_sensitive_action, trigger_setup_info=graph.trigger_setup_info, new_output=new_output, + execution_count=execution_count, + success_rate=success_rate, + avg_correctness_score=avg_correctness_score, + recent_executions=recent_executions, can_access_graph=can_access_graph, is_latest_version=is_latest_version, is_favorite=agent.isFavorite, recommended_schedule_cron=agent.AgentGraph.recommendedScheduleCron, - settings=GraphSettings.model_validate(agent.settings), + settings=_parse_settings(agent.settings), marketplace_listing=marketplace_listing_data, ) @@ -220,18 +283,15 @@ def _calculate_agent_status( if not executions: return AgentStatusResult(status=LibraryAgentStatus.COMPLETED, new_output=False) - # Track how many times each execution status appears status_counts = {status: 0 for status in prisma.enums.AgentExecutionStatus} new_output = False for execution in executions: - # Check if there's a completed run more recent than `recent_threshold` if execution.createdAt >= recent_threshold: if execution.executionStatus == prisma.enums.AgentExecutionStatus.COMPLETED: new_output = True status_counts[execution.executionStatus] += 1 - # Determine the final status based on counts if status_counts[prisma.enums.AgentExecutionStatus.FAILED] > 0: return AgentStatusResult(status=LibraryAgentStatus.ERROR, new_output=new_output) elif status_counts[prisma.enums.AgentExecutionStatus.QUEUED] > 0: diff --git a/autogpt_platform/backend/backend/api/features/store/db.py b/autogpt_platform/backend/backend/api/features/store/db.py index 956fdfa7da..850a2bc3e9 100644 --- a/autogpt_platform/backend/backend/api/features/store/db.py +++ b/autogpt_platform/backend/backend/api/features/store/db.py @@ -112,6 +112,7 @@ async def get_store_agents( description=agent["description"], runs=agent["runs"], rating=agent["rating"], + agent_graph_id=agent.get("agentGraphId", ""), ) store_agents.append(store_agent) except Exception as e: @@ -170,6 +171,7 @@ async def get_store_agents( description=agent.description, runs=agent.runs, rating=agent.rating, + agent_graph_id=agent.agentGraphId, ) # Add to the list only if creation was successful store_agents.append(store_agent) diff --git a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py 
b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py index 8b0884bb24..e1b8f402c8 100644 --- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py +++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py @@ -600,6 +600,7 @@ async def hybrid_search( sa.featured, sa.is_available, sa.updated_at, + sa."agentGraphId", -- Searchable text for BM25 reranking COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text, -- Semantic score @@ -659,6 +660,7 @@ async def hybrid_search( featured, is_available, updated_at, + "agentGraphId", searchable_text, semantic_score, lexical_score, diff --git a/autogpt_platform/backend/backend/api/features/store/model.py b/autogpt_platform/backend/backend/api/features/store/model.py index a3310b96fc..d66b91807d 100644 --- a/autogpt_platform/backend/backend/api/features/store/model.py +++ b/autogpt_platform/backend/backend/api/features/store/model.py @@ -38,6 +38,7 @@ class StoreAgent(pydantic.BaseModel): description: str runs: int rating: float + agent_graph_id: str class StoreAgentsResponse(pydantic.BaseModel): diff --git a/autogpt_platform/backend/backend/api/features/store/model_test.py b/autogpt_platform/backend/backend/api/features/store/model_test.py index fd09a0cf77..c4109f4603 100644 --- a/autogpt_platform/backend/backend/api/features/store/model_test.py +++ b/autogpt_platform/backend/backend/api/features/store/model_test.py @@ -26,11 +26,13 @@ def test_store_agent(): description="Test description", runs=50, rating=4.5, + agent_graph_id="test-graph-id", ) assert agent.slug == "test-agent" assert agent.agent_name == "Test Agent" assert agent.runs == 50 assert agent.rating == 4.5 + assert agent.agent_graph_id == "test-graph-id" def test_store_agents_response(): @@ -46,6 +48,7 @@ def test_store_agents_response(): description="Test description", runs=50, rating=4.5, + agent_graph_id="test-graph-id", ) ], pagination=store_model.Pagination( diff --git a/autogpt_platform/backend/backend/api/features/store/routes_test.py b/autogpt_platform/backend/backend/api/features/store/routes_test.py index 36431c20ec..fcef3f845a 100644 --- a/autogpt_platform/backend/backend/api/features/store/routes_test.py +++ b/autogpt_platform/backend/backend/api/features/store/routes_test.py @@ -82,6 +82,7 @@ def test_get_agents_featured( description="Featured agent description", runs=100, rating=4.5, + agent_graph_id="test-graph-1", ) ], pagination=store_model.Pagination( @@ -127,6 +128,7 @@ def test_get_agents_by_creator( description="Creator agent description", runs=50, rating=4.0, + agent_graph_id="test-graph-2", ) ], pagination=store_model.Pagination( @@ -172,6 +174,7 @@ def test_get_agents_sorted( description="Top agent description", runs=1000, rating=5.0, + agent_graph_id="test-graph-3", ) ], pagination=store_model.Pagination( @@ -217,6 +220,7 @@ def test_get_agents_search( description="Specific search term description", runs=75, rating=4.2, + agent_graph_id="test-graph-search", ) ], pagination=store_model.Pagination( @@ -262,6 +266,7 @@ def test_get_agents_category( description="Category agent description", runs=60, rating=4.1, + agent_graph_id="test-graph-category", ) ], pagination=store_model.Pagination( @@ -306,6 +311,7 @@ def test_get_agents_pagination( description=f"Agent {i} description", runs=i * 10, rating=4.0, + agent_graph_id="test-graph-2", ) for i in range(5) ], diff --git a/autogpt_platform/backend/backend/api/features/store/test_cache_delete.py 
b/autogpt_platform/backend/backend/api/features/store/test_cache_delete.py index dd9be1f4ab..298c51d47c 100644 --- a/autogpt_platform/backend/backend/api/features/store/test_cache_delete.py +++ b/autogpt_platform/backend/backend/api/features/store/test_cache_delete.py @@ -33,6 +33,7 @@ class TestCacheDeletion: description="Test description", runs=100, rating=4.5, + agent_graph_id="test-graph-id", ) ], pagination=Pagination( diff --git a/autogpt_platform/backend/backend/blocks/llm.py b/autogpt_platform/backend/backend/blocks/llm.py index fdcd7f3568..732fb1354c 100644 --- a/autogpt_platform/backend/backend/blocks/llm.py +++ b/autogpt_platform/backend/backend/blocks/llm.py @@ -115,7 +115,6 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta): CLAUDE_4_5_OPUS = "claude-opus-4-5-20251101" CLAUDE_4_5_SONNET = "claude-sonnet-4-5-20250929" CLAUDE_4_5_HAIKU = "claude-haiku-4-5-20251001" - CLAUDE_3_7_SONNET = "claude-3-7-sonnet-20250219" CLAUDE_3_HAIKU = "claude-3-haiku-20240307" # AI/ML API models AIML_API_QWEN2_5_72B = "Qwen/Qwen2.5-72B-Instruct-Turbo" @@ -280,9 +279,6 @@ MODEL_METADATA = { LlmModel.CLAUDE_4_5_HAIKU: ModelMetadata( "anthropic", 200000, 64000, "Claude Haiku 4.5", "Anthropic", "Anthropic", 2 ), # claude-haiku-4-5-20251001 - LlmModel.CLAUDE_3_7_SONNET: ModelMetadata( - "anthropic", 200000, 64000, "Claude 3.7 Sonnet", "Anthropic", "Anthropic", 2 - ), # claude-3-7-sonnet-20250219 LlmModel.CLAUDE_3_HAIKU: ModelMetadata( "anthropic", 200000, 4096, "Claude 3 Haiku", "Anthropic", "Anthropic", 1 ), # claude-3-haiku-20240307 diff --git a/autogpt_platform/backend/backend/blocks/stagehand/blocks.py b/autogpt_platform/backend/backend/blocks/stagehand/blocks.py index be1d736962..4d5d6bf4f3 100644 --- a/autogpt_platform/backend/backend/blocks/stagehand/blocks.py +++ b/autogpt_platform/backend/backend/blocks/stagehand/blocks.py @@ -83,7 +83,7 @@ class StagehandRecommendedLlmModel(str, Enum): GPT41_MINI = "gpt-4.1-mini-2025-04-14" # Anthropic - CLAUDE_3_7_SONNET = "claude-3-7-sonnet-20250219" + CLAUDE_4_5_SONNET = "claude-sonnet-4-5-20250929" @property def provider_name(self) -> str: @@ -137,7 +137,7 @@ class StagehandObserveBlock(Block): model: StagehandRecommendedLlmModel = SchemaField( title="LLM Model", description="LLM to use for Stagehand (provider is inferred)", - default=StagehandRecommendedLlmModel.CLAUDE_3_7_SONNET, + default=StagehandRecommendedLlmModel.CLAUDE_4_5_SONNET, advanced=False, ) model_credentials: AICredentials = AICredentialsField() @@ -230,7 +230,7 @@ class StagehandActBlock(Block): model: StagehandRecommendedLlmModel = SchemaField( title="LLM Model", description="LLM to use for Stagehand (provider is inferred)", - default=StagehandRecommendedLlmModel.CLAUDE_3_7_SONNET, + default=StagehandRecommendedLlmModel.CLAUDE_4_5_SONNET, advanced=False, ) model_credentials: AICredentials = AICredentialsField() @@ -330,7 +330,7 @@ class StagehandExtractBlock(Block): model: StagehandRecommendedLlmModel = SchemaField( title="LLM Model", description="LLM to use for Stagehand (provider is inferred)", - default=StagehandRecommendedLlmModel.CLAUDE_3_7_SONNET, + default=StagehandRecommendedLlmModel.CLAUDE_4_5_SONNET, advanced=False, ) model_credentials: AICredentials = AICredentialsField() diff --git a/autogpt_platform/backend/backend/data/block_cost_config.py b/autogpt_platform/backend/backend/data/block_cost_config.py index 1b54ae0942..f46cc726f0 100644 --- a/autogpt_platform/backend/backend/data/block_cost_config.py +++ b/autogpt_platform/backend/backend/data/block_cost_config.py @@ -81,7 
+81,6 @@ MODEL_COST: dict[LlmModel, int] = { LlmModel.CLAUDE_4_5_HAIKU: 4, LlmModel.CLAUDE_4_5_OPUS: 14, LlmModel.CLAUDE_4_5_SONNET: 9, - LlmModel.CLAUDE_3_7_SONNET: 5, LlmModel.CLAUDE_3_HAIKU: 1, LlmModel.AIML_API_QWEN2_5_72B: 1, LlmModel.AIML_API_LLAMA3_1_70B: 1, diff --git a/autogpt_platform/backend/backend/data/graph.py b/autogpt_platform/backend/backend/data/graph.py index c1f38f81d5..ee6cd2e4b0 100644 --- a/autogpt_platform/backend/backend/data/graph.py +++ b/autogpt_platform/backend/backend/data/graph.py @@ -1028,6 +1028,39 @@ async def get_graph( return GraphModel.from_db(graph, for_export) +async def get_store_listed_graphs(*graph_ids: str) -> dict[str, GraphModel]: + """Batch-fetch multiple store-listed graphs by their IDs. + + Only returns graphs that have approved store listings (publicly available). + Does not require permission checks since store-listed graphs are public. + + Args: + *graph_ids: Variable number of graph IDs to fetch + + Returns: + Dict mapping graph_id to GraphModel for graphs with approved store listings + """ + if not graph_ids: + return {} + + store_listings = await StoreListingVersion.prisma().find_many( + where={ + "agentGraphId": {"in": list(graph_ids)}, + "submissionStatus": SubmissionStatus.APPROVED, + "isDeleted": False, + }, + include={"AgentGraph": {"include": AGENT_GRAPH_INCLUDE}}, + distinct=["agentGraphId"], + order={"agentGraphVersion": "desc"}, + ) + + return { + listing.agentGraphId: GraphModel.from_db(listing.AgentGraph) + for listing in store_listings + if listing.AgentGraph + } + + async def get_graph_as_admin( graph_id: str, version: int | None = None, diff --git a/autogpt_platform/backend/backend/data/model.py b/autogpt_platform/backend/backend/data/model.py index 2cc73f6b7b..331126fbd6 100644 --- a/autogpt_platform/backend/backend/data/model.py +++ b/autogpt_platform/backend/backend/data/model.py @@ -666,10 +666,16 @@ class CredentialsFieldInfo(BaseModel, Generic[CP, CT]): if not (self.discriminator and self.discriminator_mapping): return self + try: + provider = self.discriminator_mapping[discriminator_value] + except KeyError: + raise ValueError( + f"Model '{discriminator_value}' is not supported. " + "It may have been deprecated. Please update your agent configuration." 
+ ) + return CredentialsFieldInfo( - credentials_provider=frozenset( - [self.discriminator_mapping[discriminator_value]] - ), + credentials_provider=frozenset([provider]), credentials_types=self.supported_types, credentials_scopes=self.required_scopes, discriminator=self.discriminator, diff --git a/autogpt_platform/backend/backend/integrations/webhooks/utils_test.py b/autogpt_platform/backend/backend/integrations/webhooks/utils_test.py new file mode 100644 index 0000000000..bc502a8e44 --- /dev/null +++ b/autogpt_platform/backend/backend/integrations/webhooks/utils_test.py @@ -0,0 +1,39 @@ +from urllib.parse import urlparse + +import fastapi +from fastapi.routing import APIRoute + +from backend.api.features.integrations.router import router as integrations_router +from backend.integrations.providers import ProviderName +from backend.integrations.webhooks import utils as webhooks_utils + + +def test_webhook_ingress_url_matches_route(monkeypatch) -> None: + app = fastapi.FastAPI() + app.include_router(integrations_router, prefix="/api/integrations") + + provider = ProviderName.GITHUB + webhook_id = "webhook_123" + base_url = "https://example.com" + + monkeypatch.setattr(webhooks_utils.app_config, "platform_base_url", base_url) + + route = next( + route + for route in integrations_router.routes + if isinstance(route, APIRoute) + and route.path == "/{provider}/webhooks/{webhook_id}/ingress" + and "POST" in route.methods + ) + expected_path = f"/api/integrations{route.path}".format( + provider=provider.value, + webhook_id=webhook_id, + ) + actual_url = urlparse(webhooks_utils.webhook_ingress_url(provider, webhook_id)) + expected_base = urlparse(base_url) + + assert (actual_url.scheme, actual_url.netloc) == ( + expected_base.scheme, + expected_base.netloc, + ) + assert actual_url.path == expected_path diff --git a/autogpt_platform/backend/migrations/20260126120000_migrate_claude_3_7_to_4_5_sonnet/migration.sql b/autogpt_platform/backend/migrations/20260126120000_migrate_claude_3_7_to_4_5_sonnet/migration.sql new file mode 100644 index 0000000000..5746c80820 --- /dev/null +++ b/autogpt_platform/backend/migrations/20260126120000_migrate_claude_3_7_to_4_5_sonnet/migration.sql @@ -0,0 +1,22 @@ +-- Migrate Claude 3.7 Sonnet to Claude 4.5 Sonnet +-- This updates all AgentNode blocks that use the deprecated Claude 3.7 Sonnet model +-- Anthropic is retiring claude-3-7-sonnet-20250219 on February 19, 2026 + +-- Update AgentNode constant inputs +UPDATE "AgentNode" +SET "constantInput" = JSONB_SET( + "constantInput"::jsonb, + '{model}', + '"claude-sonnet-4-5-20250929"'::jsonb + ) +WHERE "constantInput"::jsonb->>'model' = 'claude-3-7-sonnet-20250219'; + +-- Update AgentPreset input overrides (stored in AgentNodeExecutionInputOutput) +UPDATE "AgentNodeExecutionInputOutput" +SET "data" = JSONB_SET( + "data"::jsonb, + '{model}', + '"claude-sonnet-4-5-20250929"'::jsonb + ) +WHERE "agentPresetId" IS NOT NULL + AND "data"::jsonb->>'model' = 'claude-3-7-sonnet-20250219'; diff --git a/autogpt_platform/backend/snapshots/agts_by_creator b/autogpt_platform/backend/snapshots/agts_by_creator index 4d6dd12920..3f2e128a0d 100644 --- a/autogpt_platform/backend/snapshots/agts_by_creator +++ b/autogpt_platform/backend/snapshots/agts_by_creator @@ -9,7 +9,8 @@ "sub_heading": "Creator agent subheading", "description": "Creator agent description", "runs": 50, - "rating": 4.0 + "rating": 4.0, + "agent_graph_id": "test-graph-2" } ], "pagination": { diff --git a/autogpt_platform/backend/snapshots/agts_category 
b/autogpt_platform/backend/snapshots/agts_category index f65925ead3..4d0531763c 100644 --- a/autogpt_platform/backend/snapshots/agts_category +++ b/autogpt_platform/backend/snapshots/agts_category @@ -9,7 +9,8 @@ "sub_heading": "Category agent subheading", "description": "Category agent description", "runs": 60, - "rating": 4.1 + "rating": 4.1, + "agent_graph_id": "test-graph-category" } ], "pagination": { diff --git a/autogpt_platform/backend/snapshots/agts_pagination b/autogpt_platform/backend/snapshots/agts_pagination index 82e7f5f9bf..7b946157fb 100644 --- a/autogpt_platform/backend/snapshots/agts_pagination +++ b/autogpt_platform/backend/snapshots/agts_pagination @@ -9,7 +9,8 @@ "sub_heading": "Agent 0 subheading", "description": "Agent 0 description", "runs": 0, - "rating": 4.0 + "rating": 4.0, + "agent_graph_id": "test-graph-2" }, { "slug": "agent-1", @@ -20,7 +21,8 @@ "sub_heading": "Agent 1 subheading", "description": "Agent 1 description", "runs": 10, - "rating": 4.0 + "rating": 4.0, + "agent_graph_id": "test-graph-2" }, { "slug": "agent-2", @@ -31,7 +33,8 @@ "sub_heading": "Agent 2 subheading", "description": "Agent 2 description", "runs": 20, - "rating": 4.0 + "rating": 4.0, + "agent_graph_id": "test-graph-2" }, { "slug": "agent-3", @@ -42,7 +45,8 @@ "sub_heading": "Agent 3 subheading", "description": "Agent 3 description", "runs": 30, - "rating": 4.0 + "rating": 4.0, + "agent_graph_id": "test-graph-2" }, { "slug": "agent-4", @@ -53,7 +57,8 @@ "sub_heading": "Agent 4 subheading", "description": "Agent 4 description", "runs": 40, - "rating": 4.0 + "rating": 4.0, + "agent_graph_id": "test-graph-2" } ], "pagination": { diff --git a/autogpt_platform/backend/snapshots/agts_search b/autogpt_platform/backend/snapshots/agts_search index ca3f504584..ae9cc116bc 100644 --- a/autogpt_platform/backend/snapshots/agts_search +++ b/autogpt_platform/backend/snapshots/agts_search @@ -9,7 +9,8 @@ "sub_heading": "Search agent subheading", "description": "Specific search term description", "runs": 75, - "rating": 4.2 + "rating": 4.2, + "agent_graph_id": "test-graph-search" } ], "pagination": { diff --git a/autogpt_platform/backend/snapshots/agts_sorted b/autogpt_platform/backend/snapshots/agts_sorted index cddead76a5..b182256b2c 100644 --- a/autogpt_platform/backend/snapshots/agts_sorted +++ b/autogpt_platform/backend/snapshots/agts_sorted @@ -9,7 +9,8 @@ "sub_heading": "Top agent subheading", "description": "Top agent description", "runs": 1000, - "rating": 5.0 + "rating": 5.0, + "agent_graph_id": "test-graph-3" } ], "pagination": { diff --git a/autogpt_platform/backend/snapshots/feat_agts b/autogpt_platform/backend/snapshots/feat_agts index d57996a768..4f85786434 100644 --- a/autogpt_platform/backend/snapshots/feat_agts +++ b/autogpt_platform/backend/snapshots/feat_agts @@ -9,7 +9,8 @@ "sub_heading": "Featured agent subheading", "description": "Featured agent description", "runs": 100, - "rating": 4.5 + "rating": 4.5, + "agent_graph_id": "test-graph-1" } ], "pagination": { diff --git a/autogpt_platform/backend/snapshots/lib_agts_search b/autogpt_platform/backend/snapshots/lib_agts_search index 67c307b09e..3ce8402b63 100644 --- a/autogpt_platform/backend/snapshots/lib_agts_search +++ b/autogpt_platform/backend/snapshots/lib_agts_search @@ -31,6 +31,10 @@ "has_sensitive_action": false, "trigger_setup_info": null, "new_output": false, + "execution_count": 0, + "success_rate": null, + "avg_correctness_score": null, + "recent_executions": [], "can_access_graph": true, "is_latest_version": true, 
"is_favorite": false, @@ -72,6 +76,10 @@ "has_sensitive_action": false, "trigger_setup_info": null, "new_output": false, + "execution_count": 0, + "success_rate": null, + "avg_correctness_score": null, + "recent_executions": [], "can_access_graph": false, "is_latest_version": true, "is_favorite": false, diff --git a/autogpt_platform/backend/test/agent_generator/test_core_integration.py b/autogpt_platform/backend/test/agent_generator/test_core_integration.py index bdcc24ba79..05ce4a3aff 100644 --- a/autogpt_platform/backend/test/agent_generator/test_core_integration.py +++ b/autogpt_platform/backend/test/agent_generator/test_core_integration.py @@ -57,7 +57,8 @@ class TestDecomposeGoal: result = await core.decompose_goal("Build a chatbot") - mock_external.assert_called_once_with("Build a chatbot", "") + # library_agents defaults to None + mock_external.assert_called_once_with("Build a chatbot", "", None) assert result == expected_result @pytest.mark.asyncio @@ -74,7 +75,8 @@ class TestDecomposeGoal: await core.decompose_goal("Build a chatbot", "Use Python") - mock_external.assert_called_once_with("Build a chatbot", "Use Python") + # library_agents defaults to None + mock_external.assert_called_once_with("Build a chatbot", "Use Python", None) @pytest.mark.asyncio async def test_returns_none_on_service_failure(self): @@ -109,7 +111,8 @@ class TestGenerateAgent: instructions = {"type": "instructions", "steps": ["Step 1"]} result = await core.generate_agent(instructions) - mock_external.assert_called_once_with(instructions) + # library_agents defaults to None + mock_external.assert_called_once_with(instructions, None) # Result should have id, version, is_active added if not present assert result is not None assert result["name"] == "Test Agent" @@ -174,7 +177,8 @@ class TestGenerateAgentPatch: current_agent = {"nodes": [], "links": []} result = await core.generate_agent_patch("Add a node", current_agent) - mock_external.assert_called_once_with("Add a node", current_agent) + # library_agents defaults to None + mock_external.assert_called_once_with("Add a node", current_agent, None) assert result == expected_result @pytest.mark.asyncio diff --git a/autogpt_platform/backend/test/agent_generator/test_library_agents.py b/autogpt_platform/backend/test/agent_generator/test_library_agents.py new file mode 100644 index 0000000000..8387339582 --- /dev/null +++ b/autogpt_platform/backend/test/agent_generator/test_library_agents.py @@ -0,0 +1,857 @@ +""" +Tests for library agent fetching functionality in agent generator. + +This test suite verifies the search-based library agent fetching, +including the combination of library and marketplace agents. 
+""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from backend.api.features.chat.tools.agent_generator import core + + +class TestGetLibraryAgentsForGeneration: + """Test get_library_agents_for_generation function.""" + + @pytest.mark.asyncio + async def test_fetches_agents_with_search_term(self): + """Test that search_term is passed to the library db.""" + # Create a mock agent with proper attribute values + mock_agent = MagicMock() + mock_agent.graph_id = "agent-123" + mock_agent.graph_version = 1 + mock_agent.name = "Email Agent" + mock_agent.description = "Sends emails" + mock_agent.input_schema = {"properties": {}} + mock_agent.output_schema = {"properties": {}} + mock_agent.recent_executions = [] + + mock_response = MagicMock() + mock_response.agents = [mock_agent] + + with patch.object( + core.library_db, + "list_library_agents", + new_callable=AsyncMock, + return_value=mock_response, + ) as mock_list: + result = await core.get_library_agents_for_generation( + user_id="user-123", + search_query="send email", + ) + + mock_list.assert_called_once_with( + user_id="user-123", + search_term="send email", + page=1, + page_size=15, + include_executions=True, + ) + + # Verify result format + assert len(result) == 1 + assert result[0]["graph_id"] == "agent-123" + assert result[0]["name"] == "Email Agent" + + @pytest.mark.asyncio + async def test_excludes_specified_graph_id(self): + """Test that agents with excluded graph_id are filtered out.""" + mock_response = MagicMock() + mock_response.agents = [ + MagicMock( + graph_id="agent-123", + graph_version=1, + name="Agent 1", + description="First agent", + input_schema={}, + output_schema={}, + recent_executions=[], + ), + MagicMock( + graph_id="agent-456", + graph_version=1, + name="Agent 2", + description="Second agent", + input_schema={}, + output_schema={}, + recent_executions=[], + ), + ] + + with patch.object( + core.library_db, + "list_library_agents", + new_callable=AsyncMock, + return_value=mock_response, + ): + result = await core.get_library_agents_for_generation( + user_id="user-123", + exclude_graph_id="agent-123", + ) + + # Verify the excluded agent is not in results + assert len(result) == 1 + assert result[0]["graph_id"] == "agent-456" + + @pytest.mark.asyncio + async def test_respects_max_results(self): + """Test that max_results parameter limits the page_size.""" + mock_response = MagicMock() + mock_response.agents = [] + + with patch.object( + core.library_db, + "list_library_agents", + new_callable=AsyncMock, + return_value=mock_response, + ) as mock_list: + await core.get_library_agents_for_generation( + user_id="user-123", + max_results=5, + ) + + mock_list.assert_called_once_with( + user_id="user-123", + search_term=None, + page=1, + page_size=5, + include_executions=True, + ) + + +class TestSearchMarketplaceAgentsForGeneration: + """Test search_marketplace_agents_for_generation function.""" + + @pytest.mark.asyncio + async def test_searches_marketplace_with_query(self): + """Test that marketplace is searched with the query.""" + mock_response = MagicMock() + mock_response.agents = [ + MagicMock( + agent_name="Public Agent", + description="A public agent", + sub_heading="Does something useful", + creator="creator-1", + agent_graph_id="graph-123", + ) + ] + + mock_graph = MagicMock() + mock_graph.id = "graph-123" + mock_graph.version = 1 + mock_graph.input_schema = {"type": "object"} + mock_graph.output_schema = {"type": "object"} + + with ( + patch( + 
"backend.api.features.store.db.get_store_agents", + new_callable=AsyncMock, + return_value=mock_response, + ) as mock_search, + patch( + "backend.api.features.chat.tools.agent_generator.core.get_store_listed_graphs", + new_callable=AsyncMock, + return_value={"graph-123": mock_graph}, + ), + ): + result = await core.search_marketplace_agents_for_generation( + search_query="automation", + max_results=10, + ) + + mock_search.assert_called_once_with( + search_query="automation", + page=1, + page_size=10, + ) + + assert len(result) == 1 + assert result[0]["name"] == "Public Agent" + assert result[0]["graph_id"] == "graph-123" + + @pytest.mark.asyncio + async def test_handles_marketplace_error_gracefully(self): + """Test that marketplace errors don't crash the function.""" + with patch( + "backend.api.features.store.db.get_store_agents", + new_callable=AsyncMock, + side_effect=Exception("Marketplace unavailable"), + ): + result = await core.search_marketplace_agents_for_generation( + search_query="test" + ) + + # Should return empty list, not raise exception + assert result == [] + + +class TestGetAllRelevantAgentsForGeneration: + """Test get_all_relevant_agents_for_generation function.""" + + @pytest.mark.asyncio + async def test_combines_library_and_marketplace_agents(self): + """Test that agents from both sources are combined.""" + library_agents = [ + { + "graph_id": "lib-123", + "graph_version": 1, + "name": "Library Agent", + "description": "From library", + "input_schema": {}, + "output_schema": {}, + } + ] + + marketplace_agents = [ + { + "graph_id": "market-456", + "graph_version": 1, + "name": "Market Agent", + "description": "From marketplace", + "input_schema": {}, + "output_schema": {}, + } + ] + + with patch.object( + core, + "get_library_agents_for_generation", + new_callable=AsyncMock, + return_value=library_agents, + ): + with patch.object( + core, + "search_marketplace_agents_for_generation", + new_callable=AsyncMock, + return_value=marketplace_agents, + ): + result = await core.get_all_relevant_agents_for_generation( + user_id="user-123", + search_query="test query", + include_marketplace=True, + ) + + # Library agents should come first + assert len(result) == 2 + assert result[0]["name"] == "Library Agent" + assert result[1]["name"] == "Market Agent" + + @pytest.mark.asyncio + async def test_deduplicates_by_graph_id(self): + """Test that marketplace agents with same graph_id as library are excluded.""" + library_agents = [ + { + "graph_id": "shared-123", + "graph_version": 1, + "name": "Shared Agent", + "description": "From library", + "input_schema": {}, + "output_schema": {}, + } + ] + + marketplace_agents = [ + { + "graph_id": "shared-123", # Same graph_id, should be deduplicated + "graph_version": 1, + "name": "Shared Agent", + "description": "From marketplace", + "input_schema": {}, + "output_schema": {}, + }, + { + "graph_id": "unique-456", + "graph_version": 1, + "name": "Unique Agent", + "description": "Only in marketplace", + "input_schema": {}, + "output_schema": {}, + }, + ] + + with patch.object( + core, + "get_library_agents_for_generation", + new_callable=AsyncMock, + return_value=library_agents, + ): + with patch.object( + core, + "search_marketplace_agents_for_generation", + new_callable=AsyncMock, + return_value=marketplace_agents, + ): + result = await core.get_all_relevant_agents_for_generation( + user_id="user-123", + search_query="test", + include_marketplace=True, + ) + + # Shared Agent from marketplace should be excluded by graph_id + assert len(result) 
== 2 + names = [a["name"] for a in result] + assert "Shared Agent" in names + assert "Unique Agent" in names + + @pytest.mark.asyncio + async def test_skips_marketplace_when_disabled(self): + """Test that marketplace is not searched when include_marketplace=False.""" + library_agents = [ + { + "graph_id": "lib-123", + "graph_version": 1, + "name": "Library Agent", + "description": "From library", + "input_schema": {}, + "output_schema": {}, + } + ] + + with patch.object( + core, + "get_library_agents_for_generation", + new_callable=AsyncMock, + return_value=library_agents, + ): + with patch.object( + core, + "search_marketplace_agents_for_generation", + new_callable=AsyncMock, + ) as mock_marketplace: + result = await core.get_all_relevant_agents_for_generation( + user_id="user-123", + search_query="test", + include_marketplace=False, + ) + + # Marketplace should not be called + mock_marketplace.assert_not_called() + assert len(result) == 1 + + @pytest.mark.asyncio + async def test_skips_marketplace_when_no_search_query(self): + """Test that marketplace is not searched without a search query.""" + library_agents = [ + { + "graph_id": "lib-123", + "graph_version": 1, + "name": "Library Agent", + "description": "From library", + "input_schema": {}, + "output_schema": {}, + } + ] + + with patch.object( + core, + "get_library_agents_for_generation", + new_callable=AsyncMock, + return_value=library_agents, + ): + with patch.object( + core, + "search_marketplace_agents_for_generation", + new_callable=AsyncMock, + ) as mock_marketplace: + result = await core.get_all_relevant_agents_for_generation( + user_id="user-123", + search_query=None, # No search query + include_marketplace=True, + ) + + # Marketplace should not be called without search query + mock_marketplace.assert_not_called() + assert len(result) == 1 + + +class TestExtractSearchTermsFromSteps: + """Test extract_search_terms_from_steps function.""" + + def test_extracts_terms_from_instructions_type(self): + """Test extraction from valid instructions decomposition result.""" + decomposition_result = { + "type": "instructions", + "steps": [ + { + "description": "Send an email notification", + "block_name": "GmailSendBlock", + }, + {"description": "Fetch weather data", "action": "Get weather API"}, + ], + } + + result = core.extract_search_terms_from_steps(decomposition_result) + + assert "Send an email notification" in result + assert "GmailSendBlock" in result + assert "Fetch weather data" in result + assert "Get weather API" in result + + def test_returns_empty_for_non_instructions_type(self): + """Test that non-instructions types return empty list.""" + decomposition_result = { + "type": "clarifying_questions", + "questions": [{"question": "What email?"}], + } + + result = core.extract_search_terms_from_steps(decomposition_result) + + assert result == [] + + def test_deduplicates_terms_case_insensitively(self): + """Test that duplicate terms are removed (case-insensitive).""" + decomposition_result = { + "type": "instructions", + "steps": [ + {"description": "Send Email", "name": "send email"}, + {"description": "Other task"}, + ], + } + + result = core.extract_search_terms_from_steps(decomposition_result) + + # Should only have one "send email" variant + email_terms = [t for t in result if "email" in t.lower()] + assert len(email_terms) == 1 + + def test_filters_short_terms(self): + """Test that terms with 3 or fewer characters are filtered out.""" + decomposition_result = { + "type": "instructions", + "steps": [ + {"description": 
"ab", "action": "xyz"}, # Both too short + {"description": "Valid term here"}, + ], + } + + result = core.extract_search_terms_from_steps(decomposition_result) + + assert "ab" not in result + assert "xyz" not in result + assert "Valid term here" in result + + def test_handles_empty_steps(self): + """Test handling of empty steps list.""" + decomposition_result = { + "type": "instructions", + "steps": [], + } + + result = core.extract_search_terms_from_steps(decomposition_result) + + assert result == [] + + +class TestEnrichLibraryAgentsFromSteps: + """Test enrich_library_agents_from_steps function.""" + + @pytest.mark.asyncio + async def test_enriches_with_additional_agents(self): + """Test that additional agents are found based on steps.""" + existing_agents = [ + { + "graph_id": "existing-123", + "graph_version": 1, + "name": "Existing Agent", + "description": "Already fetched", + "input_schema": {}, + "output_schema": {}, + } + ] + + additional_agents = [ + { + "graph_id": "new-456", + "graph_version": 1, + "name": "Email Agent", + "description": "For sending emails", + "input_schema": {}, + "output_schema": {}, + } + ] + + decomposition_result = { + "type": "instructions", + "steps": [ + {"description": "Send email notification"}, + ], + } + + with patch.object( + core, + "get_all_relevant_agents_for_generation", + new_callable=AsyncMock, + return_value=additional_agents, + ): + result = await core.enrich_library_agents_from_steps( + user_id="user-123", + decomposition_result=decomposition_result, + existing_agents=existing_agents, + ) + + # Should have both existing and new agents + assert len(result) == 2 + names = [a["name"] for a in result] + assert "Existing Agent" in names + assert "Email Agent" in names + + @pytest.mark.asyncio + async def test_deduplicates_by_graph_id(self): + """Test that agents with same graph_id are not duplicated.""" + existing_agents = [ + { + "graph_id": "agent-123", + "graph_version": 1, + "name": "Existing Agent", + "description": "Already fetched", + "input_schema": {}, + "output_schema": {}, + } + ] + + # Additional search returns same agent + additional_agents = [ + { + "graph_id": "agent-123", # Same ID + "graph_version": 1, + "name": "Existing Agent Copy", + "description": "Same agent different name", + "input_schema": {}, + "output_schema": {}, + } + ] + + decomposition_result = { + "type": "instructions", + "steps": [{"description": "Some action"}], + } + + with patch.object( + core, + "get_all_relevant_agents_for_generation", + new_callable=AsyncMock, + return_value=additional_agents, + ): + result = await core.enrich_library_agents_from_steps( + user_id="user-123", + decomposition_result=decomposition_result, + existing_agents=existing_agents, + ) + + # Should not duplicate + assert len(result) == 1 + + @pytest.mark.asyncio + async def test_deduplicates_by_name(self): + """Test that agents with same name are not duplicated.""" + existing_agents = [ + { + "graph_id": "agent-123", + "graph_version": 1, + "name": "Email Agent", + "description": "Already fetched", + "input_schema": {}, + "output_schema": {}, + } + ] + + # Additional search returns agent with same name but different ID + additional_agents = [ + { + "graph_id": "agent-456", # Different ID + "graph_version": 1, + "name": "Email Agent", # Same name + "description": "Different agent same name", + "input_schema": {}, + "output_schema": {}, + } + ] + + decomposition_result = { + "type": "instructions", + "steps": [{"description": "Send email"}], + } + + with patch.object( + core, + 
"get_all_relevant_agents_for_generation", + new_callable=AsyncMock, + return_value=additional_agents, + ): + result = await core.enrich_library_agents_from_steps( + user_id="user-123", + decomposition_result=decomposition_result, + existing_agents=existing_agents, + ) + + # Should not duplicate by name + assert len(result) == 1 + assert result[0].get("graph_id") == "agent-123" # Original kept + + @pytest.mark.asyncio + async def test_returns_existing_when_no_steps(self): + """Test that existing agents are returned when no search terms extracted.""" + existing_agents = [ + { + "graph_id": "existing-123", + "graph_version": 1, + "name": "Existing Agent", + "description": "Already fetched", + "input_schema": {}, + "output_schema": {}, + } + ] + + decomposition_result = { + "type": "clarifying_questions", # Not instructions type + "questions": [], + } + + result = await core.enrich_library_agents_from_steps( + user_id="user-123", + decomposition_result=decomposition_result, + existing_agents=existing_agents, + ) + + # Should return existing unchanged + assert result == existing_agents + + @pytest.mark.asyncio + async def test_limits_search_terms_to_three(self): + """Test that only first 3 search terms are used.""" + existing_agents = [] + + decomposition_result = { + "type": "instructions", + "steps": [ + {"description": "First action"}, + {"description": "Second action"}, + {"description": "Third action"}, + {"description": "Fourth action"}, + {"description": "Fifth action"}, + ], + } + + call_count = 0 + + async def mock_get_agents(*args, **kwargs): + nonlocal call_count + call_count += 1 + return [] + + with patch.object( + core, + "get_all_relevant_agents_for_generation", + side_effect=mock_get_agents, + ): + await core.enrich_library_agents_from_steps( + user_id="user-123", + decomposition_result=decomposition_result, + existing_agents=existing_agents, + ) + + # Should only make 3 calls (limited to first 3 terms) + assert call_count == 3 + + +class TestExtractUuidsFromText: + """Test extract_uuids_from_text function.""" + + def test_extracts_single_uuid(self): + """Test extraction of a single UUID from text.""" + text = "Use my agent 46631191-e8a8-486f-ad90-84f89738321d for this task" + result = core.extract_uuids_from_text(text) + assert len(result) == 1 + assert "46631191-e8a8-486f-ad90-84f89738321d" in result + + def test_extracts_multiple_uuids(self): + """Test extraction of multiple UUIDs from text.""" + text = ( + "Combine agents 11111111-1111-4111-8111-111111111111 " + "and 22222222-2222-4222-9222-222222222222" + ) + result = core.extract_uuids_from_text(text) + assert len(result) == 2 + assert "11111111-1111-4111-8111-111111111111" in result + assert "22222222-2222-4222-9222-222222222222" in result + + def test_deduplicates_uuids(self): + """Test that duplicate UUIDs are deduplicated.""" + text = ( + "Use 46631191-e8a8-486f-ad90-84f89738321d twice: " + "46631191-e8a8-486f-ad90-84f89738321d" + ) + result = core.extract_uuids_from_text(text) + assert len(result) == 1 + + def test_normalizes_to_lowercase(self): + """Test that UUIDs are normalized to lowercase.""" + text = "Use 46631191-E8A8-486F-AD90-84F89738321D" + result = core.extract_uuids_from_text(text) + assert result[0] == "46631191-e8a8-486f-ad90-84f89738321d" + + def test_returns_empty_for_no_uuids(self): + """Test that empty list is returned when no UUIDs found.""" + text = "Create an email agent that sends notifications" + result = core.extract_uuids_from_text(text) + assert result == [] + + def 
test_ignores_invalid_uuids(self): + """Test that invalid UUID-like strings are ignored.""" + text = "Not a valid UUID: 12345678-1234-1234-1234-123456789abc" + result = core.extract_uuids_from_text(text) + # UUID v4 requires specific patterns (4 in third group, 8/9/a/b in fourth) + assert len(result) == 0 + + +class TestGetLibraryAgentById: + """Test get_library_agent_by_id function (and its alias get_library_agent_by_graph_id).""" + + @pytest.mark.asyncio + async def test_returns_agent_when_found_by_graph_id(self): + """Test that agent is returned when found by graph_id.""" + mock_agent = MagicMock() + mock_agent.graph_id = "agent-123" + mock_agent.graph_version = 1 + mock_agent.name = "Test Agent" + mock_agent.description = "Test description" + mock_agent.input_schema = {"properties": {}} + mock_agent.output_schema = {"properties": {}} + + with patch.object( + core.library_db, + "get_library_agent_by_graph_id", + new_callable=AsyncMock, + return_value=mock_agent, + ): + result = await core.get_library_agent_by_id("user-123", "agent-123") + + assert result is not None + assert result["graph_id"] == "agent-123" + assert result["name"] == "Test Agent" + + @pytest.mark.asyncio + async def test_falls_back_to_library_agent_id(self): + """Test that lookup falls back to library agent ID when graph_id not found.""" + mock_agent = MagicMock() + mock_agent.graph_id = "graph-456" # Different from the lookup ID + mock_agent.graph_version = 1 + mock_agent.name = "Library Agent" + mock_agent.description = "Found by library ID" + mock_agent.input_schema = {"properties": {}} + mock_agent.output_schema = {"properties": {}} + + with ( + patch.object( + core.library_db, + "get_library_agent_by_graph_id", + new_callable=AsyncMock, + return_value=None, # Not found by graph_id + ), + patch.object( + core.library_db, + "get_library_agent", + new_callable=AsyncMock, + return_value=mock_agent, # Found by library ID + ), + ): + result = await core.get_library_agent_by_id("user-123", "library-id-123") + + assert result is not None + assert result["graph_id"] == "graph-456" + assert result["name"] == "Library Agent" + + @pytest.mark.asyncio + async def test_returns_none_when_not_found_by_either_method(self): + """Test that None is returned when agent not found by either method.""" + with ( + patch.object( + core.library_db, + "get_library_agent_by_graph_id", + new_callable=AsyncMock, + return_value=None, + ), + patch.object( + core.library_db, + "get_library_agent", + new_callable=AsyncMock, + side_effect=core.NotFoundError("Not found"), + ), + ): + result = await core.get_library_agent_by_id("user-123", "nonexistent") + + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_on_exception(self): + """Test that None is returned when exception occurs in both lookups.""" + with ( + patch.object( + core.library_db, + "get_library_agent_by_graph_id", + new_callable=AsyncMock, + side_effect=Exception("Database error"), + ), + patch.object( + core.library_db, + "get_library_agent", + new_callable=AsyncMock, + side_effect=Exception("Database error"), + ), + ): + result = await core.get_library_agent_by_id("user-123", "agent-123") + + assert result is None + + @pytest.mark.asyncio + async def test_alias_works(self): + """Test that get_library_agent_by_graph_id is an alias for get_library_agent_by_id.""" + assert core.get_library_agent_by_graph_id is core.get_library_agent_by_id + + +class TestGetAllRelevantAgentsWithUuids: + """Test UUID extraction in get_all_relevant_agents_for_generation.""" + + 
@pytest.mark.asyncio + async def test_fetches_explicitly_mentioned_agents(self): + """Test that agents mentioned by UUID are fetched directly.""" + mock_agent = MagicMock() + mock_agent.graph_id = "46631191-e8a8-486f-ad90-84f89738321d" + mock_agent.graph_version = 1 + mock_agent.name = "Mentioned Agent" + mock_agent.description = "Explicitly mentioned" + mock_agent.input_schema = {} + mock_agent.output_schema = {} + + mock_response = MagicMock() + mock_response.agents = [] + + with ( + patch.object( + core.library_db, + "get_library_agent_by_graph_id", + new_callable=AsyncMock, + return_value=mock_agent, + ), + patch.object( + core.library_db, + "list_library_agents", + new_callable=AsyncMock, + return_value=mock_response, + ), + ): + result = await core.get_all_relevant_agents_for_generation( + user_id="user-123", + search_query="Use agent 46631191-e8a8-486f-ad90-84f89738321d", + include_marketplace=False, + ) + + assert len(result) == 1 + assert result[0].get("graph_id") == "46631191-e8a8-486f-ad90-84f89738321d" + assert result[0].get("name") == "Mentioned Agent" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/autogpt_platform/backend/test/agent_generator/test_service.py b/autogpt_platform/backend/test/agent_generator/test_service.py index 81ff794532..cc37c428c0 100644 --- a/autogpt_platform/backend/test/agent_generator/test_service.py +++ b/autogpt_platform/backend/test/agent_generator/test_service.py @@ -102,7 +102,7 @@ class TestDecomposeGoalExternal: @pytest.mark.asyncio async def test_decompose_goal_with_context(self): - """Test decomposition with additional context.""" + """Test decomposition with additional context enriched into description.""" mock_response = MagicMock() mock_response.json.return_value = { "success": True, @@ -119,9 +119,12 @@ class TestDecomposeGoalExternal: "Build a chatbot", context="Use Python" ) + expected_description = ( + "Build a chatbot\n\nAdditional context from user:\nUse Python" + ) mock_client.post.assert_called_once_with( "/api/decompose-description", - json={"description": "Build a chatbot", "user_instruction": "Use Python"}, + json={"description": expected_description}, ) @pytest.mark.asyncio @@ -151,15 +154,20 @@ class TestDecomposeGoalExternal: @pytest.mark.asyncio async def test_decompose_goal_handles_http_error(self): """Test decomposition handles HTTP errors gracefully.""" + mock_response = MagicMock() + mock_response.status_code = 500 mock_client = AsyncMock() mock_client.post.side_effect = httpx.HTTPStatusError( - "Server error", request=MagicMock(), response=MagicMock() + "Server error", request=MagicMock(), response=mock_response ) with patch.object(service, "_get_client", return_value=mock_client): result = await service.decompose_goal_external("Build a chatbot") - assert result is None + assert result is not None + assert result.get("type") == "error" + assert result.get("error_type") == "http_error" + assert "Server error" in result.get("error", "") @pytest.mark.asyncio async def test_decompose_goal_handles_request_error(self): @@ -170,7 +178,10 @@ class TestDecomposeGoalExternal: with patch.object(service, "_get_client", return_value=mock_client): result = await service.decompose_goal_external("Build a chatbot") - assert result is None + assert result is not None + assert result.get("type") == "error" + assert result.get("error_type") == "connection_error" + assert "Connection failed" in result.get("error", "") @pytest.mark.asyncio async def test_decompose_goal_handles_service_error(self): @@ -179,6 +190,7 
@@ class TestDecomposeGoalExternal: mock_response.json.return_value = { "success": False, "error": "Internal error", + "error_type": "internal_error", } mock_response.raise_for_status = MagicMock() @@ -188,7 +200,10 @@ class TestDecomposeGoalExternal: with patch.object(service, "_get_client", return_value=mock_client): result = await service.decompose_goal_external("Build a chatbot") - assert result is None + assert result is not None + assert result.get("type") == "error" + assert result.get("error") == "Internal error" + assert result.get("error_type") == "internal_error" class TestGenerateAgentExternal: @@ -236,7 +251,10 @@ class TestGenerateAgentExternal: with patch.object(service, "_get_client", return_value=mock_client): result = await service.generate_agent_external({"steps": []}) - assert result is None + assert result is not None + assert result.get("type") == "error" + assert result.get("error_type") == "connection_error" + assert "Connection failed" in result.get("error", "") class TestGenerateAgentPatchExternal: @@ -418,5 +436,139 @@ class TestGetBlocksExternal: assert result is None +class TestLibraryAgentsPassthrough: + """Test that library_agents are passed correctly in all requests.""" + + def setup_method(self): + """Reset client singleton before each test.""" + service._settings = None + service._client = None + + @pytest.mark.asyncio + async def test_decompose_goal_passes_library_agents(self): + """Test that library_agents are included in decompose goal payload.""" + library_agents = [ + { + "graph_id": "agent-123", + "graph_version": 1, + "name": "Email Sender", + "description": "Sends emails", + "input_schema": {"properties": {"to": {"type": "string"}}}, + "output_schema": {"properties": {"sent": {"type": "boolean"}}}, + }, + ] + + mock_response = MagicMock() + mock_response.json.return_value = { + "success": True, + "type": "instructions", + "steps": ["Step 1"], + } + mock_response.raise_for_status = MagicMock() + + mock_client = AsyncMock() + mock_client.post.return_value = mock_response + + with patch.object(service, "_get_client", return_value=mock_client): + await service.decompose_goal_external( + "Send an email", + library_agents=library_agents, + ) + + # Verify library_agents was passed in the payload + call_args = mock_client.post.call_args + assert call_args[1]["json"]["library_agents"] == library_agents + + @pytest.mark.asyncio + async def test_generate_agent_passes_library_agents(self): + """Test that library_agents are included in generate agent payload.""" + library_agents = [ + { + "graph_id": "agent-456", + "graph_version": 2, + "name": "Data Fetcher", + "description": "Fetches data from API", + "input_schema": {"properties": {"url": {"type": "string"}}}, + "output_schema": {"properties": {"data": {"type": "object"}}}, + }, + ] + + mock_response = MagicMock() + mock_response.json.return_value = { + "success": True, + "agent_json": {"name": "Test Agent", "nodes": []}, + } + mock_response.raise_for_status = MagicMock() + + mock_client = AsyncMock() + mock_client.post.return_value = mock_response + + with patch.object(service, "_get_client", return_value=mock_client): + await service.generate_agent_external( + {"steps": ["Step 1"]}, + library_agents=library_agents, + ) + + # Verify library_agents was passed in the payload + call_args = mock_client.post.call_args + assert call_args[1]["json"]["library_agents"] == library_agents + + @pytest.mark.asyncio + async def test_generate_agent_patch_passes_library_agents(self): + """Test that library_agents are 
included in patch generation payload.""" + library_agents = [ + { + "graph_id": "agent-789", + "graph_version": 1, + "name": "Slack Notifier", + "description": "Sends Slack messages", + "input_schema": {"properties": {"message": {"type": "string"}}}, + "output_schema": {"properties": {"success": {"type": "boolean"}}}, + }, + ] + + mock_response = MagicMock() + mock_response.json.return_value = { + "success": True, + "agent_json": {"name": "Updated Agent", "nodes": []}, + } + mock_response.raise_for_status = MagicMock() + + mock_client = AsyncMock() + mock_client.post.return_value = mock_response + + with patch.object(service, "_get_client", return_value=mock_client): + await service.generate_agent_patch_external( + "Add error handling", + {"name": "Original Agent", "nodes": []}, + library_agents=library_agents, + ) + + # Verify library_agents was passed in the payload + call_args = mock_client.post.call_args + assert call_args[1]["json"]["library_agents"] == library_agents + + @pytest.mark.asyncio + async def test_decompose_goal_without_library_agents(self): + """Test that decompose goal works without library_agents.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "success": True, + "type": "instructions", + "steps": ["Step 1"], + } + mock_response.raise_for_status = MagicMock() + + mock_client = AsyncMock() + mock_client.post.return_value = mock_response + + with patch.object(service, "_get_client", return_value=mock_client): + await service.decompose_goal_external("Build a workflow") + + # Verify library_agents was NOT passed when not provided + call_args = mock_client.post.call_args + assert "library_agents" not in call_args[1]["json"] + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/autogpt_platform/backend/test/e2e_test_data.py b/autogpt_platform/backend/test/e2e_test_data.py index d7576cdad3..7288197a90 100644 --- a/autogpt_platform/backend/test/e2e_test_data.py +++ b/autogpt_platform/backend/test/e2e_test_data.py @@ -43,19 +43,24 @@ faker = Faker() # Constants for data generation limits (reduced for E2E tests) NUM_USERS = 15 NUM_AGENT_BLOCKS = 30 -MIN_GRAPHS_PER_USER = 15 -MAX_GRAPHS_PER_USER = 15 +MIN_GRAPHS_PER_USER = 25 +MAX_GRAPHS_PER_USER = 25 MIN_NODES_PER_GRAPH = 3 MAX_NODES_PER_GRAPH = 6 MIN_PRESETS_PER_USER = 2 MAX_PRESETS_PER_USER = 3 -MIN_AGENTS_PER_USER = 15 -MAX_AGENTS_PER_USER = 15 +MIN_AGENTS_PER_USER = 25 +MAX_AGENTS_PER_USER = 25 MIN_EXECUTIONS_PER_GRAPH = 2 MAX_EXECUTIONS_PER_GRAPH = 8 MIN_REVIEWS_PER_VERSION = 2 MAX_REVIEWS_PER_VERSION = 5 +# Guaranteed minimums for marketplace tests (deterministic) +GUARANTEED_FEATURED_AGENTS = 8 +GUARANTEED_FEATURED_CREATORS = 5 +GUARANTEED_TOP_AGENTS = 10 + def get_image(): """Generate a consistent image URL using picsum.photos service.""" @@ -385,7 +390,7 @@ class TestDataCreator: library_agents = [] for user in self.users: - num_agents = 10 # Create exactly 10 agents per user + num_agents = random.randint(MIN_AGENTS_PER_USER, MAX_AGENTS_PER_USER) # Get available graphs for this user user_graphs = [ @@ -507,14 +512,17 @@ class TestDataCreator: existing_profiles, min(num_creators, len(existing_profiles)) ) - # Mark about 50% of creators as featured (more for testing) - num_featured = max(2, int(num_creators * 0.5)) + # Guarantee at least GUARANTEED_FEATURED_CREATORS featured creators + num_featured = max(GUARANTEED_FEATURED_CREATORS, int(num_creators * 0.5)) num_featured = min( num_featured, len(selected_profiles) ) # Don't exceed available profiles featured_profile_ids = set( 
random.sample([p.id for p in selected_profiles], num_featured) ) + print( + f"🎯 Creating {num_featured} featured creators (min: {GUARANTEED_FEATURED_CREATORS})" + ) for profile in selected_profiles: try: @@ -545,21 +553,25 @@ class TestDataCreator: return profiles async def create_test_store_submissions(self) -> List[Dict[str, Any]]: - """Create test store submissions using the API function.""" + """Create test store submissions using the API function. + + DETERMINISTIC: Guarantees minimum featured agents for E2E tests. + """ print("Creating test store submissions...") submissions = [] approved_submissions = [] + featured_count = 0 + submission_counter = 0 - # Create a special test submission for test123@gmail.com + # Create a special test submission for test123@gmail.com (ALWAYS approved + featured) test_user = next( (user for user in self.users if user["email"] == "test123@gmail.com"), None ) - if test_user: - # Special test data for consistent testing + if test_user and self.agent_graphs: test_submission_data = { "user_id": test_user["id"], - "agent_id": self.agent_graphs[0]["id"], # Use first available graph + "agent_id": self.agent_graphs[0]["id"], "agent_version": 1, "slug": "test-agent-submission", "name": "Test Agent Submission", @@ -580,37 +592,24 @@ class TestDataCreator: submissions.append(test_submission.model_dump()) print("✅ Created special test store submission for test123@gmail.com") - # Randomly approve, reject, or leave pending the test submission + # ALWAYS approve and feature the test submission if test_submission.store_listing_version_id: - random_value = random.random() - if random_value < 0.4: # 40% chance to approve - approved_submission = await review_store_submission( - store_listing_version_id=test_submission.store_listing_version_id, - is_approved=True, - external_comments="Test submission approved", - internal_comments="Auto-approved test submission", - reviewer_id=test_user["id"], - ) - approved_submissions.append(approved_submission.model_dump()) - print("✅ Approved test store submission") + approved_submission = await review_store_submission( + store_listing_version_id=test_submission.store_listing_version_id, + is_approved=True, + external_comments="Test submission approved", + internal_comments="Auto-approved test submission", + reviewer_id=test_user["id"], + ) + approved_submissions.append(approved_submission.model_dump()) + print("✅ Approved test store submission") - # Mark approved submission as featured - await prisma.storelistingversion.update( - where={"id": test_submission.store_listing_version_id}, - data={"isFeatured": True}, - ) - print("🌟 Marked test agent as FEATURED") - elif random_value < 0.7: # 30% chance to reject (40% to 70%) - await review_store_submission( - store_listing_version_id=test_submission.store_listing_version_id, - is_approved=False, - external_comments="Test submission rejected - needs improvements", - internal_comments="Auto-rejected test submission for E2E testing", - reviewer_id=test_user["id"], - ) - print("❌ Rejected test store submission") - else: # 30% chance to leave pending (70% to 100%) - print("⏳ Left test submission pending for review") + await prisma.storelistingversion.update( + where={"id": test_submission.store_listing_version_id}, + data={"isFeatured": True}, + ) + featured_count += 1 + print("🌟 Marked test agent as FEATURED") except Exception as e: print(f"Error creating test store submission: {e}") @@ -620,7 +619,6 @@ class TestDataCreator: # Create regular submissions for all users for user in self.users: 
-            # Get available graphs for this specific user
             user_graphs = [
                 g for g in self.agent_graphs if g.get("userId") == user["id"]
             ]
@@ -631,18 +629,17 @@ class TestDataCreator:
                 )
                 continue
 
-            # Create exactly 4 store submissions per user
             for submission_index in range(4):
                 graph = random.choice(user_graphs)
+                submission_counter += 1
                 try:
                     print(
-                        f"Creating store submission for user {user['id']} with graph {graph['id']} (owner: {graph.get('userId')})"
+                        f"Creating store submission for user {user['id']} with graph {graph['id']}"
                     )
 
-                    # Use the API function to create store submission with correct parameters
                     submission = await create_store_submission(
-                        user_id=user["id"],  # Must match graph's userId
+                        user_id=user["id"],
                         agent_id=graph["id"],
                         agent_version=graph.get("version", 1),
                         slug=faker.slug(),
@@ -651,22 +648,24 @@ class TestDataCreator:
                         video_url=get_video_url() if random.random() < 0.3 else None,
                         image_urls=[get_image() for _ in range(3)],
                         description=faker.text(),
-                        categories=[
-                            get_category()
-                        ],  # Single category from predefined list
+                        categories=[get_category()],
                         changes_summary="Initial E2E test submission",
                     )
                     submissions.append(submission.model_dump())
                     print(f"✅ Created store submission: {submission.name}")
 
-                    # Randomly approve, reject, or leave pending the submission
                     if submission.store_listing_version_id:
-                        random_value = random.random()
-                        if random_value < 0.4:  # 40% chance to approve
-                            try:
-                                # Pick a random user as the reviewer (admin)
-                                reviewer_id = random.choice(self.users)["id"]
+                        # DETERMINISTIC: First N submissions are always approved
+                        # First GUARANTEED_FEATURED_AGENTS of those are always featured
+                        should_approve = (
+                            submission_counter <= GUARANTEED_TOP_AGENTS
+                            or random.random() < 0.4
+                        )
+                        should_feature = featured_count < GUARANTEED_FEATURED_AGENTS
+                        if should_approve:
+                            try:
+                                reviewer_id = random.choice(self.users)["id"]
                                 approved_submission = await review_store_submission(
                                     store_listing_version_id=submission.store_listing_version_id,
                                     is_approved=True,
@@ -681,16 +680,7 @@ class TestDataCreator:
                                     f"✅ Approved store submission: {submission.name}"
                                 )
 
-                                # Mark some agents as featured during creation (30% chance)
-                                # More likely for creators and first submissions
-                                is_creator = user["id"] in [
-                                    p.get("userId") for p in self.profiles
-                                ]
-                                feature_chance = (
-                                    0.5 if is_creator else 0.2
-                                )  # 50% for creators, 20% for others
-
-                                if random.random() < feature_chance:
+                                if should_feature:
                                     try:
                                         await prisma.storelistingversion.update(
                                             where={
@@ -698,8 +688,25 @@ class TestDataCreator:
                                             },
                                             data={"isFeatured": True},
                                         )
+                                        featured_count += 1
                                         print(
-                                            f"🌟 Marked agent as FEATURED: {submission.name}"
+                                            f"🌟 Marked agent as FEATURED ({featured_count}/{GUARANTEED_FEATURED_AGENTS}): {submission.name}"
+                                        )
+                                    except Exception as e:
+                                        print(
+                                            f"Warning: Could not mark submission as featured: {e}"
+                                        )
+                                elif random.random() < 0.2:
+                                    try:
+                                        await prisma.storelistingversion.update(
+                                            where={
+                                                "id": submission.store_listing_version_id
+                                            },
+                                            data={"isFeatured": True},
+                                        )
+                                        featured_count += 1
+                                        print(
+                                            f"🌟 Marked agent as FEATURED (bonus): {submission.name}"
                                         )
                                     except Exception as e:
                                         print(
@@ -710,11 +717,9 @@ class TestDataCreator:
                                 print(
                                     f"Warning: Could not approve submission {submission.name}: {e}"
                                 )
-                        elif random_value < 0.7:  # 30% chance to reject (40% to 70%)
+                        elif random.random() < 0.5:
                             try:
-                                # Pick a random user as the reviewer (admin)
                                 reviewer_id = random.choice(self.users)["id"]
-
                                 await review_store_submission(
                                     store_listing_version_id=submission.store_listing_version_id,
                                     is_approved=False,
@@ -729,7 +734,7 @@ class TestDataCreator:
                                 print(
                                     f"Warning: Could not reject submission {submission.name}: {e}"
                                 )
-                        else:  # 30% chance to leave pending (70% to 100%)
+                        else:
                             print(
                                 f"⏳ Left submission pending for review: {submission.name}"
                             )
@@ -743,9 +748,13 @@ class TestDataCreator:
                     traceback.print_exc()
                     continue
 
+        print("\n📊 Store Submissions Summary:")
+        print(f"   Created: {len(submissions)}")
+        print(f"   Approved: {len(approved_submissions)}")
         print(
-            f"Created {len(submissions)} store submissions, approved {len(approved_submissions)}"
+            f"   Featured: {featured_count} (guaranteed min: {GUARANTEED_FEATURED_AGENTS})"
         )
+        self.store_submissions = submissions
 
         return submissions
 
@@ -825,12 +834,15 @@ class TestDataCreator:
         print(f"✅ Agent blocks available: {len(self.agent_blocks)}")
         print(f"✅ Agent graphs created: {len(self.agent_graphs)}")
         print(f"✅ Library agents created: {len(self.library_agents)}")
-        print(f"✅ Creator profiles updated: {len(self.profiles)} (some featured)")
-        print(
-            f"✅ Store submissions created: {len(self.store_submissions)} (some marked as featured during creation)"
-        )
+        print(f"✅ Creator profiles updated: {len(self.profiles)}")
+        print(f"✅ Store submissions created: {len(self.store_submissions)}")
         print(f"✅ API keys created: {len(self.api_keys)}")
         print(f"✅ Presets created: {len(self.presets)}")
+        print("\n🎯 Deterministic Guarantees:")
+        print(f"   • Featured agents: >= {GUARANTEED_FEATURED_AGENTS}")
+        print(f"   • Featured creators: >= {GUARANTEED_FEATURED_CREATORS}")
+        print(f"   • Top agents (approved): >= {GUARANTEED_TOP_AGENTS}")
+        print(f"   • Library agents per user: >= {MIN_AGENTS_PER_USER}")
         print("\n🚀 Your E2E test database is ready to use!")
diff --git a/autogpt_platform/frontend/.env.default b/autogpt_platform/frontend/.env.default
index af250fb8bf..7a9d81e39e 100644
--- a/autogpt_platform/frontend/.env.default
+++ b/autogpt_platform/frontend/.env.default
@@ -34,3 +34,6 @@ NEXT_PUBLIC_PREVIEW_STEALING_DEV=
 # PostHog Analytics
 NEXT_PUBLIC_POSTHOG_KEY=
 NEXT_PUBLIC_POSTHOG_HOST=https://eu.i.posthog.com
+
+# OpenAI (for voice transcription)
+OPENAI_API_KEY=
diff --git a/autogpt_platform/frontend/CLAUDE.md b/autogpt_platform/frontend/CLAUDE.md
new file mode 100644
index 0000000000..b58f1ad6aa
--- /dev/null
+++ b/autogpt_platform/frontend/CLAUDE.md
@@ -0,0 +1,76 @@
+# CLAUDE.md - Frontend
+
+This file provides guidance to Claude Code when working with the frontend.
+
+## Essential Commands
+
+```bash
+# Install dependencies
+pnpm i
+
+# Generate API client from OpenAPI spec
+pnpm generate:api
+
+# Start development server
+pnpm dev
+
+# Run E2E tests
+pnpm test
+
+# Run Storybook for component development
+pnpm storybook
+
+# Build production
+pnpm build
+
+# Format and lint
+pnpm format
+
+# Type checking
+pnpm types
+```
+
+### Code Style
+
+- Fully capitalize acronyms in symbols, e.g. `graphID`, `useBackendAPI`
+- Use function declarations (not arrow functions) for components/handlers
+
+## Architecture
+
+- **Framework**: Next.js 15 App Router (client-first approach)
+- **Data Fetching**: Type-safe generated API hooks via Orval + React Query
+- **State Management**: React Query for server state, co-located UI state in components/hooks
+- **Component Structure**: Separate render logic (`.tsx`) from business logic (`use*.ts` hooks)
+- **Workflow Builder**: Visual graph editor using @xyflow/react
+- **UI Components**: shadcn/ui (Radix UI primitives) with Tailwind CSS styling
+- **Icons**: Phosphor Icons only
+- **Feature Flags**: LaunchDarkly integration
+- **Error Handling**: ErrorCard for render errors, toast for mutations, Sentry for exceptions
+- **Testing**: Playwright for E2E, Storybook for component development
+
+## Environment Configuration
+
+`.env.default` (defaults) → `.env` (user overrides)
+
+## Feature Development
+
+See @CONTRIBUTING.md for complete patterns. Quick reference:
+
+1. **Pages**: Create in `src/app/(platform)/feature-name/page.tsx`
+   - Extract component logic into custom hooks grouped by concern, not by component. Each hook should represent a cohesive domain of functionality (e.g., useSearch, useFilters, usePagination) rather than bundling all state into one useComponentState hook.
+   - Put each hook in its own `.ts` file
+   - Put sub-components in local `components/` folder
+   - Component props should be `type Props = { ... }` (not exported) unless it needs to be used outside the component
+2. **Components**: Structure as `ComponentName/ComponentName.tsx` + `useComponentName.ts` + `helpers.ts`
+   - Use design system components from `src/components/` (atoms, molecules, organisms)
+   - Never use `src/components/__legacy__/*`
+3. **Data fetching**: Use generated API hooks from `@/app/api/__generated__/endpoints/`
+   - Regenerate with `pnpm generate:api`
+   - Pattern: `use{Method}{Version}{OperationName}`
+4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
+5. **Testing**: Add Storybook stories for new components, Playwright for E2E
+6. **Code conventions**:
+   - Use function declarations (not arrow functions) for components/handlers
+   - Do not use `useCallback` or `useMemo` unless asked to optimise a given function
+   - Do not type hook returns, let TypeScript infer as much as possible
+   - Never type with `any` unless a variable/attribute can ACTUALLY be of any type
diff --git a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/CustomNode/CustomNode.tsx b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/CustomNode/CustomNode.tsx
index 94e917a4ac..834603cc4a 100644
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/CustomNode/CustomNode.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/legacy-builder/CustomNode/CustomNode.tsx
@@ -857,7 +857,7 @@ export const CustomNode = React.memo(
     })();
 
     const hasAdvancedFields =
-      data.inputSchema &&
+      data.inputSchema?.properties &&
       Object.entries(data.inputSchema.properties).some(([key, value]) => {
         return (
           value.advanced === true && !data.inputSchema.required?.includes(key)
diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts
index 11ddd937af..61e3e6f37f 100644
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts
@@ -73,9 +73,9 @@ export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) {
   };
 
   const reset = () => {
+    // Only reset the offset - keep existing sessions visible during refetch
+    // The effect will replace sessions when new data arrives at offset 0
     setOffset(0);
-    setAccumulatedSessions([]);
-    setTotalCount(null);
   };
 
   return {
diff --git a/autogpt_platform/frontend/src/app/api/openapi.json b/autogpt_platform/frontend/src/app/api/openapi.json
index bf8f58fe22..7b0cc410b4 100644
--- a/autogpt_platform/frontend/src/app/api/openapi.json
+++ b/autogpt_platform/frontend/src/app/api/openapi.json
@@ -7967,6 +7967,25 @@
           ]
         },
         "new_output": { "type": "boolean", "title": "New Output" },
+        "execution_count": {
+          "type": "integer",
+          "title": "Execution Count",
+          "default": 0
+        },
+        "success_rate": {
+          "anyOf": [{ "type": "number" }, { "type": "null" }],
+          "title": "Success Rate"
+        },
+        "avg_correctness_score": {
+          "anyOf": [{ "type": "number" }, { "type": "null" }],
+          "title": "Avg Correctness Score"
+        },
+        "recent_executions": {
+          "items": { "$ref": "#/components/schemas/RecentExecution" },
+          "type": "array",
+          "title": "Recent Executions",
+          "description": "List of recent executions with status, score, and summary"
+        },
         "can_access_graph": {
           "type": "boolean",
           "title": "Can Access Graph"
@@ -9360,6 +9379,23 @@
       "required": ["providers", "pagination"],
       "title": "ProviderResponse"
     },
+    "RecentExecution": {
+      "properties": {
+        "status": { "type": "string", "title": "Status" },
+        "correctness_score": {
+          "anyOf": [{ "type": "number" }, { "type": "null" }],
+          "title": "Correctness Score"
+        },
+        "activity_summary": {
+          "anyOf": [{ "type": "string" }, { "type": "null" }],
+          "title": "Activity Summary"
+        }
+      },
+      "type": "object",
+      "required": ["status"],
+      "title": "RecentExecution",
+      "description": "Summary of a recent execution for quality assessment.\n\nUsed by the LLM to understand the agent's recent performance with specific examples\nrather than just aggregate statistics."
+    },
     "RefundRequest": {
       "properties": {
         "id": { "type": "string", "title": "Id" },
@@ -9783,7 +9819,8 @@
         "sub_heading": { "type": "string", "title": "Sub Heading" },
         "description": { "type": "string", "title": "Description" },
         "runs": { "type": "integer", "title": "Runs" },
-        "rating": { "type": "number", "title": "Rating" }
+        "rating": { "type": "number", "title": "Rating" },
+        "agent_graph_id": { "type": "string", "title": "Agent Graph Id" }
       },
       "type": "object",
       "required": [
@@ -9795,7 +9832,8 @@
         "sub_heading",
         "description",
         "runs",
-        "rating"
+        "rating",
+        "agent_graph_id"
       ],
       "title": "StoreAgent"
     },
diff --git a/autogpt_platform/frontend/src/app/api/transcribe/route.ts b/autogpt_platform/frontend/src/app/api/transcribe/route.ts
new file mode 100644
index 0000000000..10c182cdfa
--- /dev/null
+++ b/autogpt_platform/frontend/src/app/api/transcribe/route.ts
@@ -0,0 +1,77 @@
+import { getServerAuthToken } from "@/lib/autogpt-server-api/helpers";
+import { NextRequest, NextResponse } from "next/server";
+
+const WHISPER_API_URL = "https://api.openai.com/v1/audio/transcriptions";
+const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB - Whisper's limit
+
+function getExtensionFromMimeType(mimeType: string): string {
+  const subtype = mimeType.split("/")[1]?.split(";")[0];
+  return subtype || "webm";
+}
+
+export async function POST(request: NextRequest) {
+  const token = await getServerAuthToken();
+
+  if (!token || token === "no-token-found") {
+    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  }
+
+  const apiKey = process.env.OPENAI_API_KEY;
+
+  if (!apiKey) {
+    return NextResponse.json(
+      { error: "OpenAI API key not configured" },
+      { status: 401 },
+    );
+  }
+
+  try {
+    const formData = await request.formData();
+    const audioFile = formData.get("audio");
+
+    if (!audioFile || !(audioFile instanceof Blob)) {
+      return NextResponse.json(
+        { error: "No audio file provided" },
+        { status: 400 },
+      );
+    }
+
+    if (audioFile.size > MAX_FILE_SIZE) {
+      return NextResponse.json(
+        { error: "File too large. Maximum size is 25MB." },
+        { status: 413 },
+      );
+    }
+
+    const ext = getExtensionFromMimeType(audioFile.type);
+    const whisperFormData = new FormData();
+    whisperFormData.append("file", audioFile, `recording.${ext}`);
+    whisperFormData.append("model", "whisper-1");
+
+    const response = await fetch(WHISPER_API_URL, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: whisperFormData,
+    });
+
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => ({}));
+      console.error("Whisper API error:", errorData);
+      return NextResponse.json(
+        { error: errorData.error?.message || "Transcription failed" },
+        { status: response.status },
+      );
+    }
+
+    const result = await response.json();
+    return NextResponse.json({ text: result.text });
+  } catch (error) {
+    console.error("Transcription error:", error);
+    return NextResponse.json(
+      { error: "Failed to process audio" },
+      { status: 500 },
+    );
+  }
+}
diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx
index c45e8dc250..beb4678e73 100644
--- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx
@@ -1,7 +1,14 @@
 import { Button } from "@/components/atoms/Button/Button";
 import { cn } from "@/lib/utils";
-import { ArrowUpIcon, StopIcon } from "@phosphor-icons/react";
+import {
+  ArrowUpIcon,
+  CircleNotchIcon,
+  MicrophoneIcon,
+  StopIcon,
+} from "@phosphor-icons/react";
+import { RecordingIndicator } from "./components/RecordingIndicator";
 import { useChatInput } from "./useChatInput";
+import { useVoiceRecording } from "./useVoiceRecording";
 
 export interface Props {
   onSend: (message: string) => void;
@@ -21,13 +28,37 @@ export function ChatInput({
   className,
 }: Props) {
   const inputId = "chat-input";
-  const { value, handleKeyDown, handleSubmit, handleChange, hasMultipleLines } =
-    useChatInput({
-      onSend,
-      disabled: disabled || isStreaming,
-      maxRows: 4,
-      inputId,
-    });
+  const {
+    value,
+    setValue,
+    handleKeyDown: baseHandleKeyDown,
+    handleSubmit,
+    handleChange,
+    hasMultipleLines,
+  } = useChatInput({
+    onSend,
+    disabled: disabled || isStreaming,
+    maxRows: 4,
+    inputId,
+  });
+
+  const {
+    isRecording,
+    isTranscribing,
+    elapsedTime,
+    toggleRecording,
+    handleKeyDown,
+    showMicButton,
+    isInputDisabled,
+    audioStream,
+  } = useVoiceRecording({
+    setValue,
+    disabled: disabled || isStreaming,
+    isStreaming,
+    value,
+    baseHandleKeyDown,
+    inputId,
+  });
 
   return (
   );
diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx
new file mode 100644
index 0000000000..10cbb3fc9f
--- /dev/null
+++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx
@@ -0,0 +1,142 @@
+"use client";
+
+import { useEffect, useRef, useState } from "react";
+
+interface Props {
+  stream: MediaStream | null;
+  barCount?: number;
+  barWidth?: number;
+  barGap?: number;
+  barColor?: string;
+  minBarHeight?: number;
+  maxBarHeight?: number;
+}
+
+export function AudioWaveform({
+  stream,
+  barCount = 24,
+  barWidth = 3,
+  barGap = 2,
+  barColor = "#ef4444", // red-500
+  minBarHeight = 4,
+  maxBarHeight = 32,
+}: Props) {
+  const [bars, setBars] = useState