From 2facfccbeaa619bd1e91f9e946916de7e445a99b Mon Sep 17 00:00:00 2001 From: Swifty Date: Mon, 9 Feb 2026 11:11:35 +0100 Subject: [PATCH] reduce find_block return size --- .../api/features/chat/tools/find_block.py | 46 ++-- .../features/chat/tools/find_block_test.py | 254 +++++++++++++++++- .../backend/api/features/chat/tools/models.py | 11 +- 3 files changed, 280 insertions(+), 31 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py index f55cd567e8..ca00e73d30 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py @@ -54,7 +54,8 @@ class FindBlockTool(BaseTool): "Blocks are reusable components that perform specific tasks like " "sending emails, making API calls, processing text, etc. " "IMPORTANT: Use this tool FIRST to get the block's 'id' before calling run_block. " - "The response includes each block's id, required_inputs, and input_schema." + "The response includes each block's id, required_inputs, " + "and output_fields." ) @property @@ -123,7 +124,7 @@ class FindBlockTool(BaseTool): session_id=session_id, ) - # Enrich results with full block information + # Enrich results with block information blocks: list[BlockInfoSummary] = [] for result in results: block_id = result["content_id"] @@ -141,8 +142,8 @@ class FindBlockTool(BaseTool): continue # Get input/output schemas - input_schema = {} - output_schema = {} + input_schema: dict[str, Any] = {} + output_schema: dict[str, Any] = {} try: input_schema = block.input_schema.jsonschema() except Exception as e: @@ -160,26 +161,20 @@ class FindBlockTool(BaseTool): e, ) - # Get categories from block instance - categories = [] - if hasattr(block, "categories") and block.categories: - categories = [cat.value for cat in block.categories] + # Get credential field names to exclude from required inputs + credentials_fields = set( + block.input_schema.get_credentials_fields().keys() + ) - # Extract required inputs for easier use + # Extract input fields (excluding credentials) required_inputs: list[BlockInputFieldInfo] = [] if input_schema: properties = input_schema.get("properties", {}) required_fields = set(input_schema.get("required", [])) - # Get credential field names to exclude from required inputs - credentials_fields = set( - block.input_schema.get_credentials_fields().keys() - ) for field_name, field_schema in properties.items(): - # Skip credential fields - they're handled separately if field_name in credentials_fields: continue - required_inputs.append( BlockInputFieldInfo( name=field_name, @@ -190,15 +185,26 @@ class FindBlockTool(BaseTool): ) ) + # Extract output fields + output_fields: list[BlockInputFieldInfo] = [] + if output_schema: + out_props = output_schema.get("properties", {}) + for field_name, field_schema in out_props.items(): + output_fields.append( + BlockInputFieldInfo( + name=field_name, + type=field_schema.get("type", "string"), + description=field_schema.get("description", ""), + ) + ) + blocks.append( BlockInfoSummary( id=block_id, name=block.name, description=block.description or "", - categories=categories, - input_schema=input_schema, - output_schema=output_schema, required_inputs=required_inputs, + output_fields=output_fields, ) ) @@ -227,8 +233,8 @@ class FindBlockTool(BaseTool): return BlockListResponse( message=( f"Found {len(blocks)} block(s) matching '{query}'. " - "To execute a block, use run_block with the block's 'id' field " - "and provide 'input_data' matching the block's input_schema." + "To execute a block, use run_block with the block's 'id' " + "and provide 'input_data' matching required_inputs." ), blocks=blocks, count=len(blocks), diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py b/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py index 0f3d4cbfa5..5ca4887601 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py @@ -18,7 +18,13 @@ _TEST_USER_ID = "test-user-find-block" def make_mock_block( - block_id: str, name: str, block_type: BlockType, disabled: bool = False + block_id: str, + name: str, + block_type: BlockType, + disabled: bool = False, + input_schema: dict | None = None, + output_schema: dict | None = None, + credentials_fields: dict | None = None, ): """Create a mock block for testing.""" mock = MagicMock() @@ -28,10 +34,13 @@ def make_mock_block( mock.block_type = block_type mock.disabled = disabled mock.input_schema = MagicMock() - mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []} - mock.input_schema.get_credentials_fields.return_value = {} + mock.input_schema.jsonschema.return_value = input_schema or { + "properties": {}, + "required": [], + } + mock.input_schema.get_credentials_fields.return_value = credentials_fields or {} mock.output_schema = MagicMock() - mock.output_schema.jsonschema.return_value = {} + mock.output_schema.jsonschema.return_value = output_schema or {} mock.categories = [] return mock @@ -137,3 +146,240 @@ class TestFindBlockFiltering: assert isinstance(response, BlockListResponse) assert len(response.blocks) == 1 assert response.blocks[0].id == "normal-block-id" + + @pytest.mark.asyncio(loop_scope="session") + async def test_response_size_average_chars_per_block(self): + """Measure average chars per block in the serialized response.""" + session = make_session(user_id=_TEST_USER_ID) + + # Realistic block definitions modeled after real blocks + block_defs = [ + { + "id": "http-block-id", + "name": "Send Web Request", + "input_schema": { + "properties": { + "url": { + "type": "string", + "description": "The URL to send the request to", + }, + "method": { + "type": "string", + "description": "The HTTP method to use", + }, + "headers": { + "type": "object", + "description": "Headers to include in the request", + }, + "json_format": { + "type": "boolean", + "description": "If true, send the body as JSON", + }, + "body": { + "type": "object", + "description": "Form/JSON body payload", + }, + "credentials": { + "type": "object", + "description": "HTTP credentials", + }, + }, + "required": ["url", "method"], + }, + "output_schema": { + "properties": { + "response": { + "type": "object", + "description": "The response from the server", + }, + "client_error": { + "type": "object", + "description": "Errors on 4xx status codes", + }, + "server_error": { + "type": "object", + "description": "Errors on 5xx status codes", + }, + "error": { + "type": "string", + "description": "Errors for all other exceptions", + }, + }, + }, + "credentials_fields": {"credentials": True}, + }, + { + "id": "email-block-id", + "name": "Send Email", + "input_schema": { + "properties": { + "to_email": { + "type": "string", + "description": "Recipient email address", + }, + "subject": { + "type": "string", + "description": "Subject of the email", + }, + "body": { + "type": "string", + "description": "Body of the email", + }, + "config": { + "type": "object", + "description": "SMTP Config", + }, + "credentials": { + "type": "object", + "description": "SMTP credentials", + }, + }, + "required": ["to_email", "subject", "body", "credentials"], + }, + "output_schema": { + "properties": { + "status": { + "type": "string", + "description": "Status of the email sending operation", + }, + "error": { + "type": "string", + "description": "Error message if sending failed", + }, + }, + }, + "credentials_fields": {"credentials": True}, + }, + { + "id": "claude-code-block-id", + "name": "Claude Code", + "input_schema": { + "properties": { + "e2b_credentials": { + "type": "object", + "description": "API key for E2B platform", + }, + "anthropic_credentials": { + "type": "object", + "description": "API key for Anthropic", + }, + "prompt": { + "type": "string", + "description": "Task or instruction for Claude Code", + }, + "timeout": { + "type": "integer", + "description": "Sandbox timeout in seconds", + }, + "setup_commands": { + "type": "array", + "description": "Shell commands to run before execution", + }, + "working_directory": { + "type": "string", + "description": "Working directory for Claude Code", + }, + "session_id": { + "type": "string", + "description": "Session ID to resume a conversation", + }, + "sandbox_id": { + "type": "string", + "description": "Sandbox ID to reconnect to", + }, + "conversation_history": { + "type": "string", + "description": "Previous conversation history", + }, + "dispose_sandbox": { + "type": "boolean", + "description": "Whether to dispose sandbox after execution", + }, + }, + "required": [ + "e2b_credentials", + "anthropic_credentials", + "prompt", + ], + }, + "output_schema": { + "properties": { + "response": { + "type": "string", + "description": "Output from Claude Code execution", + }, + "files": { + "type": "array", + "description": "Files created/modified by Claude Code", + }, + "conversation_history": { + "type": "string", + "description": "Full conversation history", + }, + "session_id": { + "type": "string", + "description": "Session ID for this conversation", + }, + "sandbox_id": { + "type": "string", + "description": "ID of the sandbox instance", + }, + "error": { + "type": "string", + "description": "Error message if execution failed", + }, + }, + }, + "credentials_fields": { + "e2b_credentials": True, + "anthropic_credentials": True, + }, + }, + ] + + search_results = [ + {"content_id": d["id"], "score": 0.9 - i * 0.1} + for i, d in enumerate(block_defs) + ] + mock_blocks = { + d["id"]: make_mock_block( + block_id=d["id"], + name=d["name"], + block_type=BlockType.STANDARD, + input_schema=d["input_schema"], + output_schema=d["output_schema"], + credentials_fields=d["credentials_fields"], + ) + for d in block_defs + } + + with patch( + "backend.api.features.chat.tools.find_block.unified_hybrid_search", + new_callable=AsyncMock, + return_value=(search_results, len(search_results)), + ), patch( + "backend.api.features.chat.tools.find_block.get_block", + side_effect=lambda bid: mock_blocks.get(bid), + ): + tool = FindBlockTool() + response = await tool._execute( + user_id=_TEST_USER_ID, session=session, query="test" + ) + + assert isinstance(response, BlockListResponse) + assert response.count == len(block_defs) + + total_chars = len(response.model_dump_json()) + avg_chars = total_chars // response.count + + # Print for visibility in test output + print(f"\nTotal response size: {total_chars} chars") + print(f"Number of blocks: {response.count}") + print(f"Average chars per block: {avg_chars}") + + # The old response was ~90K for 10 blocks (~9K per block). + # With the optimized format (no raw JSON schemas) we expect ~1.5K per block. + assert avg_chars < 2000, ( + f"Average chars per block ({avg_chars}) exceeds 2000. " + f"Total response: {total_chars} chars for {response.count} blocks." + ) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/models.py b/autogpt_platform/backend/backend/api/features/chat/tools/models.py index 69c8c6c684..1298f02a5f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/models.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/models.py @@ -334,13 +334,14 @@ class BlockInfoSummary(BaseModel): id: str name: str description: str - categories: list[str] - input_schema: dict[str, Any] - output_schema: dict[str, Any] required_inputs: list[BlockInputFieldInfo] = Field( default_factory=list, description="List of required input fields for this block", ) + output_fields: list[BlockInputFieldInfo] = Field( + default_factory=list, + description="Output fields produced by this block", + ) class BlockListResponse(ToolResponseBase): @@ -350,10 +351,6 @@ class BlockListResponse(ToolResponseBase): blocks: list[BlockInfoSummary] count: int query: str - usage_hint: str = Field( - default="To execute a block, call run_block with block_id set to the block's " - "'id' field and input_data containing the required fields from input_schema." - ) class BlockOutputResponse(ToolResponseBase):