fix(backend): over-fetch blocks in CoPilot find_block to prevent result degradation

Search now fetches 40 results and stops after collecting 10 valid blocks, so that excluded graph-only blocks can no longer consume all of the search slots. Also moves the exclusion lists from backend/data/block.py into find_block.py (renamed to COPILOT_EXCLUDED_BLOCK_TYPES and COPILOT_EXCLUDED_BLOCK_IDS), updates the run_block.py imports to match, and adds exception handling for block instantiation in get_stats().

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 22:05:08 -05:00 · 2026-02-08 18:57:13 -06:00
parent 83248f2b32
commit 0aefa1b22b
6 changed files with 85 additions and 76 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
@@ -13,10 +13,33 @@ from backend.api.features.chat.tools.models import (
    NoResultsResponse,
 )
 from backend.api.features.store.hybrid_search import unified_hybrid_search
-from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, get_block
+from backend.data.block import BlockType, get_block

 logger = logging.getLogger(__name__)

+_TARGET_RESULTS = 10
+# Over-fetch to compensate for post-hoc filtering of graph-only blocks.
+# ~16-17 blocks are currently excluded; 40 provides ample margin.
+_OVERFETCH_PAGE_SIZE = 40
+
+# Block types that only work within graphs and cannot run standalone in CoPilot.
+# NOTE: This does NOT affect the Builder UI which uses load_all_blocks() directly.
+COPILOT_EXCLUDED_BLOCK_TYPES = {
+    BlockType.INPUT,  # Graph interface definition - data enters via chat, not graph inputs
+    BlockType.OUTPUT,  # Graph interface definition - data exits via chat, not graph outputs
+    BlockType.WEBHOOK,  # Wait for external events - would hang forever in CoPilot
+    BlockType.WEBHOOK_MANUAL,  # Same as WEBHOOK
+    BlockType.NOTE,  # Visual annotation only - no runtime behavior
+    BlockType.HUMAN_IN_THE_LOOP,  # Pauses for human approval - CoPilot IS human-in-the-loop
+    BlockType.AGENT,  # AgentExecutorBlock requires execution_context - use run_agent tool
+}
+
+# Specific block IDs excluded from CoPilot (STANDARD type but still require graph context)
+COPILOT_EXCLUDED_BLOCK_IDS = {
+    # SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
+    "3b191d9f-356f-482d-8238-ba04b6d18381",
+}
+

 class FindBlockTool(BaseTool):
    """Tool for searching available blocks."""
@@ -88,7 +111,7 @@ class FindBlockTool(BaseTool):
                query=query,
                content_types=[ContentType.BLOCK],
                page=1,
-                page_size=10,
+                page_size=_OVERFETCH_PAGE_SIZE,
            )

            if not results:
@@ -113,8 +136,8 @@ class FindBlockTool(BaseTool):

                # Skip blocks excluded from CoPilot (graph-only blocks)
                if (
-                    block.block_type in EXCLUDED_BLOCK_TYPES
-                    or block.id in EXCLUDED_BLOCK_IDS
+                    block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+                    or block.id in COPILOT_EXCLUDED_BLOCK_IDS
                ):
                    continue

@@ -180,6 +203,19 @@ class FindBlockTool(BaseTool):
                    )
                )

+                if len(blocks) >= _TARGET_RESULTS:
+                    break
+
+            if blocks and len(blocks) < _TARGET_RESULTS:
+                logger.debug(
+                    "find_block returned %d/%d results for query '%s' "
+                    "(filtered %d excluded/disabled blocks)",
+                    len(blocks),
+                    _TARGET_RESULTS,
+                    query,
+                    len(results) - len(blocks),
+                )
+
            if not blocks:
                return NoResultsResponse(
                    message=f"No blocks found for '{query}'",
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
@@ -1,10 +1,15 @@
 """Tests for block filtering in FindBlockTool."""

-import pytest
 from unittest.mock import AsyncMock, MagicMock, patch

-from backend.api.features.chat.tools.find_block import FindBlockTool
-from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, BlockType
+import pytest
+
+from backend.api.features.chat.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+    FindBlockTool,
+)
+from backend.data.block import BlockType

 from ._test_data import make_session, setup_test_data

@@ -33,18 +38,18 @@ class TestFindBlockFiltering:
    """Tests for block filtering in FindBlockTool."""

    def test_excluded_block_types_contains_expected_types(self):
-        """Verify EXCLUDED_BLOCK_TYPES contains all graph-only types."""
-        assert BlockType.INPUT in EXCLUDED_BLOCK_TYPES
-        assert BlockType.OUTPUT in EXCLUDED_BLOCK_TYPES
-        assert BlockType.WEBHOOK in EXCLUDED_BLOCK_TYPES
-        assert BlockType.WEBHOOK_MANUAL in EXCLUDED_BLOCK_TYPES
-        assert BlockType.NOTE in EXCLUDED_BLOCK_TYPES
-        assert BlockType.HUMAN_IN_THE_LOOP in EXCLUDED_BLOCK_TYPES
-        assert BlockType.AGENT in EXCLUDED_BLOCK_TYPES
+        """Verify COPILOT_EXCLUDED_BLOCK_TYPES contains all graph-only types."""
+        assert BlockType.INPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.OUTPUT in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.WEBHOOK_MANUAL in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.NOTE in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.HUMAN_IN_THE_LOOP in COPILOT_EXCLUDED_BLOCK_TYPES
+        assert BlockType.AGENT in COPILOT_EXCLUDED_BLOCK_TYPES

    def test_excluded_block_ids_contains_smart_decision_maker(self):
-        """Verify SmartDecisionMakerBlock is in EXCLUDED_BLOCK_IDS."""
-        assert "3b191d9f-356f-482d-8238-ba04b6d18381" in EXCLUDED_BLOCK_IDS
+        """Verify SmartDecisionMakerBlock is in COPILOT_EXCLUDED_BLOCK_IDS."""
+        assert "3b191d9f-356f-482d-8238-ba04b6d18381" in COPILOT_EXCLUDED_BLOCK_IDS

    @pytest.mark.asyncio(loop_scope="session")
    async def test_excluded_block_type_filtered_from_results(self, setup_test_data):
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
@@ -8,7 +8,11 @@ from typing import Any
 from pydantic_core import PydanticUndefined

 from backend.api.features.chat.model import ChatSession
-from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, get_block
+from backend.api.features.chat.tools.find_block import (
+    COPILOT_EXCLUDED_BLOCK_IDS,
+    COPILOT_EXCLUDED_BLOCK_TYPES,
+)
+from backend.data.block import get_block
 from backend.data.execution import ExecutionContext
 from backend.data.model import CredentialsMetaInput
 from backend.data.workspace import get_or_create_workspace
@@ -213,7 +217,10 @@ class RunBlockTool(BaseTool):
            )

        # Check if block is excluded from CoPilot (graph-only blocks)
-        if block.block_type in EXCLUDED_BLOCK_TYPES or block.id in EXCLUDED_BLOCK_IDS:
+        if (
+            block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
+            or block.id in COPILOT_EXCLUDED_BLOCK_IDS
+        ):
            return ErrorResponse(
                message=(
                    f"Block '{block.name}' cannot be run directly in CoPilot. "
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
@@ -1,11 +1,12 @@
 """Tests for block execution guards in RunBlockTool."""

-import pytest
 from unittest.mock import MagicMock, patch

-from backend.api.features.chat.tools.run_block import RunBlockTool
+import pytest
+
 from backend.api.features.chat.tools.models import ErrorResponse
-from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, BlockType
+from backend.api.features.chat.tools.run_block import RunBlockTool
+from backend.data.block import BlockType

 from ._test_data import make_session, setup_test_data

@@ -37,9 +38,7 @@ class TestRunBlockFiltering:
        user = setup_test_data["user"]
        session = make_session(user_id=user.id)

-        input_block = make_mock_block(
-            "input-block-id", "Input Block", BlockType.INPUT
-        )
+        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)

        with patch(
            "backend.api.features.chat.tools.run_block.get_block",
--- a/autogpt_platform/backend/backend/api/features/store/content_handlers.py
+++ b/autogpt_platform/backend/backend/api/features/store/content_handlers.py
@@ -13,7 +13,6 @@ from typing import Any

 from prisma.enums import ContentType

-from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES
 from backend.data.db import query_raw_with_schema

 logger = logging.getLogger(__name__)
@@ -177,32 +176,22 @@ class BlockHandler(ContentHandler):
        )

        existing_ids = {row["contentId"] for row in existing_result}
+        missing_blocks = [
+            (block_id, block_cls)
+            for block_id, block_cls in all_blocks.items()
+            if block_id not in existing_ids
+        ]

-        # Filter blocks: exclude already-embedded, disabled, and graph-only blocks
-        # IMPORTANT: Filter BEFORE slicing to batch_size to avoid returning empty
-        # batches when all first N blocks are excluded (causing processing stall)
-        eligible_blocks = []
-        for block_id, block_cls in all_blocks.items():
-            if block_id in existing_ids:
-                continue
+        # Convert to ContentItem
+        items = []
+        for block_id, block_cls in missing_blocks[:batch_size]:
            try:
                block_instance = block_cls()
+
+                # Skip disabled blocks - they shouldn't be indexed
                if block_instance.disabled:
                    continue
-                if (
-                    block_instance.block_type in EXCLUDED_BLOCK_TYPES
-                    or block_id in EXCLUDED_BLOCK_IDS
-                ):
-                    continue
-                eligible_blocks.append((block_id, block_cls, block_instance))
-            except Exception as e:
-                logger.warning(f"Failed to instantiate block {block_id}: {e}")
-                continue

-        # Convert to ContentItem (now safe to slice after filtering)
-        items = []
-        for block_id, block_cls, block_instance in eligible_blocks[:batch_size]:
-            try:
                # Build searchable text from block metadata
                parts = []
                if hasattr(block_instance, "name") and block_instance.name:
@@ -264,22 +253,14 @@ class BlockHandler(ContentHandler):

        all_blocks = get_blocks()

-        # Filter out disabled blocks and excluded blocks - they're not indexed
+        # Filter out disabled blocks - they're not indexed
        enabled_block_ids = []
        for block_id, block_cls in all_blocks.items():
            try:
-                block_instance = block_cls()
+                if not block_cls().disabled:
+                    enabled_block_ids.append(block_id)
            except Exception as e:
                logger.warning(f"Failed to instantiate block {block_id}: {e}")
-                continue
-            if block_instance.disabled:
-                continue
-            if (
-                block_instance.block_type in EXCLUDED_BLOCK_TYPES
-                or block_id in EXCLUDED_BLOCK_IDS
-            ):
-                continue
-            enabled_block_ids.append(block_id)
        total_blocks = len(enabled_block_ids)

        if total_blocks == 0:
--- a/autogpt_platform/backend/backend/data/block.py
+++ b/autogpt_platform/backend/backend/data/block.py
@@ -76,25 +76,6 @@ class BlockType(Enum):
    HUMAN_IN_THE_LOOP = "Human In The Loop"


-# Blocks excluded from CoPilot standalone execution
-# NOTE: This does NOT affect the Builder UI which uses load_all_blocks() directly
-EXCLUDED_BLOCK_TYPES = {
-    BlockType.INPUT,  # Graph interface definition - data enters via chat, not graph inputs
-    BlockType.OUTPUT,  # Graph interface definition - data exits via chat, not graph outputs
-    BlockType.WEBHOOK,  # Wait for external events - would hang forever in CoPilot
-    BlockType.WEBHOOK_MANUAL,  # Same as WEBHOOK
-    BlockType.NOTE,  # Visual annotation only - no runtime behavior
-    BlockType.HUMAN_IN_THE_LOOP,  # Pauses for human approval - CoPilot IS human-in-the-loop
-    BlockType.AGENT,  # AgentExecutorBlock requires execution_context - use run_agent tool
-}
-
-# Blocks that have STANDARD/other types but still require graph context
-EXCLUDED_BLOCK_IDS = {
-    # SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
-    "3b191d9f-356f-482d-8238-ba04b6d18381",
-}
-
-
 class BlockCategory(Enum):
    AI = "Block that leverages AI to perform a task."
    SOCIAL = "Block that interacts with social media platforms."