fix(backend): over-fetch blocks in CoPilot find_block to prevent result degradation

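Search now fetches 40 results and stops after collecting 10 valid blocks,
preventing excluded graph-only blocks from consuming all search slots.
Also moves the exclusion lists out of backend.data.block into find_block.py
(renamed to COPILOT_EXCLUDED_BLOCK_TYPES / COPILOT_EXCLUDED_BLOCK_IDS), updates
run_block.py imports, drops the exclusion filter from the BlockHandler content
handler (so graph-only blocks are indexed and are filtered at search time
instead), and adds exception handling for block instantiation in get_stats().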

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Nicholas Tindle
2026-02-08 18:57:13 -06:00
parent 83248f2b32
commit 0aefa1b22b
6 changed files with 85 additions and 76 deletions

View File

@@ -13,10 +13,33 @@ from backend.api.features.chat.tools.models import (
NoResultsResponse,
)
from backend.api.features.store.hybrid_search import unified_hybrid_search
from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, get_block
from backend.data.block import BlockType, get_block
logger = logging.getLogger(__name__)
# Number of usable (non-excluded) blocks find_block aims to return per query;
# result collection stops once this many have been gathered.
_TARGET_RESULTS = 10

# Over-fetch to compensate for post-hoc filtering of graph-only blocks.
# ~16-17 blocks are currently excluded; 40 provides ample margin.
_OVERFETCH_PAGE_SIZE = 40

# Block types that only work within graphs and cannot run standalone in CoPilot.
# NOTE: This does NOT affect the Builder UI which uses load_all_blocks() directly.
# Frozen on purpose: this collection is imported by other CoPilot tools
# (e.g. run_block), so it must not be mutated at runtime from any one of them.
COPILOT_EXCLUDED_BLOCK_TYPES = frozenset(
    {
        BlockType.INPUT,  # Graph interface definition - data enters via chat, not graph inputs
        BlockType.OUTPUT,  # Graph interface definition - data exits via chat, not graph outputs
        BlockType.WEBHOOK,  # Wait for external events - would hang forever in CoPilot
        BlockType.WEBHOOK_MANUAL,  # Same as WEBHOOK
        BlockType.NOTE,  # Visual annotation only - no runtime behavior
        BlockType.HUMAN_IN_THE_LOOP,  # Pauses for human approval - CoPilot IS human-in-the-loop
        BlockType.AGENT,  # AgentExecutorBlock requires execution_context - use run_agent tool
    }
)

# Specific block IDs excluded from CoPilot (STANDARD type but still require graph context).
# Frozen for the same reason as COPILOT_EXCLUDED_BLOCK_TYPES.
COPILOT_EXCLUDED_BLOCK_IDS = frozenset(
    {
        # SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
        "3b191d9f-356f-482d-8238-ba04b6d18381",
    }
)
class FindBlockTool(BaseTool):
"""Tool for searching available blocks."""
@@ -88,7 +111,7 @@ class FindBlockTool(BaseTool):
query=query,
content_types=[ContentType.BLOCK],
page=1,
page_size=10,
page_size=_OVERFETCH_PAGE_SIZE,
)
if not results:
@@ -113,8 +136,8 @@ class FindBlockTool(BaseTool):
# Skip blocks excluded from CoPilot (graph-only blocks)
if (
block.block_type in EXCLUDED_BLOCK_TYPES
or block.id in EXCLUDED_BLOCK_IDS
block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
or block.id in COPILOT_EXCLUDED_BLOCK_IDS
):
continue
@@ -180,6 +203,19 @@ class FindBlockTool(BaseTool):
)
)
if len(blocks) >= _TARGET_RESULTS:
break
if blocks and len(blocks) < _TARGET_RESULTS:
logger.debug(
"find_block returned %d/%d results for query '%s' "
"(filtered %d excluded/disabled blocks)",
len(blocks),
_TARGET_RESULTS,
query,
len(results) - len(blocks),
)
if not blocks:
return NoResultsResponse(
message=f"No blocks found for '{query}'",

View File

@@ -1,10 +1,15 @@
"""Tests for block filtering in FindBlockTool."""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from backend.api.features.chat.tools.find_block import FindBlockTool
from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, BlockType
import pytest
from backend.api.features.chat.tools.find_block import (
COPILOT_EXCLUDED_BLOCK_IDS,
COPILOT_EXCLUDED_BLOCK_TYPES,
FindBlockTool,
)
from backend.data.block import BlockType
from ._test_data import make_session, setup_test_data
@@ -33,18 +38,18 @@ class TestFindBlockFiltering:
"""Tests for block filtering in FindBlockTool."""
def test_excluded_block_types_contains_expected_types(self):
"""Verify EXCLUDED_BLOCK_TYPES contains all graph-only types."""
assert BlockType.INPUT in EXCLUDED_BLOCK_TYPES
assert BlockType.OUTPUT in EXCLUDED_BLOCK_TYPES
assert BlockType.WEBHOOK in EXCLUDED_BLOCK_TYPES
assert BlockType.WEBHOOK_MANUAL in EXCLUDED_BLOCK_TYPES
assert BlockType.NOTE in EXCLUDED_BLOCK_TYPES
assert BlockType.HUMAN_IN_THE_LOOP in EXCLUDED_BLOCK_TYPES
assert BlockType.AGENT in EXCLUDED_BLOCK_TYPES
"""Verify COPILOT_EXCLUDED_BLOCK_TYPES contains all graph-only types."""
assert BlockType.INPUT in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.OUTPUT in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.WEBHOOK in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.WEBHOOK_MANUAL in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.NOTE in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.HUMAN_IN_THE_LOOP in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.AGENT in COPILOT_EXCLUDED_BLOCK_TYPES
def test_excluded_block_ids_contains_smart_decision_maker(self):
"""Verify SmartDecisionMakerBlock is in EXCLUDED_BLOCK_IDS."""
assert "3b191d9f-356f-482d-8238-ba04b6d18381" in EXCLUDED_BLOCK_IDS
"""Verify SmartDecisionMakerBlock is in COPILOT_EXCLUDED_BLOCK_IDS."""
assert "3b191d9f-356f-482d-8238-ba04b6d18381" in COPILOT_EXCLUDED_BLOCK_IDS
@pytest.mark.asyncio(loop_scope="session")
async def test_excluded_block_type_filtered_from_results(self, setup_test_data):

View File

@@ -8,7 +8,11 @@ from typing import Any
from pydantic_core import PydanticUndefined
from backend.api.features.chat.model import ChatSession
from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, get_block
from backend.api.features.chat.tools.find_block import (
COPILOT_EXCLUDED_BLOCK_IDS,
COPILOT_EXCLUDED_BLOCK_TYPES,
)
from backend.data.block import get_block
from backend.data.execution import ExecutionContext
from backend.data.model import CredentialsMetaInput
from backend.data.workspace import get_or_create_workspace
@@ -213,7 +217,10 @@ class RunBlockTool(BaseTool):
)
# Check if block is excluded from CoPilot (graph-only blocks)
if block.block_type in EXCLUDED_BLOCK_TYPES or block.id in EXCLUDED_BLOCK_IDS:
if (
block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
or block.id in COPILOT_EXCLUDED_BLOCK_IDS
):
return ErrorResponse(
message=(
f"Block '{block.name}' cannot be run directly in CoPilot. "

View File

@@ -1,11 +1,12 @@
"""Tests for block execution guards in RunBlockTool."""
import pytest
from unittest.mock import MagicMock, patch
from backend.api.features.chat.tools.run_block import RunBlockTool
import pytest
from backend.api.features.chat.tools.models import ErrorResponse
from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES, BlockType
from backend.api.features.chat.tools.run_block import RunBlockTool
from backend.data.block import BlockType
from ._test_data import make_session, setup_test_data
@@ -37,9 +38,7 @@ class TestRunBlockFiltering:
user = setup_test_data["user"]
session = make_session(user_id=user.id)
input_block = make_mock_block(
"input-block-id", "Input Block", BlockType.INPUT
)
input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
with patch(
"backend.api.features.chat.tools.run_block.get_block",

View File

@@ -13,7 +13,6 @@ from typing import Any
from prisma.enums import ContentType
from backend.data.block import EXCLUDED_BLOCK_IDS, EXCLUDED_BLOCK_TYPES
from backend.data.db import query_raw_with_schema
logger = logging.getLogger(__name__)
@@ -177,32 +176,22 @@ class BlockHandler(ContentHandler):
)
existing_ids = {row["contentId"] for row in existing_result}
missing_blocks = [
(block_id, block_cls)
for block_id, block_cls in all_blocks.items()
if block_id not in existing_ids
]
# Filter blocks: exclude already-embedded, disabled, and graph-only blocks
# IMPORTANT: Filter BEFORE slicing to batch_size to avoid returning empty
# batches when all first N blocks are excluded (causing processing stall)
eligible_blocks = []
for block_id, block_cls in all_blocks.items():
if block_id in existing_ids:
continue
# Convert to ContentItem
items = []
for block_id, block_cls in missing_blocks[:batch_size]:
try:
block_instance = block_cls()
# Skip disabled blocks - they shouldn't be indexed
if block_instance.disabled:
continue
if (
block_instance.block_type in EXCLUDED_BLOCK_TYPES
or block_id in EXCLUDED_BLOCK_IDS
):
continue
eligible_blocks.append((block_id, block_cls, block_instance))
except Exception as e:
logger.warning(f"Failed to instantiate block {block_id}: {e}")
continue
# Convert to ContentItem (now safe to slice after filtering)
items = []
for block_id, block_cls, block_instance in eligible_blocks[:batch_size]:
try:
# Build searchable text from block metadata
parts = []
if hasattr(block_instance, "name") and block_instance.name:
@@ -264,22 +253,14 @@ class BlockHandler(ContentHandler):
all_blocks = get_blocks()
# Filter out disabled blocks and excluded blocks - they're not indexed
# Filter out disabled blocks - they're not indexed
enabled_block_ids = []
for block_id, block_cls in all_blocks.items():
try:
block_instance = block_cls()
if not block_cls().disabled:
enabled_block_ids.append(block_id)
except Exception as e:
logger.warning(f"Failed to instantiate block {block_id}: {e}")
continue
if block_instance.disabled:
continue
if (
block_instance.block_type in EXCLUDED_BLOCK_TYPES
or block_id in EXCLUDED_BLOCK_IDS
):
continue
enabled_block_ids.append(block_id)
total_blocks = len(enabled_block_ids)
if total_blocks == 0:

View File

@@ -76,25 +76,6 @@ class BlockType(Enum):
HUMAN_IN_THE_LOOP = "Human In The Loop"
# Blocks excluded from CoPilot standalone execution
# NOTE: This does NOT affect the Builder UI which uses load_all_blocks() directly
EXCLUDED_BLOCK_TYPES = {
BlockType.INPUT, # Graph interface definition - data enters via chat, not graph inputs
BlockType.OUTPUT, # Graph interface definition - data exits via chat, not graph outputs
BlockType.WEBHOOK, # Wait for external events - would hang forever in CoPilot
BlockType.WEBHOOK_MANUAL, # Same as WEBHOOK
BlockType.NOTE, # Visual annotation only - no runtime behavior
BlockType.HUMAN_IN_THE_LOOP, # Pauses for human approval - CoPilot IS human-in-the-loop
BlockType.AGENT, # AgentExecutorBlock requires execution_context - use run_agent tool
}
# Blocks that have STANDARD/other types but still require graph context
EXCLUDED_BLOCK_IDS = {
# SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
"3b191d9f-356f-482d-8238-ba04b6d18381",
}
class BlockCategory(Enum):
AI = "Block that leverages AI to perform a task."
SOCIAL = "Block that interacts with social media platforms."