From 7dc53071e8a9f293f729886e39524bae6358490c Mon Sep 17 00:00:00 2001 From: Otto Date: Tue, 3 Feb 2026 12:43:30 +0000 Subject: [PATCH] fix(backend): Add retry and error handling to block initialization (#11946) ## Summary Adds retry logic and graceful error handling to `initialize_blocks()` to prevent transient DB errors from crashing server startup. ## Problem When a transient database error occurs during block initialization (e.g., Prisma P1017 "Server has closed the connection"), the entire server fails to start. This is overly aggressive since: 1. Blocks are already registered in memory 2. The DB sync is primarily for tracking/schema storage 3. One flaky connection shouldn't prevent the server from starting **Triggered by:** [Sentry AUTOGPT-SERVER-7PW](https://significant-gravitas.sentry.io/issues/7238733543/) ## Solution - Add retry decorator (3 attempts with exponential backoff) for DB operations - On failure after retries, log a warning and continue to the next block - Blocks remain available in memory even if DB sync fails - Log summary of any failed blocks at the end ## Changes - `autogpt_platform/backend/backend/data/block.py`: Wrap block DB sync in retry logic with graceful fallback ## Testing - Existing block initialization behavior unchanged on success - On transient DB errors: retries up to 3 times, then continues with warning --- .../backend/backend/data/block.py | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/autogpt_platform/backend/backend/data/block.py b/autogpt_platform/backend/backend/data/block.py index 8d9ecfff4c..eb9360b037 100644 --- a/autogpt_platform/backend/backend/data/block.py +++ b/autogpt_platform/backend/backend/data/block.py @@ -873,14 +873,13 @@ def is_block_auth_configured( async def initialize_blocks() -> None: - # First, sync all provider costs to blocks - # Imported here to avoid circular import from backend.sdk.cost_integration import sync_all_provider_costs + from backend.util.retry import func_retry sync_all_provider_costs() - for cls in get_blocks().values(): - block = cls() + @func_retry + async def sync_block_to_db(block: Block) -> None: existing_block = await AgentBlock.prisma().find_first( where={"OR": [{"id": block.id}, {"name": block.name}]} ) @@ -893,7 +892,7 @@ async def initialize_blocks() -> None: outputSchema=json.dumps(block.output_schema.jsonschema()), ) ) - continue + return input_schema = json.dumps(block.input_schema.jsonschema()) output_schema = json.dumps(block.output_schema.jsonschema()) @@ -913,6 +912,25 @@ async def initialize_blocks() -> None: }, ) + failed_blocks: list[str] = [] + for cls in get_blocks().values(): + block = cls() + try: + await sync_block_to_db(block) + except Exception as e: + logger.warning( + f"Failed to sync block {block.name} to database: {e}. " + "Block is still available in memory.", + exc_info=True, + ) + failed_blocks.append(block.name) + + if failed_blocks: + logger.error( + f"Failed to sync {len(failed_blocks)} block(s) to database: " + f"{', '.join(failed_blocks)}. These blocks are still available in memory." + ) + # Note on the return type annotation: https://github.com/microsoft/pyright/issues/10281 def get_block(block_id: str) -> AnyBlockSchema | None: