fix(backend): improve server error handling (#10030)

## Changes
- log helpful hints when metrics fail to record
- clarify API key errors in v1 router
- improve Postmark unsubscribe and webhook logs
- surface actionable feedback across integrations and store APIs
- handle Otto proxy failures with guidance

## Checklist
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan
This commit is contained in:
Swifty
2025-06-10 12:09:57 +02:00
committed by GitHub
parent 7165958feb
commit 4a03e5cbaf
11 changed files with 274 additions and 77 deletions

View File

@@ -120,9 +120,17 @@ def callback(
)
except Exception as e:
logger.error(f"Code->Token exchange failed for provider {provider.value}: {e}")
logger.exception(
"OAuth callback for provider %s failed during code exchange: %s. Confirm provider credentials.",
provider.value,
e,
)
raise HTTPException(
status_code=400, detail=f"Failed to exchange code for tokens: {str(e)}"
status_code=400,
detail={
"message": str(e),
"hint": "Verify OAuth configuration and try again.",
},
)
# TODO: Allow specifying `title` to set on `credentials`
@@ -286,9 +294,13 @@ async def webhook_ingress_generic(
try:
webhook = await get_webhook(webhook_id)
except NotFoundError as e:
logger.warning(f"Webhook payload received for unknown webhook: {e}")
logger.warning(
"Webhook payload received for unknown webhook %s. Confirm the webhook ID.",
webhook_id,
)
raise HTTPException(
status_code=HTTP_404_NOT_FOUND, detail=f"Webhook #{webhook_id} not found"
status_code=HTTP_404_NOT_FOUND,
detail={"message": str(e), "hint": "Check if the webhook ID is correct."},
) from e
logger.debug(f"Webhook #{webhook_id}: {webhook}")
payload, event_type = await webhook_manager.validate_payload(webhook, request)
@@ -398,11 +410,16 @@ def _get_provider_oauth_handler(
client_id = getattr(settings.secrets, f"{provider_name.value}_client_id")
client_secret = getattr(settings.secrets, f"{provider_name.value}_client_secret")
if not (client_id and client_secret):
logger.error(
"OAuth credentials for provider %s are missing. Check environment configuration.",
provider_name.value,
)
raise HTTPException(
status_code=501,
detail=(
f"Integration with provider '{provider_name.value}' is not configured"
),
detail={
"message": f"Integration with provider '{provider_name.value}' is not configured",
"hint": "Set client ID and secret in the environment.",
},
)
handler_class = HANDLERS_BY_NAME[provider_name]

View File

@@ -5,6 +5,7 @@ from typing import Any, Optional
import autogpt_libs.auth.models
import fastapi
import fastapi.responses
import pydantic
import starlette.middleware.cors
import uvicorn
from autogpt_libs.feature_flag.client import (
@@ -12,6 +13,7 @@ from autogpt_libs.feature_flag.client import (
shutdown_launchdarkly,
)
from autogpt_libs.logging.utils import generate_uvicorn_config
from fastapi.exceptions import RequestValidationError
import backend.data.block
import backend.data.db
@@ -86,11 +88,23 @@ app = fastapi.FastAPI(
def handle_internal_http_error(status_code: int = 500, log_error: bool = True):
def handler(request: fastapi.Request, exc: Exception):
if log_error:
logger.exception(f"{request.method} {request.url.path} failed: {exc}")
logger.exception(
"%s %s failed. Investigate and resolve the underlying issue: %s",
request.method,
request.url.path,
exc,
)
hint = (
"Adjust the request and retry."
if status_code < 500
else "Check server logs and dependent services."
)
return fastapi.responses.JSONResponse(
content={
"message": f"{request.method} {request.url.path} failed",
"message": f"Failed to process {request.method} {request.url.path}",
"detail": str(exc),
"hint": hint,
},
status_code=status_code,
)
@@ -98,6 +112,32 @@ def handle_internal_http_error(status_code: int = 500, log_error: bool = True):
return handler
async def validation_error_handler(
request: fastapi.Request, exc: Exception
) -> fastapi.responses.JSONResponse:
logger.error(
"Validation failed for %s %s: %s. Fix the request payload and try again.",
request.method,
request.url.path,
exc,
)
errors: list | str
if hasattr(exc, "errors"):
errors = exc.errors() # type: ignore[call-arg]
else:
errors = str(exc)
return fastapi.responses.JSONResponse(
status_code=422,
content={
"message": f"Invalid data for {request.method} {request.url.path}",
"detail": errors,
"hint": "Ensure the request matches the API schema.",
},
)
app.add_exception_handler(RequestValidationError, validation_error_handler)
app.add_exception_handler(pydantic.ValidationError, validation_error_handler)
app.add_exception_handler(ValueError, handle_internal_http_error(400))
app.add_exception_handler(Exception, handle_internal_http_error(500))
app.include_router(backend.server.routers.v1.v1_router, tags=["v1"], prefix="/api")

View File

@@ -1,5 +1,6 @@
"""Analytics API"""
import logging
from typing import Annotated
import fastapi
@@ -8,6 +9,7 @@ import backend.data.analytics
from backend.server.utils import get_user_id
router = fastapi.APIRouter()
logger = logging.getLogger(__name__)
@router.post(path="/log_raw_metric")
@@ -17,13 +19,25 @@ async def log_raw_metric(
metric_value: Annotated[float, fastapi.Body(..., embed=True)],
data_string: Annotated[str, fastapi.Body(..., embed=True)],
):
result = await backend.data.analytics.log_raw_metric(
user_id=user_id,
metric_name=metric_name,
metric_value=metric_value,
data_string=data_string,
)
return result.id
try:
result = await backend.data.analytics.log_raw_metric(
user_id=user_id,
metric_name=metric_name,
metric_value=metric_value,
data_string=data_string,
)
return result.id
except Exception as e:
logger.exception(
"Failed to log metric %s for user %s: %s", metric_name, user_id, e
)
raise fastapi.HTTPException(
status_code=500,
detail={
"message": str(e),
"hint": "Check analytics service connection and retry.",
},
)
@router.post("/log_raw_analytics")
@@ -43,7 +57,14 @@ async def log_raw_analytics(
),
],
):
result = await backend.data.analytics.log_raw_analytics(
user_id, type, data, data_index
)
return result.id
try:
result = await backend.data.analytics.log_raw_analytics(
user_id, type, data, data_index
)
return result.id
except Exception as e:
logger.exception("Failed to log analytics for user %s: %s", user_id, e)
raise fastapi.HTTPException(
status_code=500,
detail={"message": str(e), "hint": "Ensure analytics DB is reachable."},
)

View File

@@ -2,7 +2,7 @@ import logging
from typing import Annotated
from autogpt_libs.auth.middleware import APIKeyValidator
from fastapi import APIRouter, Body, Depends, Query
from fastapi import APIRouter, Body, Depends, HTTPException, Query
from fastapi.responses import JSONResponse
from backend.data.user import (
@@ -40,8 +40,11 @@ async def unsubscribe_via_one_click(token: Annotated[str, Query()]):
try:
await unsubscribe_user_by_token(token)
except Exception as e:
logger.error(f"Failed to unsubscribe user by token {token}: {e}")
raise e
logger.exception("Unsubscribe token %s failed: %s", token, e)
raise HTTPException(
status_code=500,
detail={"message": str(e), "hint": "Verify Postmark token settings."},
)
return JSONResponse(status_code=200, content={"status": "ok"})
@@ -67,7 +70,10 @@ async def postmark_webhook_handler(
case PostmarkSubscriptionChangeWebhook():
subscription_handler(webhook)
case _:
logger.warning(f"Unknown webhook type: {type(webhook)}")
logger.warning(
"Unhandled Postmark webhook type %s. Update handler mappings.",
type(webhook),
)
return
@@ -85,7 +91,10 @@ async def bounce_handler(event: PostmarkBounceWebhook):
logger.info(f"{event.Email=}")
user = await get_user_by_email(event.Email)
if not user:
logger.error(f"User not found for email: {event.Email}")
logger.warning(
"Received bounce for unknown email %s. Ensure user records are current.",
event.Email,
)
return
await set_user_email_verification(user.id, False)
logger.debug(f"Setting email verification to false for user: {user.id}")

View File

@@ -824,8 +824,15 @@ async def create_api_key(
)
return CreateAPIKeyResponse(api_key=api_key, plain_text_key=plain_text)
except APIKeyError as e:
logger.error(f"Failed to create API key: {str(e)}")
raise HTTPException(status_code=400, detail=str(e))
logger.error(
"Could not create API key for user %s: %s. Review input and permissions.",
user_id,
e,
)
raise HTTPException(
status_code=400,
detail={"message": str(e), "hint": "Verify request payload and try again."},
)
@v1_router.get(
@@ -841,8 +848,11 @@ async def get_api_keys(
try:
return await list_user_api_keys(user_id)
except APIKeyError as e:
logger.error(f"Failed to list API keys: {str(e)}")
raise HTTPException(status_code=400, detail=str(e))
logger.error("Failed to list API keys for user %s: %s", user_id, e)
raise HTTPException(
status_code=400,
detail={"message": str(e), "hint": "Check API key service availability."},
)
@v1_router.get(
@@ -861,8 +871,11 @@ async def get_api_key(
raise HTTPException(status_code=404, detail="API key not found")
return api_key
except APIKeyError as e:
logger.error(f"Failed to get API key: {str(e)}")
raise HTTPException(status_code=400, detail=str(e))
logger.error("Error retrieving API key %s for user %s: %s", key_id, user_id, e)
raise HTTPException(
status_code=400,
detail={"message": str(e), "hint": "Ensure the key ID is correct."},
)
@v1_router.delete(
@@ -883,8 +896,14 @@ async def delete_api_key(
except APIKeyPermissionError:
raise HTTPException(status_code=403, detail="Permission denied")
except APIKeyError as e:
logger.error(f"Failed to revoke API key: {str(e)}")
raise HTTPException(status_code=400, detail=str(e))
logger.error("Failed to revoke API key %s for user %s: %s", key_id, user_id, e)
raise HTTPException(
status_code=400,
detail={
"message": str(e),
"hint": "Verify permissions or try again later.",
},
)
@v1_router.post(
@@ -905,8 +924,11 @@ async def suspend_key(
except APIKeyPermissionError:
raise HTTPException(status_code=403, detail="Permission denied")
except APIKeyError as e:
logger.error(f"Failed to suspend API key: {str(e)}")
raise HTTPException(status_code=400, detail=str(e))
logger.error("Failed to suspend API key %s for user %s: %s", key_id, user_id, e)
raise HTTPException(
status_code=400,
detail={"message": str(e), "hint": "Check user permissions and retry."},
)
@v1_router.put(
@@ -929,5 +951,13 @@ async def update_permissions(
except APIKeyPermissionError:
raise HTTPException(status_code=403, detail="Permission denied")
except APIKeyError as e:
logger.error(f"Failed to update API key permissions: {str(e)}")
raise HTTPException(status_code=400, detail=str(e))
logger.error(
"Failed to update permissions for API key %s of user %s: %s",
key_id,
user_id,
e,
)
raise HTTPException(
status_code=400,
detail={"message": str(e), "hint": "Ensure permissions list is valid."},
)

View File

@@ -70,10 +70,10 @@ async def list_library_agents(
page_size=page_size,
)
except Exception as e:
logger.error(f"Could not fetch library agents: {e}")
logger.exception("Listing library agents failed for user %s: %s", user_id, e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to get library agents",
detail={"message": str(e), "hint": "Inspect database connectivity."},
) from e
@@ -102,10 +102,17 @@ async def get_library_agent_by_store_listing_version_id(
store_listing_version_id, user_id
)
except Exception as e:
logger.error(f"Could not fetch library agent from store version ID: {e}")
logger.exception(
"Retrieving library agent by store version failed for user %s: %s",
user_id,
e,
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to add agent to library",
detail={
"message": str(e),
"hint": "Check if the store listing ID is valid.",
},
) from e
@@ -143,22 +150,31 @@ async def add_marketplace_agent_to_library(
)
except store_exceptions.AgentNotFoundError:
logger.warning(f"Agent not found: {store_listing_version_id}")
logger.warning(
"Store listing version %s not found when adding to library",
store_listing_version_id,
)
raise HTTPException(
status_code=404,
detail=f"Store listing version {store_listing_version_id} not found",
detail={
"message": f"Store listing version {store_listing_version_id} not found",
"hint": "Confirm the ID provided.",
},
)
except store_exceptions.DatabaseError as e:
logger.error(f"Database error occurred whilst adding agent to library: {e}")
logger.exception("Database error whilst adding agent to library: %s", e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to add agent to library",
detail={"message": str(e), "hint": "Inspect DB logs for details."},
) from e
except Exception as e:
logger.error(f"Unexpected error while adding agent: {e}")
logger.exception("Unexpected error while adding agent to library: %s", e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to add agent to library",
detail={
"message": str(e),
"hint": "Check server logs for more information.",
},
) from e
@@ -203,16 +219,16 @@ async def update_library_agent(
content={"message": "Agent updated successfully"},
)
except store_exceptions.DatabaseError as e:
logger.exception(f"Database error while updating library agent: {e}")
logger.exception("Database error while updating library agent: %s", e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to update library agent",
detail={"message": str(e), "hint": "Verify DB connection."},
) from e
except Exception as e:
logger.exception(f"Unexpected error while updating library agent: {e}")
logger.exception("Unexpected error while updating library agent: %s", e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to update library agent",
detail={"message": str(e), "hint": "Check server logs."},
) from e

View File

@@ -47,10 +47,13 @@ async def list_presets(
page_size=page_size,
)
except Exception as e:
logger.exception(f"Exception occurred while getting presets: {e}")
logger.exception("Failed to list presets for user %s: %s", user_id, e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to get presets",
detail={
"message": str(e),
"hint": "Ensure the presets DB table is accessible.",
},
)
@@ -85,10 +88,12 @@ async def get_preset(
)
return preset
except Exception as e:
logger.exception(f"Exception occurred whilst getting preset: {e}")
logger.exception(
"Error retrieving preset %s for user %s: %s", preset_id, user_id, e
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to get preset",
detail={"message": str(e), "hint": "Validate preset ID and retry."},
)
@@ -125,10 +130,10 @@ async def create_preset(
except NotFoundError as e:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
except Exception as e:
logger.exception(f"Exception occurred while creating preset: {e}")
logger.exception("Preset creation failed for user %s: %s", user_id, e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to create preset",
detail={"message": str(e), "hint": "Check preset payload format."},
)
@@ -161,10 +166,10 @@ async def update_preset(
user_id=user_id, preset_id=preset_id, preset=preset
)
except Exception as e:
logger.exception(f"Exception occurred whilst updating preset: {e}")
logger.exception("Preset update failed for user %s: %s", user_id, e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to update preset",
detail={"message": str(e), "hint": "Check preset data and try again."},
)
@@ -191,10 +196,12 @@ async def delete_preset(
try:
await db.delete_preset(user_id, preset_id)
except Exception as e:
logger.exception(f"Exception occurred whilst deleting preset: {e}")
logger.exception(
"Error deleting preset %s for user %s: %s", preset_id, user_id, e
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to delete preset",
detail={"message": str(e), "hint": "Ensure preset exists before deleting."},
)
@@ -252,8 +259,11 @@ async def execute_preset(
except HTTPException:
raise
except Exception as e:
logger.exception(f"Exception occurred while executing preset: {e}")
logger.exception("Preset execution failed for user %s: %s", user_id, e)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=str(e),
detail={
"message": str(e),
"hint": "Review preset configuration and graph ID.",
},
)

View File

@@ -1,7 +1,7 @@
import logging
from autogpt_libs.auth.middleware import auth_middleware
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException
from backend.server.utils import get_user_id
@@ -23,4 +23,12 @@ async def proxy_otto_request(
Proxy requests to Otto API while adding necessary security headers and logging.
Requires an authenticated user.
"""
return await OttoService.ask(request, user_id)
logger.debug("Forwarding request to Otto for user %s", user_id)
try:
return await OttoService.ask(request, user_id)
except Exception as e:
logger.exception("Otto request failed for user %s: %s", user_id, e)
raise HTTPException(
status_code=502,
detail={"message": str(e), "hint": "Check Otto service status."},
)

View File

@@ -48,11 +48,14 @@ async def get_profile(
content={"detail": "Profile not found"},
)
return profile
except Exception:
logger.exception("Exception occurred whilst getting user profile")
except Exception as e:
logger.exception("Failed to fetch user profile for %s: %s", user_id, e)
return fastapi.responses.JSONResponse(
status_code=500,
content={"detail": "An error occurred while retrieving the user profile"},
content={
"detail": "Failed to retrieve user profile",
"hint": "Check database connection.",
},
)
@@ -86,11 +89,14 @@ async def update_or_create_profile(
user_id=user_id, profile=profile
)
return updated_profile
except Exception:
logger.exception("Exception occurred whilst updating profile")
except Exception as e:
logger.exception("Failed to update profile for user %s: %s", user_id, e)
return fastapi.responses.JSONResponse(
status_code=500,
content={"detail": "An error occurred while updating the user profile"},
content={
"detail": "Failed to update user profile",
"hint": "Validate request data.",
},
)
@@ -160,11 +166,14 @@ async def get_agents(
page_size=page_size,
)
return agents
except Exception:
logger.exception("Exception occured whilst getting store agents")
except Exception as e:
logger.exception("Failed to retrieve store agents: %s", e)
return fastapi.responses.JSONResponse(
status_code=500,
content={"detail": "An error occurred while retrieving the store agents"},
content={
"detail": "Failed to retrieve store agents",
"hint": "Check database or search parameters.",
},
)

View File

@@ -35,7 +35,9 @@ async def verify_token(request: TurnstileVerifyRequest) -> TurnstileVerifyRespon
turnstile_verify_url = settings.secrets.turnstile_verify_url
if not turnstile_secret_key:
logger.error("Turnstile secret key is not configured")
logger.error(
"Turnstile secret key missing. Set TURNSTILE_SECRET_KEY to enable verification."
)
return TurnstileVerifyResponse(
success=False,
error="CONFIGURATION_ERROR",

View File

@@ -3,6 +3,7 @@ import logging
from contextlib import asynccontextmanager
from typing import Protocol
import pydantic
import uvicorn
from autogpt_libs.auth import parse_jwt_token
from autogpt_libs.logging.utils import generate_uvicorn_config
@@ -51,7 +52,11 @@ async def event_broadcaster(manager: ConnectionManager):
async for event in event_queue.listen("*"):
await manager.send_execution_update(event)
except Exception as e:
logger.exception(f"Event broadcaster error: {e}")
logger.exception(
"Event broadcaster stopped due to error: %s. "
"Verify the Redis connection and restart the service.",
e,
)
raise
@@ -221,7 +226,22 @@ async def websocket_router(
try:
while True:
data = await websocket.receive_text()
message = WSMessage.model_validate_json(data)
try:
message = WSMessage.model_validate_json(data)
except pydantic.ValidationError as e:
logger.error(
"Invalid WebSocket message from user #%s: %s",
user_id,
e,
)
await websocket.send_text(
WSMessage(
method=WSMethod.ERROR,
success=False,
error=("Invalid message format. Review the schema and retry"),
).model_dump_json()
)
continue
try:
if message.method in _MSG_HANDLERS:
@@ -232,6 +252,21 @@ async def websocket_router(
message=message,
)
continue
except pydantic.ValidationError as e:
logger.error(
"Validation error while handling '%s' for user #%s: %s",
message.method.value,
user_id,
e,
)
await websocket.send_text(
WSMessage(
method=WSMethod.ERROR,
success=False,
error="Invalid message data. Refer to the API schema",
).model_dump_json()
)
continue
except Exception as e:
logger.error(
f"Error while handling '{message.method.value}' message "