mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
Added docs and more specific error handling
This commit is contained in:
@@ -20,10 +20,36 @@ PRISMA_SCHEMA="postgres/schema.prisma"
|
||||
# SQLAlchemy Configuration (for gradual migration from Prisma)
|
||||
# Set to true to enable SQLAlchemy alongside Prisma (both ORMs coexist during migration)
|
||||
ENABLE_SQLALCHEMY=false
|
||||
|
||||
# Connection Pool Configuration
|
||||
# IMPORTANT: With 6 backend processes, total connections = 6 × (POOL_SIZE + MAX_OVERFLOW)
|
||||
# Must stay under PostgreSQL max_connections (default: 100); e.g. the defaults below give 6 × (10 + 5) = 90
|
||||
#
|
||||
# Environment-specific recommendations:
|
||||
# Development: POOL_SIZE=2-3, MAX_OVERFLOW=1-2 (lightweight, fast startup)
|
||||
# Test/CI: POOL_SIZE=2, MAX_OVERFLOW=1 (minimal resources, parallel test safety)
|
||||
# Production: POOL_SIZE=10-20, MAX_OVERFLOW=5-10 (handle real traffic and bursts; if all 6 processes use the upper end, 6 × 30 = 180 exceeds the default max_connections of 100)
|
||||
#
|
||||
# Default values below are suitable for production use:
|
||||
SQLALCHEMY_POOL_SIZE=10
|
||||
SQLALCHEMY_MAX_OVERFLOW=5
|
||||
|
||||
# Timeout Configuration
|
||||
# POOL_TIMEOUT: How long to wait for an available connection from the pool (when all connections are busy)
|
||||
# CONNECT_TIMEOUT: How long to wait when establishing a NEW connection to PostgreSQL
|
||||
#
|
||||
# Environment-specific recommendations:
|
||||
# Development: POOL_TIMEOUT=10-30s, CONNECT_TIMEOUT=5-10s
|
||||
# Test/CI: POOL_TIMEOUT=5-10s, CONNECT_TIMEOUT=5-10s (fail fast)
|
||||
# Production: POOL_TIMEOUT=30s, CONNECT_TIMEOUT=10-15s
|
||||
#
|
||||
# Default values below are suitable for production use:
|
||||
SQLALCHEMY_POOL_TIMEOUT=30
|
||||
SQLALCHEMY_CONNECT_TIMEOUT=10
|
||||
|
||||
# SQL Query Logging
|
||||
# Set to true to log ALL SQL statements (very verbose, useful for debugging)
|
||||
# Should always be false in production
|
||||
SQLALCHEMY_ECHO=false
|
||||
|
||||
## ===== REQUIRED SERVICE CREDENTIALS ===== ##
|
||||
|
||||
@@ -91,6 +91,9 @@ class DatabaseManager(AppService):
|
||||
# Initialize SQLAlchemy if enabled (for gradual migration from Prisma)
|
||||
if config.enable_sqlalchemy:
|
||||
try:
|
||||
from sqlalchemy.exc import DatabaseError, OperationalError
|
||||
from sqlalchemy.exc import TimeoutError as SQLAlchemyTimeoutError
|
||||
|
||||
from backend.data import sqlalchemy as sa
|
||||
|
||||
engine = sa.create_engine()
|
||||
@@ -101,9 +104,37 @@ class DatabaseManager(AppService):
|
||||
f"(pool_size={config.sqlalchemy_pool_size}, "
|
||||
f"max_overflow={config.sqlalchemy_max_overflow})"
|
||||
)
|
||||
except OperationalError as e:
|
||||
logger.error(
|
||||
f"[{self.service_name}] Failed to connect to database during SQLAlchemy initialization. "
|
||||
f"Check database connection settings (host, port, credentials). "
|
||||
f"Database URL: {config.database_url.split('@')[-1] if '@' in config.database_url else 'N/A'}. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
except SQLAlchemyTimeoutError as e:
|
||||
logger.error(
|
||||
f"[{self.service_name}] Database connection timeout during SQLAlchemy initialization. "
|
||||
f"Timeout setting: {config.sqlalchemy_connect_timeout}s. "
|
||||
f"Check if database is accessible and increase timeout if needed. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
except DatabaseError as e:
|
||||
logger.error(
|
||||
f"[{self.service_name}] Database error during SQLAlchemy initialization. "
|
||||
f"Check database permissions and configuration. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[{self.service_name}] Failed to initialize SQLAlchemy: {e}"
|
||||
f"[{self.service_name}] Unexpected error during SQLAlchemy initialization. "
|
||||
f"Configuration: pool_size={config.sqlalchemy_pool_size}, "
|
||||
f"max_overflow={config.sqlalchemy_max_overflow}, "
|
||||
f"pool_timeout={config.sqlalchemy_pool_timeout}s. "
|
||||
f"Error: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
|
||||
@@ -115,13 +146,25 @@ class DatabaseManager(AppService):
|
||||
# Dispose SQLAlchemy if it was enabled
|
||||
if config.enable_sqlalchemy:
|
||||
try:
|
||||
from sqlalchemy.exc import DatabaseError, OperationalError
|
||||
|
||||
from backend.data import sqlalchemy as sa
|
||||
|
||||
await sa.dispose()
|
||||
logger.info(f"[{self.service_name}] ✓ SQLAlchemy disposed")
|
||||
except (OperationalError, DatabaseError) as e:
|
||||
# Log as warning since disposal failures during shutdown are non-critical
|
||||
logger.warning(
|
||||
f"[{self.service_name}] Database error while disposing SQLAlchemy connections. "
|
||||
f"This may leave connections open but won't affect shutdown. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[{self.service_name}] Error disposing SQLAlchemy: {e}"
|
||||
f"[{self.service_name}] Unexpected error while disposing SQLAlchemy. "
|
||||
f"Connection pool may not be cleanly released. "
|
||||
f"Error: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
await db.disconnect()
|
||||
|
||||
@@ -82,6 +82,9 @@ async def lifespan_context(app: fastapi.FastAPI):
|
||||
config = backend.util.settings.Config()
|
||||
if config.enable_sqlalchemy:
|
||||
try:
|
||||
from sqlalchemy.exc import DatabaseError, OperationalError
|
||||
from sqlalchemy.exc import TimeoutError as SQLAlchemyTimeoutError
|
||||
|
||||
from backend.data import sqlalchemy as sa
|
||||
|
||||
engine = sa.create_engine()
|
||||
@@ -92,8 +95,38 @@ async def lifespan_context(app: fastapi.FastAPI):
|
||||
f"(pool_size={config.sqlalchemy_pool_size}, "
|
||||
f"max_overflow={config.sqlalchemy_max_overflow})"
|
||||
)
|
||||
except OperationalError as e:
|
||||
logger.error(
|
||||
f"Failed to connect to database during SQLAlchemy initialization. "
|
||||
f"Check database connection settings (host, port, credentials). "
|
||||
f"Database URL: {config.database_url.split('@')[-1] if '@' in config.database_url else 'N/A'}. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
except SQLAlchemyTimeoutError as e:
|
||||
logger.error(
|
||||
f"Database connection timeout during SQLAlchemy initialization. "
|
||||
f"Timeout setting: {config.sqlalchemy_connect_timeout}s. "
|
||||
f"Check if database is accessible and increase timeout if needed. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
except DatabaseError as e:
|
||||
logger.error(
|
||||
f"Database error during SQLAlchemy initialization. "
|
||||
f"Check database permissions and configuration. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize SQLAlchemy: {e}")
|
||||
logger.error(
|
||||
f"Unexpected error during SQLAlchemy initialization. "
|
||||
f"Configuration: pool_size={config.sqlalchemy_pool_size}, "
|
||||
f"max_overflow={config.sqlalchemy_max_overflow}, "
|
||||
f"pool_timeout={config.sqlalchemy_pool_timeout}s. "
|
||||
f"Error: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
|
||||
# Configure thread pool for FastAPI sync operation performance
|
||||
@@ -139,12 +172,26 @@ async def lifespan_context(app: fastapi.FastAPI):
|
||||
# Dispose SQLAlchemy if it was enabled
|
||||
if config.enable_sqlalchemy:
|
||||
try:
|
||||
from sqlalchemy.exc import DatabaseError, OperationalError
|
||||
|
||||
from backend.data import sqlalchemy as sa
|
||||
|
||||
await sa.dispose()
|
||||
logger.info("✓ AgentServer: SQLAlchemy disposed")
|
||||
except (OperationalError, DatabaseError) as e:
|
||||
# Log as warning since disposal failures during shutdown are non-critical
|
||||
logger.warning(
|
||||
f"Database error while disposing SQLAlchemy connections. "
|
||||
f"This may leave connections open but won't affect shutdown. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error disposing SQLAlchemy: {e}")
|
||||
logger.warning(
|
||||
f"Unexpected error while disposing SQLAlchemy. "
|
||||
f"Connection pool may not be cleanly released. "
|
||||
f"Error: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
await backend.data.db.disconnect()
|
||||
|
||||
|
||||
@@ -285,8 +285,14 @@ class Config(UpdateTrackingModel["Config"], BaseSettings):
|
||||
ge=1,
|
||||
le=100,
|
||||
description="Number of persistent connections in the SQLAlchemy pool. "
|
||||
"Guidelines: REST API (high traffic) 10-20, Background workers 3-5. "
|
||||
"Total across all services should not exceed PostgreSQL max_connections (default: 100).",
|
||||
"Environment-specific recommendations: "
|
||||
"Development: 2-3 (lightweight, fast startup), "
|
||||
"Test/CI: 2 (minimal resources, avoid connection exhaustion in parallel tests), "
|
||||
"Production: 10-20 for REST API (high traffic), 3-5 for background workers. "
|
||||
"IMPORTANT: Total connections across ALL services (pool_size + max_overflow per service) "
|
||||
"must not exceed PostgreSQL max_connections (default: 100). "
|
||||
"With 6 processes in production (rest-api, executor, database-manager, scheduler, websocket, comms), "
|
||||
"calculate: 6 × (pool_size + max_overflow) ≤ 100.",
|
||||
)
|
||||
|
||||
sqlalchemy_max_overflow: int = Field(
|
||||
@@ -294,22 +300,42 @@ class Config(UpdateTrackingModel["Config"], BaseSettings):
|
||||
ge=0,
|
||||
le=50,
|
||||
description="Additional connections beyond pool_size when pool is exhausted. "
|
||||
"Total max connections = pool_size + max_overflow.",
|
||||
"Total max connections per service = pool_size + max_overflow. "
|
||||
"Environment-specific recommendations: "
|
||||
"Development: 1-2 (handles occasional bursts), "
|
||||
"Test/CI: 1 (minimal extra connections), "
|
||||
"Production: 5-10 (handles traffic spikes without exhausting pool). "
|
||||
"Setting to 0 means strict pool limit (connections fail when pool is exhausted). "
|
||||
"Higher values provide better burst handling but consume more database connections.",
|
||||
)
|
||||
|
||||
sqlalchemy_pool_timeout: int = Field(
|
||||
default=30,
|
||||
ge=1,
|
||||
le=300,
|
||||
description="Seconds to wait for available connection before raising error. "
|
||||
"If all connections are busy and max_overflow is reached, requests wait this long before failing.",
|
||||
description="Seconds to wait for available connection from pool before raising TimeoutError. "
|
||||
"This timeout applies ONLY when all connections (pool_size + max_overflow) are busy. "
|
||||
"Environment-specific recommendations: "
|
||||
"Development: 10-30s (generous for debugging), "
|
||||
"Test/CI: 5-10s (fail fast in tests), "
|
||||
"Production: 30s (balance between user experience and resource holding). "
|
||||
"If you see frequent TimeoutErrors, either increase pool_size/max_overflow or investigate slow queries. "
|
||||
"NOTE: This is different from sqlalchemy_connect_timeout (which applies when establishing new connections).",
|
||||
)
|
||||
|
||||
sqlalchemy_connect_timeout: int = Field(
|
||||
default=10,
|
||||
ge=1,
|
||||
le=60,
|
||||
description="Seconds to wait when establishing new connection to PostgreSQL.",
|
||||
description="Seconds to wait when establishing NEW connection to PostgreSQL database. "
|
||||
"This timeout applies at the network/TCP level when creating connections (not when acquiring from pool). "
|
||||
"Environment-specific recommendations: "
|
||||
"Development: 5-10s (local database should connect quickly), "
|
||||
"Test/CI: 5-10s (fail fast if database unavailable), "
|
||||
"Production: 10-15s (account for network latency, especially with cloud databases). "
|
||||
"If you see frequent connection timeout errors during startup, check database accessibility "
|
||||
"and network connectivity. "
|
||||
"NOTE: This is different from sqlalchemy_pool_timeout (which applies when waiting for available connections from pool).",
|
||||
)
|
||||
|
||||
sqlalchemy_echo: bool = Field(
|
||||
|
||||
Reference in New Issue
Block a user