Files
AutoGPT/autogpt_platform/backend/pyproject.toml
Zamil Majdy 1fdc02467b feat(backend): Add comprehensive Prometheus instrumentation for observability (#10923)
## Summary
- Implement comprehensive Prometheus metrics instrumentation for all
FastAPI services
- Add custom business metrics for graph/block executions
- Enable dual publishing to both Grafana Cloud and internal Prometheus

## Related Infrastructure PR
-
https://github.com/Significant-Gravitas/AutoGPT_cloud_infrastructure/pull/214

## Changes

### 📊 Metrics Infrastructure
- Added `prometheus-fastapi-instrumentator` dependency for automatic
HTTP metrics
- Created centralized `instrumentation.py` module for consistent metrics
across services
- Instrumented REST API, WebSocket, and External API services

### 📈 Automatic HTTP Metrics
All FastAPI services now automatically collect:
- **Request latency**: Histogram with custom buckets (10ms to 60s)
- **Request/response size**: Track payload sizes
- **Request counts**: By method, endpoint, and status code
- **Active requests**: Real-time count of in-progress requests
- **Error rates**: 4xx and 5xx responses

### 🎯 Custom Business Metrics
Added domain-specific metrics:
- **Graph executions**: Count by status (success/error/validation_error)
- **Block executions**: Count and duration by block_type and status
- **WebSocket connections**: Active connection gauge
- **Database queries**: Duration histogram by operation and table
- **RabbitMQ messages**: Count by queue and status
- **Authentication**: Attempts by method and status
- **API key usage**: By provider and block type
- **Rate limiting**: Hit count by endpoint

### 🔌 Service Endpoints
Each service exposes metrics at `/metrics`:
- REST API (port 8006): `/metrics`
- WebSocket (port 8001): `/metrics`
- External API: `/external-api/metrics`
- Executor (port 8002): Already had metrics, now enhanced

### 🏷️ Kubernetes Integration
Updated Helm charts with pod annotations:
```yaml
prometheus.io/scrape: "true"
prometheus.io/port: "8006"  # or appropriate port
prometheus.io/path: "/metrics"
```

## Testing
- [x] Install dependencies: `poetry install`
- [x] Run services: `poetry run serve`
- [x] Check metrics endpoints are accessible
- [x] Verify metrics are being collected
- [x] Confirm Grafana Agent can scrape metrics
- [x] Test graph/block execution tracking
- [x] Verify WebSocket connection metrics

## Performance Impact
- Minimal overhead (~1-2ms per request)
- Metrics are collected asynchronously
- Can be disabled via `ENABLE_METRICS=false` env var

## Next Steps
1. Deploy to dev environment
2. Configure Grafana Cloud dashboards
3. Set up alerting rules based on metrics
4. Add more custom business metrics as needed

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-09-16 12:58:04 +07:00

138 lines
3.4 KiB
TOML

[tool.poetry]
name = "autogpt-platform-backend"
version = "0.6.22"
description = "A platform for building AI-powered agentic workflows"
authors = ["AutoGPT <info@agpt.co>"]
readme = "README.md"
packages = [{ include = "backend", format = "sdist" }]
[tool.poetry.dependencies]
python = ">=3.10,<3.14"
aio-pika = "^9.5.5"
aiohttp = "^3.10.0"
aiodns = "^3.5.0"
anthropic = "^0.59.0"
apscheduler = "^3.11.0"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
click = "^8.2.0"
cryptography = "^45.0"
discord-py = "^2.5.2"
e2b-code-interpreter = "^1.5.2"
fastapi = "^0.116.1"
feedparser = "^6.0.11"
flake8 = "^7.3.0"
google-api-python-client = "^2.177.0"
google-auth-oauthlib = "^1.2.2"
google-cloud-storage = "^3.2.0"
googlemaps = "^4.10.0"
gravitasml = "^0.1.3"
groq = "^0.30.0"
html2text = "^2024.2.26"
jinja2 = "^3.1.6"
jsonref = "^1.1.0"
jsonschema = "^4.25.0"
launchdarkly-server-sdk = "^9.12.0"
mem0ai = "^0.1.115"
moviepy = "^2.1.2"
ollama = "^0.5.1"
openai = "^1.97.1"
pika = "^1.3.2"
pinecone = "^7.3.0"
poetry = "2.1.1" # CHECK DEPENDABOT SUPPORT BEFORE UPGRADING
postmarker = "^1.0"
praw = "~7.8.1"
prisma = "^0.15.0"
prometheus-client = "^0.22.1"
prometheus-fastapi-instrumentator = "^7.0.0"
psutil = "^7.0.0"
psycopg2-binary = "^2.9.10"
pydantic = { extras = ["email"], version = "^2.11.7" }
pydantic-settings = "^2.10.1"
pytest = "^8.4.1"
pytest-asyncio = "^1.1.0"
python-dotenv = "^1.1.1"
python-multipart = "^0.0.20"
redis = "^6.2.0"
replicate = "^1.0.6"
sentry-sdk = {extras = ["anthropic", "fastapi", "launchdarkly", "openai", "sqlalchemy"], version = "^2.33.2"}
sqlalchemy = "^2.0.40"
strenum = "^0.4.9"
stripe = "^11.5.0"
supabase = "2.17.0"
tenacity = "^9.1.2"
todoist-api-python = "^2.1.7"
tweepy = "^4.16.0"
uvicorn = { extras = ["standard"], version = "^0.35.0" }
websockets = "^15.0"
youtube-transcript-api = "^1.2.1"
zerobouncesdk = "^1.1.2"
# NOTE: please insert new dependencies in their alphabetical location
pytest-snapshot = "^0.9.0"
aiofiles = "^24.1.0"
tiktoken = "^0.9.0"
aioclamd = "^1.0.0"
setuptools = "^80.9.0"
gcloud-aio-storage = "^9.5.0"
pandas = "^2.3.1"
firecrawl-py = "^2.16.3"
exa-py = "^1.14.20"
croniter = "^6.0.0"
stagehand = "^0.5.1"
[tool.poetry.group.dev.dependencies]
aiohappyeyeballs = "^2.6.1"
black = "^24.10.0"
faker = "^37.6.0"
httpx = "^0.28.1"
isort = "^5.13.2"
poethepoet = "^0.37.0"
pre-commit = "^4.3.0"
pyright = "^1.1.404"
pytest-mock = "^3.14.0"
pytest-watcher = "^0.4.2"
requests = "^2.32.5"
ruff = "^0.12.11"
# NOTE: please insert new dependencies in their alphabetical location
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
app = "backend.app:main"
rest = "backend.rest:main"
db = "backend.db:main"
ws = "backend.ws:main"
scheduler = "backend.scheduler:main"
notification = "backend.notification:main"
executor = "backend.exec:main"
cli = "backend.cli:main"
format = "linter:format"
lint = "linter:lint"
test = "run_tests:test"
[tool.isort]
profile = "black"
[tool.pytest-watcher]
now = false
clear = true
delay = 0.2
runner = "pytest"
runner_args = []
patterns = ["*.py"]
ignore_patterns = []
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
filterwarnings = [
"ignore:'audioop' is deprecated:DeprecationWarning:discord.player",
"ignore:invalid escape sequence:DeprecationWarning:tweepy.api",
]
[tool.ruff]
target-version = "py310"