feat(backend): cache control headers (#10160)

### Why? 🤔
  <!-- Clearly explain the need for these changes: -->
  We need to prevent sensitive data (authentication tokens, API
  keys, user credentials, personal information) from being cached by
   browsers and proxies. Following the principle of "secure by
  default", we're switching from a deny list to an allow list
  approach for cache control.

  ### Changes 🛠️
  <!-- Concisely describe all of the changes made in this pull
  request: -->
  - **Refactored cache control middleware from deny list to allow 
  list approach**
    - By default, ALL endpoints now have `Cache-Control: no-store,
  no-cache, must-revalidate, private` headers
    - Only explicitly allowed paths (static assets, health checks,
  public store pages) can be cached
    - This ensures new endpoints are automatically protected without
   developers having to remember to add them to a list

  - **Updated `SecurityHeadersMiddleware` in 
  `/backend/backend/server/middleware/security.py`**
    - Renamed `SENSITIVE_PATHS` to `CACHEABLE_PATHS`
    - Inverted the logic in `is_cacheable_path()` method
    - Cache control headers are now applied to all paths NOT in the
  allow list

  - **Updated test suite to match new behavior**
    - Tests now verify that most endpoints have cache control
  headers by default
    - Tests verify that only allowed paths (static assets, health
  endpoints, etc.) can be cached

  - **Updated documentation in `CLAUDE.md`**
    - Documented the new allow list approach
    - Added instructions for developers on how to allow caching for
  new endpoints


### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [x] Test modified endpoints work still
  - [x] Test modified endpoints correctly have no cache rules

---------

Co-authored-by: Swifty <craigswift13@gmail.com>
This commit is contained in:
Nicholas Tindle
2025-06-20 09:19:52 -05:00
committed by GitHub
parent 3df6dcd26b
commit 568f5a449e
5 changed files with 257 additions and 1 deletions

View File

@@ -32,6 +32,7 @@ poetry run test
poetry run pytest path/to/test_file.py::test_function_name
# Lint and format
# prefer format if you want to just "fix" it and only get the errors that can't be autofixed
poetry run format # Black + isort
poetry run lint # ruff
```
@@ -77,6 +78,7 @@ npm run type-check
- **Queue System**: RabbitMQ for async task processing
- **Execution Engine**: Separate executor service processes agent workflows
- **Authentication**: JWT-based with Supabase integration
- **Security**: Cache protection middleware prevents sensitive data caching in browsers/proxies
### Frontend Architecture
- **Framework**: Next.js App Router with React Server Components
@@ -129,4 +131,15 @@ Key models (defined in `/backend/schema.prisma`):
1. Components go in `/frontend/src/components/`
2. Use existing UI components from `/frontend/src/components/ui/`
3. Add Storybook stories for new components
4. Test with Playwright if user-facing
4. Test with Playwright if user-facing
### Security Implementation
**Cache Protection Middleware:**
- Located in `/backend/backend/server/middleware/security.py`
- Default behavior: Disables caching for ALL endpoints with `Cache-Control: no-store, no-cache, must-revalidate, private`
- Uses an allow list approach - only explicitly permitted paths can be cached
- Cacheable paths include: static assets (`/static/*`, `/_next/static/*`), health checks, public store pages, documentation
- Prevents sensitive data (auth tokens, API keys, user data) from being cached by browsers/proxies
- To allow caching for a new endpoint, add it to `CACHEABLE_PATHS` in the middleware
- Applied to both main API server and external API applications

View File

@@ -1,5 +1,7 @@
from fastapi import FastAPI
from backend.server.middleware.security import SecurityHeadersMiddleware
from .routes.v1 import v1_router
external_app = FastAPI(
@@ -8,4 +10,6 @@ external_app = FastAPI(
docs_url="/docs",
version="1.0",
)
external_app.add_middleware(SecurityHeadersMiddleware)
external_app.include_router(v1_router, prefix="/v1")

View File

@@ -0,0 +1,93 @@
import re
from typing import Set
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.types import ASGIApp
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
"""
Middleware to add security headers to responses, with cache control
disabled by default for all endpoints except those explicitly allowed.
"""
CACHEABLE_PATHS: Set[str] = {
# Static assets
"/static",
"/_next/static",
"/assets",
"/images",
"/css",
"/js",
"/fonts",
# Public API endpoints
"/api/health",
"/api/v1/health",
"/api/status",
# Public store/marketplace pages (read-only)
"/api/store/agents",
"/api/v1/store/agents",
"/api/store/categories",
"/api/v1/store/categories",
"/api/store/featured",
"/api/v1/store/featured",
# Public graph templates (read-only, no user data)
"/api/graphs/templates",
"/api/v1/graphs/templates",
# Documentation endpoints
"/api/docs",
"/api/v1/docs",
"/docs",
"/swagger",
"/openapi.json",
# Favicon and manifest
"/favicon.ico",
"/manifest.json",
"/robots.txt",
"/sitemap.xml",
}
def __init__(self, app: ASGIApp):
super().__init__(app)
# Compile regex patterns for wildcard matching
self.cacheable_patterns = [
re.compile(pattern.replace("*", "[^/]+"))
for pattern in self.CACHEABLE_PATHS
if "*" in pattern
]
self.exact_paths = {path for path in self.CACHEABLE_PATHS if "*" not in path}
def is_cacheable_path(self, path: str) -> bool:
"""Check if the given path is allowed to be cached."""
# Check exact matches first
for cacheable_path in self.exact_paths:
if path.startswith(cacheable_path):
return True
# Check pattern matches
for pattern in self.cacheable_patterns:
if pattern.match(path):
return True
return False
async def dispatch(self, request: Request, call_next):
response: Response = await call_next(request)
# Add general security headers
response.headers["X-Content-Type-Options"] = "nosniff"
response.headers["X-Frame-Options"] = "DENY"
response.headers["X-XSS-Protection"] = "1; mode=block"
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
# Default: Disable caching for all endpoints
# Only allow caching for explicitly permitted paths
if not self.is_cacheable_path(request.url.path):
response.headers["Cache-Control"] = (
"no-store, no-cache, must-revalidate, private"
)
response.headers["Pragma"] = "no-cache"
response.headers["Expires"] = "0"
return response

View File

@@ -0,0 +1,143 @@
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from starlette.applications import Starlette
from backend.server.middleware.security import SecurityHeadersMiddleware
@pytest.fixture
def app():
"""Create a test FastAPI app with security middleware."""
app = FastAPI()
app.add_middleware(SecurityHeadersMiddleware)
@app.get("/api/auth/user")
def get_user():
return {"user": "test"}
@app.get("/api/v1/integrations/oauth/google")
def oauth_endpoint():
return {"oauth": "data"}
@app.get("/api/graphs/123/execute")
def execute_graph():
return {"execution": "data"}
@app.get("/api/integrations/credentials")
def get_credentials():
return {"credentials": "sensitive"}
@app.get("/api/store/agents")
def store_agents():
return {"agents": "public list"}
@app.get("/api/health")
def health_check():
return {"status": "ok"}
@app.get("/static/logo.png")
def static_file():
return {"static": "content"}
return app
@pytest.fixture
def client(app):
"""Create a test client."""
return TestClient(app)
def test_non_cacheable_endpoints_have_cache_control_headers(client):
"""Test that non-cacheable endpoints (most endpoints) have proper cache control headers."""
non_cacheable_endpoints = [
"/api/auth/user",
"/api/v1/integrations/oauth/google",
"/api/graphs/123/execute",
"/api/integrations/credentials",
]
for endpoint in non_cacheable_endpoints:
response = client.get(endpoint)
# Check cache control headers are present (default behavior)
assert (
response.headers["Cache-Control"]
== "no-store, no-cache, must-revalidate, private"
)
assert response.headers["Pragma"] == "no-cache"
assert response.headers["Expires"] == "0"
# Check general security headers
assert response.headers["X-Content-Type-Options"] == "nosniff"
assert response.headers["X-Frame-Options"] == "DENY"
assert response.headers["X-XSS-Protection"] == "1; mode=block"
assert response.headers["Referrer-Policy"] == "strict-origin-when-cross-origin"
def test_cacheable_endpoints_dont_have_cache_control_headers(client):
"""Test that explicitly cacheable endpoints don't have restrictive cache control headers."""
cacheable_endpoints = [
"/api/store/agents",
"/api/health",
"/static/logo.png",
]
for endpoint in cacheable_endpoints:
response = client.get(endpoint)
# Should NOT have restrictive cache control headers
assert (
"Cache-Control" not in response.headers
or "no-store" not in response.headers.get("Cache-Control", "")
)
assert (
"Pragma" not in response.headers
or response.headers.get("Pragma") != "no-cache"
)
assert (
"Expires" not in response.headers or response.headers.get("Expires") != "0"
)
# Should still have general security headers
assert response.headers["X-Content-Type-Options"] == "nosniff"
assert response.headers["X-Frame-Options"] == "DENY"
assert response.headers["X-XSS-Protection"] == "1; mode=block"
assert response.headers["Referrer-Policy"] == "strict-origin-when-cross-origin"
def test_is_cacheable_path_detection():
"""Test the path detection logic."""
middleware = SecurityHeadersMiddleware(Starlette())
# Test cacheable paths (allow list)
assert middleware.is_cacheable_path("/api/health")
assert middleware.is_cacheable_path("/api/v1/health")
assert middleware.is_cacheable_path("/static/image.png")
assert middleware.is_cacheable_path("/api/store/agents")
assert middleware.is_cacheable_path("/docs")
assert middleware.is_cacheable_path("/favicon.ico")
# Test non-cacheable paths (everything else)
assert not middleware.is_cacheable_path("/api/auth/user")
assert not middleware.is_cacheable_path("/api/v1/integrations/oauth/callback")
assert not middleware.is_cacheable_path("/api/integrations/credentials/123")
assert not middleware.is_cacheable_path("/api/graphs/abc123/execute")
assert not middleware.is_cacheable_path("/api/store/xyz/submissions")
def test_path_prefix_matching():
"""Test that path prefix matching works correctly."""
middleware = SecurityHeadersMiddleware(Starlette())
# Test that paths starting with cacheable prefixes are cacheable
assert middleware.is_cacheable_path("/static/css/style.css")
assert middleware.is_cacheable_path("/static/js/app.js")
assert middleware.is_cacheable_path("/assets/images/logo.png")
assert middleware.is_cacheable_path("/_next/static/chunks/main.js")
# Test that other API paths are not cacheable by default
assert not middleware.is_cacheable_path("/api/users/profile")
assert not middleware.is_cacheable_path("/api/v1/private/data")
assert not middleware.is_cacheable_path("/api/billing/subscription")

View File

@@ -38,6 +38,7 @@ from backend.blocks.llm import LlmModel
from backend.data.model import Credentials
from backend.integrations.providers import ProviderName
from backend.server.external.api import external_app
from backend.server.middleware.security import SecurityHeadersMiddleware
settings = backend.util.settings.Settings()
logger = logging.getLogger(__name__)
@@ -114,6 +115,8 @@ app = fastapi.FastAPI(
generate_unique_id_function=custom_generate_unique_id,
)
app.add_middleware(SecurityHeadersMiddleware)
def handle_internal_http_error(status_code: int = 500, log_error: bool = True):
def handler(request: fastapi.Request, exc: Exception):