Compare commits

...

4 Commits

Author SHA1 Message Date
CHANGE 48f030241e debug: point agent server image to debug build
Use the debug agent-server image (daa5f02-python) that includes
verbose webhook auth logging.
2026-03-27 10:08:35 -04:00
CHANGE db8138a2ba debug: add verbose webhook auth logging to app server
Add WARNING-level logs to valid_sandbox in webhook_router.py to
diagnose 401 errors on incoming webhooks from agent server:

- Log received session_api_key value and all request headers on entry
- Log which rejection path is taken (missing header, invalid key, no user)
- Log sandbox service type used for lookup
- Log sandbox details when found (id, user, status, app_mode)
- Add branch to CI trigger for image build
2026-03-27 10:05:34 -04:00
CHANGE 880af856c5 revert: remove branch from CI build trigger 2026-03-26 14:28:24 -04:00
CHANGE 8ca630c718 feat(enterprise): acquire pg_advisory_lock before running database migrations
Prevent concurrent migration runs when multiple pods start simultaneously.
2026-03-26 13:26:41 -04:00
4 changed files with 27 additions and 2 deletions
+1
View File
@@ -10,6 +10,7 @@ on:
branches:
- main
- "saas-rel-*"
- "jl/debug-webhook-auth"
tags:
- "*"
pull_request:
+5 -1
View File
@@ -8,7 +8,7 @@ logging.getLogger('alembic.runtime.plugins').setLevel(logging.WARNING)
from alembic import context # noqa: E402
from google.cloud.sql.connector import Connector # noqa: E402
-from sqlalchemy import create_engine # noqa: E402
+from sqlalchemy import create_engine, text # noqa: E402
from storage.base import Base # noqa: E402
target_metadata = Base.metadata
@@ -109,6 +109,10 @@ def run_migrations_online() -> None:
version_table_schema=target_metadata.schema,
)
# Lock number must be unique — md5 hash of 'openhands_enterprise_migrations'
# Lock is released when the connection context manager exits
connection.execute(text('SELECT pg_advisory_lock(3617572382373537863)'))
with context.begin_transaction():
context.run_migrations()
@@ -63,7 +63,14 @@ async def valid_sandbox(
) -> SandboxInfo:
"""Use a session api key for validation, and get a sandbox. Subsequent actions
are executed in the context of the owner of the sandbox"""
_logger.warning(
f'webhook valid_sandbox: path={request.url.path}, '
f'session_api_key={session_api_key!r}, '
f'all_headers={dict(request.headers)}, '
f'client={request.client.host if request.client else "unknown"}'
)
if not session_api_key:
_logger.warning('webhook valid_sandbox: REJECTING - no session_api_key header')
raise HTTPException(
status.HTTP_401_UNAUTHORIZED, detail='X-Session-API-Key header is required'
)
@@ -74,14 +81,27 @@ async def valid_sandbox(
# Since we need access to all sandboxes, this is executed in the context of the admin.
setattr(state, USER_CONTEXT_ATTR, ADMIN)
async with get_sandbox_service(state) as sandbox_service:
_logger.warning(
f'webhook valid_sandbox: looking up key={session_api_key!r}, '
f'sandbox_service_type={type(sandbox_service).__name__}'
)
sandbox_info = await sandbox_service.get_sandbox_by_session_api_key(
session_api_key
)
if sandbox_info is None:
_logger.warning(
f'webhook valid_sandbox: REJECTING - sandbox not found for key={session_api_key!r}'
)
raise HTTPException(
status.HTTP_401_UNAUTHORIZED, detail='Invalid session API key'
)
_logger.warning(
f'webhook valid_sandbox: found sandbox_id={sandbox_info.id}, '
f'created_by_user_id={sandbox_info.created_by_user_id}, '
f'status={sandbox_info.status}, app_mode={app_mode}'
)
# In SAAS Mode there is always a user, so we set the owner of the sandbox
# as the current user (Validated by the session_api_key they provided)
if sandbox_info.created_by_user_id:
@@ -13,7 +13,7 @@ from openhands.sdk.utils.models import DiscriminatedUnionMixin
# The version of the agent server to use for deployments.
# Typically this will be the same as the values from the pyproject.toml
-AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:1.14.0-python'
+AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:daa5f02-python'
class SandboxSpecService(ABC):