Compare commits

..

7 Commits

Author SHA1 Message Date
Nicholas Tindle
ad1a814724 Update manager.py 2026-02-09 13:42:14 -06:00
Otto
562cf04ab6 refactor(backend): extract shared auto-credential parsing to utils.py
Addresses review feedback on #12004:
- Added AutoCredentialFieldInfo dataclass and parse_auto_credential_field()
  helper to executor/utils.py
- Updated _acquire_auto_credentials in manager.py to use shared helper
- Updated _validate_node_input_credentials in utils.py to use shared helper

This consolidates the duplicate logic for parsing GoogleDriveFileField-style
auto-credential fields, making manager.py less cluttered while ensuring
consistent validation/acquisition behavior.
2026-02-09 07:57:36 +00:00
Nicholas Tindle
90b3b5ba16 fix(backend): Fix misplaced section header in graph_test.py
Move the _reassign_ids section comment to above the actual _reassign_ids
tests, and label the combine() tests correctly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 16:11:47 -06:00
Nicholas Tindle
f4f81bc4fc fix(backend): Remove _credentials_id key on fork instead of setting to None
Setting _credentials_id to None on fork was ambiguous — both "forked,
needs re-auth" and "chained data from upstream" were represented as None.
This caused _acquire_auto_credentials to silently skip credential
acquisition for forked agents, leading to confusing TypeErrors at runtime.

Now the key is deleted entirely, making the three states unambiguous:
- Present with value: user-selected credentials
- Present as None: chained data from upstream block
- Absent: forked/needs re-authentication

Also adds pre-run validation for the missing key case and makes error
messages provider-agnostic.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 17:34:16 -06:00
Nicholas Tindle
c5abc01f25 fix(backend): Add error handling for auto-credentials store lookup
Wrap get_creds_by_id call in try/except in the auto-credentials
validation path to match the error handling pattern used for regular
credentials.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 16:53:29 -06:00
Nicholas Tindle
8b7053c1de merge: Resolve conflicts with dev (PR #11986 graph model refactor)
Adapt auto-credentials filtering to dev's refactored graph model:
- aggregate_credentials_inputs() now returns 3-tuples (field_info, node_pairs, is_required)
- credentials_input_schema moved to GraphModel, builds JSON schema directly
- Update regular/auto_credentials_inputs properties for 3-tuple format
- Update test mocks and assertions for new tuple format and class hierarchy

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 16:39:57 -06:00
Nicholas Tindle
e00c1202ad fix(platform): Fix Google Drive auto-credentials handling across the platform
- Tag auto-credentials with `is_auto_credential` and `input_field_name` on `CredentialsFieldInfo` to distinguish them from regular user-provided credentials
- Add `regular_credentials_inputs` and `auto_credentials_inputs` properties to `Graph` so UI schemas, CoPilot, and library presets only surface regular credentials
- Extract `_acquire_auto_credentials()` helper in executor to resolve embedded `_credentials_id` at execution time with proper lock management
- Validate auto-credentials ownership in `_validate_node_input_credentials()` to catch stale/missing credentials before execution
- Clear `_credentials_id` in `_reassign_ids()` on graph fork so cloned agents require re-authentication
- Propagate `is_auto_credential` through `combine()` and `discriminate()` on `CredentialsFieldInfo`
- Add `referrerPolicy: "no-referrer-when-downgrade"` to Google API script loading to fix Firefox API key validation
- Comprehensive test coverage for all new behavior

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 16:08:53 -06:00
56 changed files with 5143 additions and 6984 deletions

View File

@@ -49,7 +49,7 @@ jobs:
- name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
if: github.event_name == 'push'
uses: peter-evans/create-pull-request@v8
uses: peter-evans/create-pull-request@v7
with:
add-paths: classic/frontend/build/web
base: ${{ github.ref_name }}

View File

@@ -42,7 +42,7 @@ jobs:
- name: Get CI failure details
id: failure_details
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
const run = await github.rest.actions.getWorkflowRun({

View File

@@ -41,7 +41,7 @@ jobs:
python-version: "3.11" # Use standard version matching CI
- name: Set up Python dependency cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -78,7 +78,7 @@ jobs:
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22"
@@ -91,7 +91,7 @@ jobs:
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
- name: Cache frontend dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -124,7 +124,7 @@ jobs:
# Phase 1: Cache and load Docker images for faster setup
- name: Set up Docker image cache
id: docker-cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/docker-cache
# Use a versioned key for cache invalidation when image list changes
@@ -309,7 +309,6 @@ jobs:
uses: anthropics/claude-code-action@v1
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
allowed_bots: "dependabot[bot]"
claude_args: |
--allowedTools "Bash(npm:*),Bash(pnpm:*),Bash(poetry:*),Bash(git:*),Edit,Replace,NotebookEditCell,mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*), Bash(gh pr diff:*), Bash(gh pr view:*)"
prompt: |

View File

@@ -57,7 +57,7 @@ jobs:
python-version: "3.11" # Use standard version matching CI
- name: Set up Python dependency cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -94,7 +94,7 @@ jobs:
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22"
@@ -107,7 +107,7 @@ jobs:
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
- name: Cache frontend dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -140,7 +140,7 @@ jobs:
# Phase 1: Cache and load Docker images for faster setup
- name: Set up Docker image cache
id: docker-cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/docker-cache
# Use a versioned key for cache invalidation when image list changes

View File

@@ -39,7 +39,7 @@ jobs:
python-version: "3.11" # Use standard version matching CI
- name: Set up Python dependency cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}
@@ -76,7 +76,7 @@ jobs:
# Frontend Node.js/pnpm setup (mirrors platform-frontend-ci.yml)
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22"
@@ -89,7 +89,7 @@ jobs:
echo "PNPM_HOME=$HOME/.pnpm-store" >> $GITHUB_ENV
- name: Cache frontend dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}
@@ -132,7 +132,7 @@ jobs:
# Phase 1: Cache and load Docker images for faster setup
- name: Set up Docker image cache
id: docker-cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/docker-cache
# Use a versioned key for cache invalidation when image list changes

View File

@@ -33,7 +33,7 @@ jobs:
python-version: "3.11"
- name: Set up Python dependency cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}

View File

@@ -33,7 +33,7 @@ jobs:
python-version: "3.11"
- name: Set up Python dependency cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}

View File

@@ -38,7 +38,7 @@ jobs:
python-version: "3.11"
- name: Set up Python dependency cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}

View File

@@ -88,7 +88,7 @@ jobs:
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Set up Python dependency cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: poetry-${{ runner.os }}-${{ hashFiles('autogpt_platform/backend/poetry.lock') }}

View File

@@ -17,7 +17,7 @@ jobs:
- name: Check comment permissions and deployment status
id: check_status
if: github.event_name == 'issue_comment' && github.event.issue.pull_request
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
const commentBody = context.payload.comment.body.trim();
@@ -55,7 +55,7 @@ jobs:
- name: Post permission denied comment
if: steps.check_status.outputs.permission_denied == 'true'
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
await github.rest.issues.createComment({
@@ -68,7 +68,7 @@ jobs:
- name: Get PR details for deployment
id: pr_details
if: steps.check_status.outputs.should_deploy == 'true' || steps.check_status.outputs.should_undeploy == 'true'
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
const pr = await github.rest.pulls.get({
@@ -98,7 +98,7 @@ jobs:
- name: Post deploy success comment
if: steps.check_status.outputs.should_deploy == 'true'
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
await github.rest.issues.createComment({
@@ -126,7 +126,7 @@ jobs:
- name: Post undeploy success comment
if: steps.check_status.outputs.should_undeploy == 'true'
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
await github.rest.issues.createComment({
@@ -139,7 +139,7 @@ jobs:
- name: Check deployment status on PR close
id: check_pr_close
if: github.event_name == 'pull_request' && github.event.action == 'closed'
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
const comments = await github.rest.issues.listComments({
@@ -187,7 +187,7 @@ jobs:
github.event_name == 'pull_request' &&
github.event.action == 'closed' &&
steps.check_pr_close.outputs.should_undeploy == 'true'
uses: actions/github-script@v8
uses: actions/github-script@v7
with:
script: |
await github.rest.issues.createComment({

View File

@@ -42,7 +42,7 @@ jobs:
- 'autogpt_platform/frontend/src/components/**'
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"
@@ -54,7 +54,7 @@ jobs:
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
- name: Cache dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ steps.cache-key.outputs.key }}
@@ -74,7 +74,7 @@ jobs:
uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"
@@ -82,7 +82,7 @@ jobs:
run: corepack enable
- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}
@@ -112,7 +112,7 @@ jobs:
fetch-depth: 0
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"
@@ -120,7 +120,7 @@ jobs:
run: corepack enable
- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}
@@ -153,7 +153,7 @@ jobs:
submodules: recursive
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"
@@ -176,7 +176,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Cache Docker layers
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-frontend-test-${{ hashFiles('autogpt_platform/docker-compose.yml', 'autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/pyproject.toml', 'autogpt_platform/backend/poetry.lock') }}
@@ -231,7 +231,7 @@ jobs:
fi
- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}
@@ -282,7 +282,7 @@ jobs:
submodules: recursive
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"
@@ -290,7 +290,7 @@ jobs:
run: corepack enable
- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}

View File

@@ -32,7 +32,7 @@ jobs:
uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"
@@ -44,7 +44,7 @@ jobs:
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
- name: Cache dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ steps.cache-key.outputs.key }}
@@ -68,7 +68,7 @@ jobs:
submodules: recursive
- name: Set up Node.js
uses: actions/setup-node@v6
uses: actions/setup-node@v4
with:
node-version: "22.18.0"
@@ -88,7 +88,7 @@ jobs:
docker compose -f ../docker-compose.yml --profile local --profile deps_backend up -d
- name: Restore dependencies cache
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.pnpm-store
key: ${{ needs.setup.outputs.cache-key }}

File diff suppressed because it is too large Load Diff

View File

@@ -9,25 +9,25 @@ packages = [{ include = "autogpt_libs" }]
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
colorama = "^0.4.6"
cryptography = "^46.0"
cryptography = "^45.0"
expiringdict = "^1.2.2"
fastapi = "^0.128.0"
google-cloud-logging = "^3.13.0"
launchdarkly-server-sdk = "^9.14.1"
pydantic = "^2.12.5"
pydantic-settings = "^2.12.0"
pyjwt = { version = "^2.11.0", extras = ["crypto"] }
fastapi = "^0.116.1"
google-cloud-logging = "^3.12.1"
launchdarkly-server-sdk = "^9.12.0"
pydantic = "^2.11.7"
pydantic-settings = "^2.10.1"
pyjwt = { version = "^2.10.1", extras = ["crypto"] }
redis = "^6.2.0"
supabase = "^2.27.2"
uvicorn = "^0.40.0"
supabase = "^2.16.0"
uvicorn = "^0.35.0"
[tool.poetry.group.dev.dependencies]
pyright = "^1.1.408"
pyright = "^1.1.404"
pytest = "^8.4.1"
pytest-asyncio = "^1.3.0"
pytest-mock = "^3.15.1"
pytest-asyncio = "^1.1.0"
pytest-mock = "^3.14.1"
pytest-cov = "^6.2.1"
ruff = "^0.15.0"
ruff = "^0.12.11"
[build-system]
requires = ["poetry-core"]

View File

@@ -45,7 +45,10 @@ async def create_chat_session(
successfulAgentRuns=SafeJson({}),
successfulAgentSchedules=SafeJson({}),
)
return await PrismaChatSession.prisma().create(data=data)
return await PrismaChatSession.prisma().create(
data=data,
include={"Messages": True},
)
async def update_chat_session(

View File

@@ -266,38 +266,12 @@ async def stream_chat_post(
"""
import asyncio
import time
stream_start_time = time.perf_counter()
# Base log metadata (task_id added after creation)
log_meta = {"component": "ChatStream", "session_id": session_id}
if user_id:
log_meta["user_id"] = user_id
logger.info(
f"[TIMING] stream_chat_post STARTED, session={session_id}, "
f"user={user_id}, message_len={len(request.message)}",
extra={"json_fields": log_meta},
)
session = await _validate_and_get_session(session_id, user_id)
logger.info(
f"[TIMING] session validated in {(time.perf_counter() - stream_start_time)*1000:.1f}ms",
extra={
"json_fields": {
**log_meta,
"duration_ms": (time.perf_counter() - stream_start_time) * 1000,
}
},
)
# Create a task in the stream registry for reconnection support
task_id = str(uuid_module.uuid4())
operation_id = str(uuid_module.uuid4())
log_meta["task_id"] = task_id
task_create_start = time.perf_counter()
await stream_registry.create_task(
task_id=task_id,
session_id=session_id,
@@ -306,46 +280,14 @@ async def stream_chat_post(
tool_name="chat",
operation_id=operation_id,
)
logger.info(
f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start)*1000:.1f}ms",
extra={
"json_fields": {
**log_meta,
"duration_ms": (time.perf_counter() - task_create_start) * 1000,
}
},
)
# Background task that runs the AI generation independently of SSE connection
async def run_ai_generation():
import time as time_module
gen_start_time = time_module.perf_counter()
logger.info(
f"[TIMING] run_ai_generation STARTED, task={task_id}, session={session_id}, user={user_id}",
extra={"json_fields": log_meta},
)
first_chunk_time, ttfc = None, None
chunk_count = 0
try:
# Emit a start event with task_id for reconnection
start_chunk = StreamStart(messageId=task_id, taskId=task_id)
await stream_registry.publish_chunk(task_id, start_chunk)
logger.info(
f"[TIMING] StreamStart published at {(time_module.perf_counter() - gen_start_time)*1000:.1f}ms",
extra={
"json_fields": {
**log_meta,
"elapsed_ms": (time_module.perf_counter() - gen_start_time)
* 1000,
}
},
)
logger.info(
"[TIMING] Calling stream_chat_completion",
extra={"json_fields": log_meta},
)
async for chunk in chat_service.stream_chat_completion(
session_id,
request.message,
@@ -354,202 +296,54 @@ async def stream_chat_post(
session=session, # Pass pre-fetched session to avoid double-fetch
context=request.context,
):
chunk_count += 1
if first_chunk_time is None:
first_chunk_time = time_module.perf_counter()
ttfc = first_chunk_time - gen_start_time
logger.info(
f"[TIMING] FIRST AI CHUNK at {ttfc:.2f}s, type={type(chunk).__name__}",
extra={
"json_fields": {
**log_meta,
"chunk_type": type(chunk).__name__,
"time_to_first_chunk_ms": ttfc * 1000,
}
},
)
# Write to Redis (subscribers will receive via XREAD)
await stream_registry.publish_chunk(task_id, chunk)
gen_end_time = time_module.perf_counter()
total_time = (gen_end_time - gen_start_time) * 1000
logger.info(
f"[TIMING] run_ai_generation FINISHED in {total_time/1000:.1f}s; "
f"task={task_id}, session={session_id}, "
f"ttfc={ttfc or -1:.2f}s, n_chunks={chunk_count}",
extra={
"json_fields": {
**log_meta,
"total_time_ms": total_time,
"time_to_first_chunk_ms": (
ttfc * 1000 if ttfc is not None else None
),
"n_chunks": chunk_count,
}
},
)
# Mark task as completed
await stream_registry.mark_task_completed(task_id, "completed")
except Exception as e:
elapsed = time_module.perf_counter() - gen_start_time
logger.error(
f"[TIMING] run_ai_generation ERROR after {elapsed:.2f}s: {e}",
extra={
"json_fields": {
**log_meta,
"elapsed_ms": elapsed * 1000,
"error": str(e),
}
},
f"Error in background AI generation for session {session_id}: {e}"
)
await stream_registry.mark_task_completed(task_id, "failed")
# Start the AI generation in a background task
bg_task = asyncio.create_task(run_ai_generation())
await stream_registry.set_task_asyncio_task(task_id, bg_task)
setup_time = (time.perf_counter() - stream_start_time) * 1000
logger.info(
f"[TIMING] Background task started, setup={setup_time:.1f}ms",
extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
)
# SSE endpoint that subscribes to the task's stream
async def event_generator() -> AsyncGenerator[str, None]:
import time as time_module
event_gen_start = time_module.perf_counter()
logger.info(
f"[TIMING] event_generator STARTED, task={task_id}, session={session_id}, "
f"user={user_id}",
extra={"json_fields": log_meta},
)
subscriber_queue = None
first_chunk_yielded = False
chunks_yielded = 0
try:
# Subscribe to the task stream (this replays existing messages + live updates)
subscribe_start = time_module.perf_counter()
logger.info(
"[TIMING] Calling subscribe_to_task",
extra={"json_fields": log_meta},
)
subscriber_queue = await stream_registry.subscribe_to_task(
task_id=task_id,
user_id=user_id,
last_message_id="0-0", # Get all messages from the beginning
)
subscribe_time = (time_module.perf_counter() - subscribe_start) * 1000
logger.info(
f"[TIMING] subscribe_to_task completed in {subscribe_time:.1f}ms, "
f"queue_ok={subscriber_queue is not None}",
extra={
"json_fields": {
**log_meta,
"duration_ms": subscribe_time,
"queue_obtained": subscriber_queue is not None,
}
},
)
if subscriber_queue is None:
logger.info(
"[TIMING] subscriber_queue is None, yielding finish",
extra={"json_fields": log_meta},
)
yield StreamFinish().to_sse()
yield "data: [DONE]\n\n"
return
# Read from the subscriber queue and yield to SSE
logger.info(
"[TIMING] Starting to read from subscriber_queue",
extra={"json_fields": log_meta},
)
while True:
try:
queue_wait_start = time_module.perf_counter()
chunk = await asyncio.wait_for(subscriber_queue.get(), timeout=30.0)
queue_wait_time = (
time_module.perf_counter() - queue_wait_start
) * 1000
chunks_yielded += 1
if not first_chunk_yielded:
first_chunk_yielded = True
elapsed = time_module.perf_counter() - event_gen_start
logger.info(
f"[TIMING] FIRST CHUNK from queue at {elapsed:.2f}s, "
f"type={type(chunk).__name__}, "
f"wait={queue_wait_time:.1f}ms",
extra={
"json_fields": {
**log_meta,
"chunk_type": type(chunk).__name__,
"elapsed_ms": elapsed * 1000,
"queue_wait_ms": queue_wait_time,
}
},
)
elif chunks_yielded % 50 == 0:
logger.info(
f"[TIMING] Chunk #{chunks_yielded}, "
f"type={type(chunk).__name__}",
extra={
"json_fields": {
**log_meta,
"chunk_number": chunks_yielded,
"chunk_type": type(chunk).__name__,
}
},
)
yield chunk.to_sse()
# Check for finish signal
if isinstance(chunk, StreamFinish):
total_time = time_module.perf_counter() - event_gen_start
logger.info(
f"[TIMING] StreamFinish received in {total_time:.2f}s; "
f"n_chunks={chunks_yielded}",
extra={
"json_fields": {
**log_meta,
"chunks_yielded": chunks_yielded,
"total_time_ms": total_time * 1000,
}
},
)
break
except asyncio.TimeoutError:
# Send heartbeat to keep connection alive
logger.info(
f"[TIMING] Heartbeat timeout, chunks_so_far={chunks_yielded}",
extra={
"json_fields": {**log_meta, "chunks_so_far": chunks_yielded}
},
)
yield StreamHeartbeat().to_sse()
except GeneratorExit:
logger.info(
f"[TIMING] GeneratorExit (client disconnected), chunks={chunks_yielded}",
extra={
"json_fields": {
**log_meta,
"chunks_yielded": chunks_yielded,
"reason": "client_disconnect",
}
},
)
pass # Client disconnected - background task continues
except Exception as e:
elapsed = (time_module.perf_counter() - event_gen_start) * 1000
logger.error(
f"[TIMING] event_generator ERROR after {elapsed:.1f}ms: {e}",
extra={
"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}
},
)
logger.error(f"Error in SSE stream for task {task_id}: {e}")
finally:
# Unsubscribe when client disconnects or stream ends to prevent resource leak
if subscriber_queue is not None:
@@ -563,18 +357,6 @@ async def stream_chat_post(
exc_info=True,
)
# AI SDK protocol termination - always yield even if unsubscribe fails
total_time = time_module.perf_counter() - event_gen_start
logger.info(
f"[TIMING] event_generator FINISHED in {total_time:.2f}s; "
f"task={task_id}, session={session_id}, n_chunks={chunks_yielded}",
extra={
"json_fields": {
**log_meta,
"total_time_ms": total_time * 1000,
"chunks_yielded": chunks_yielded,
}
},
)
yield "data: [DONE]\n\n"
return StreamingResponse(
@@ -643,7 +425,7 @@ async def stream_chat_get(
"Chat stream completed",
extra={
"session_id": session_id,
"n_chunks": chunk_count,
"chunk_count": chunk_count,
"first_chunk_type": first_chunk_type,
},
)

View File

@@ -371,45 +371,21 @@ async def stream_chat_completion(
ValueError: If max_context_messages is exceeded
"""
completion_start = time.monotonic()
# Build log metadata for structured logging
log_meta = {"component": "ChatService", "session_id": session_id}
if user_id:
log_meta["user_id"] = user_id
logger.info(
f"[TIMING] stream_chat_completion STARTED, session={session_id}, user={user_id}, "
f"message_len={len(message) if message else 0}, is_user={is_user_message}",
extra={
"json_fields": {
**log_meta,
"message_len": len(message) if message else 0,
"is_user_message": is_user_message,
}
},
f"Streaming chat completion for session {session_id} for message {message} and user id {user_id}. Message is user message: {is_user_message}"
)
# Only fetch from Redis if session not provided (initial call)
if session is None:
fetch_start = time.monotonic()
session = await get_chat_session(session_id, user_id)
fetch_time = (time.monotonic() - fetch_start) * 1000
logger.info(
f"[TIMING] get_chat_session took {fetch_time:.1f}ms, "
f"n_messages={len(session.messages) if session else 0}",
extra={
"json_fields": {
**log_meta,
"duration_ms": fetch_time,
"n_messages": len(session.messages) if session else 0,
}
},
f"Fetched session from Redis: {session.session_id if session else 'None'}, "
f"message_count={len(session.messages) if session else 0}"
)
else:
logger.info(
f"[TIMING] Using provided session, messages={len(session.messages)}",
extra={"json_fields": {**log_meta, "n_messages": len(session.messages)}},
f"Using provided session object: {session.session_id}, "
f"message_count={len(session.messages)}"
)
if not session:
@@ -430,25 +406,17 @@ async def stream_chat_completion(
# Track user message in PostHog
if is_user_message:
posthog_start = time.monotonic()
track_user_message(
user_id=user_id,
session_id=session_id,
message_length=len(message),
)
posthog_time = (time.monotonic() - posthog_start) * 1000
logger.info(
f"[TIMING] track_user_message took {posthog_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": posthog_time}},
)
upsert_start = time.monotonic()
session = await upsert_chat_session(session)
upsert_time = (time.monotonic() - upsert_start) * 1000
logger.info(
f"[TIMING] upsert_chat_session took {upsert_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": upsert_time}},
f"Upserting session: {session.session_id} with user id {session.user_id}, "
f"message_count={len(session.messages)}"
)
session = await upsert_chat_session(session)
assert session, "Session not found"
# Generate title for new sessions on first user message (non-blocking)
@@ -486,13 +454,7 @@ async def stream_chat_completion(
asyncio.create_task(_update_title())
# Build system prompt with business understanding
prompt_start = time.monotonic()
system_prompt, understanding = await _build_system_prompt(user_id)
prompt_time = (time.monotonic() - prompt_start) * 1000
logger.info(
f"[TIMING] _build_system_prompt took {prompt_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": prompt_time}},
)
# Initialize variables for streaming
assistant_response = ChatMessage(
@@ -521,18 +483,9 @@ async def stream_chat_completion(
text_block_id = str(uuid_module.uuid4())
# Yield message start
setup_time = (time.monotonic() - completion_start) * 1000
logger.info(
f"[TIMING] Setup complete, yielding StreamStart at {setup_time:.1f}ms",
extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
)
yield StreamStart(messageId=message_id)
try:
logger.info(
"[TIMING] Calling _stream_chat_chunks",
extra={"json_fields": log_meta},
)
async for chunk in _stream_chat_chunks(
session=session,
tools=tools,
@@ -940,21 +893,9 @@ async def _stream_chat_chunks(
SSE formatted JSON response objects
"""
import time as time_module
stream_chunks_start = time_module.perf_counter()
model = config.model
# Build log metadata for structured logging
log_meta = {"component": "ChatService", "session_id": session.session_id}
if session.user_id:
log_meta["user_id"] = session.user_id
logger.info(
f"[TIMING] _stream_chat_chunks STARTED, session={session.session_id}, "
f"user={session.user_id}, n_messages={len(session.messages)}",
extra={"json_fields": {**log_meta, "n_messages": len(session.messages)}},
)
logger.info("Starting pure chat stream")
messages = session.to_openai_messages()
if system_prompt:
@@ -965,18 +906,12 @@ async def _stream_chat_chunks(
messages = [system_message] + messages
# Apply context window management
context_start = time_module.perf_counter()
context_result = await _manage_context_window(
messages=messages,
model=model,
api_key=config.api_key,
base_url=config.base_url,
)
context_time = (time_module.perf_counter() - context_start) * 1000
logger.info(
f"[TIMING] _manage_context_window took {context_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": context_time}},
)
if context_result.error:
if "System prompt dropped" in context_result.error:
@@ -1011,19 +946,9 @@ async def _stream_chat_chunks(
while retry_count <= MAX_RETRIES:
try:
elapsed = (time_module.perf_counter() - stream_chunks_start) * 1000
retry_info = (
f" (retry {retry_count}/{MAX_RETRIES})" if retry_count > 0 else ""
)
logger.info(
f"[TIMING] Creating OpenAI stream at {elapsed:.1f}ms{retry_info}",
extra={
"json_fields": {
**log_meta,
"elapsed_ms": elapsed,
"retry_count": retry_count,
}
},
f"Creating OpenAI chat completion stream..."
f"{f' (retry {retry_count}/{MAX_RETRIES})' if retry_count > 0 else ''}"
)
# Build extra_body for OpenRouter tracing and PostHog analytics
@@ -1040,7 +965,6 @@ async def _stream_chat_chunks(
:128
] # OpenRouter limit
api_call_start = time_module.perf_counter()
stream = await client.chat.completions.create(
model=model,
messages=cast(list[ChatCompletionMessageParam], messages),
@@ -1050,11 +974,6 @@ async def _stream_chat_chunks(
stream_options=ChatCompletionStreamOptionsParam(include_usage=True),
extra_body=extra_body,
)
api_init_time = (time_module.perf_counter() - api_call_start) * 1000
logger.info(
f"[TIMING] OpenAI stream object returned in {api_init_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": api_init_time}},
)
# Variables to accumulate tool calls
tool_calls: list[dict[str, Any]] = []
@@ -1065,13 +984,10 @@ async def _stream_chat_chunks(
# Track if we've started the text block
text_started = False
first_content_chunk = True
chunk_count = 0
# Process the stream
chunk: ChatCompletionChunk
async for chunk in stream:
chunk_count += 1
if chunk.usage:
yield StreamUsage(
promptTokens=chunk.usage.prompt_tokens,
@@ -1094,23 +1010,6 @@ async def _stream_chat_chunks(
if not text_started and text_block_id:
yield StreamTextStart(id=text_block_id)
text_started = True
# Log timing for first content chunk
if first_content_chunk:
first_content_chunk = False
ttfc = (
time_module.perf_counter() - api_call_start
) * 1000
logger.info(
f"[TIMING] FIRST CONTENT CHUNK at {ttfc:.1f}ms "
f"(since API call), n_chunks={chunk_count}",
extra={
"json_fields": {
**log_meta,
"time_to_first_chunk_ms": ttfc,
"n_chunks": chunk_count,
}
},
)
# Stream the text delta
text_response = StreamTextDelta(
id=text_block_id or "",
@@ -1167,21 +1066,7 @@ async def _stream_chat_chunks(
toolName=tool_calls[idx]["function"]["name"],
)
emitted_start_for_idx.add(idx)
stream_duration = time_module.perf_counter() - api_call_start
logger.info(
f"[TIMING] OpenAI stream COMPLETE, finish_reason={finish_reason}, "
f"duration={stream_duration:.2f}s, "
f"n_chunks={chunk_count}, n_tool_calls={len(tool_calls)}",
extra={
"json_fields": {
**log_meta,
"stream_duration_ms": stream_duration * 1000,
"finish_reason": finish_reason,
"n_chunks": chunk_count,
"n_tool_calls": len(tool_calls),
}
},
)
logger.info(f"Stream complete. Finish reason: {finish_reason}")
# Yield all accumulated tool calls after the stream is complete
# This ensures all tool call arguments have been fully received
@@ -1201,12 +1086,6 @@ async def _stream_chat_chunks(
# Re-raise to trigger retry logic in the parent function
raise
total_time = (time_module.perf_counter() - stream_chunks_start) * 1000
logger.info(
f"[TIMING] _stream_chat_chunks COMPLETED in {total_time/1000:.1f}s; "
f"session={session.session_id}, user={session.user_id}",
extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
)
yield StreamFinish()
return
except Exception as e:

View File

@@ -104,24 +104,6 @@ async def create_task(
Returns:
The created ActiveTask instance (metadata only)
"""
import time
start_time = time.perf_counter()
# Build log metadata for structured logging
log_meta = {
"component": "StreamRegistry",
"task_id": task_id,
"session_id": session_id,
}
if user_id:
log_meta["user_id"] = user_id
logger.info(
f"[TIMING] create_task STARTED, task={task_id}, session={session_id}, user={user_id}",
extra={"json_fields": log_meta},
)
task = ActiveTask(
task_id=task_id,
session_id=session_id,
@@ -132,18 +114,10 @@ async def create_task(
)
# Store metadata in Redis
redis_start = time.perf_counter()
redis = await get_redis_async()
redis_time = (time.perf_counter() - redis_start) * 1000
logger.info(
f"[TIMING] get_redis_async took {redis_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": redis_time}},
)
meta_key = _get_task_meta_key(task_id)
op_key = _get_operation_mapping_key(operation_id)
hset_start = time.perf_counter()
await redis.hset( # type: ignore[misc]
meta_key,
mapping={
@@ -157,22 +131,12 @@ async def create_task(
"created_at": task.created_at.isoformat(),
},
)
hset_time = (time.perf_counter() - hset_start) * 1000
logger.info(
f"[TIMING] redis.hset took {hset_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": hset_time}},
)
await redis.expire(meta_key, config.stream_ttl)
# Create operation_id -> task_id mapping for webhook lookups
await redis.set(op_key, task_id, ex=config.stream_ttl)
total_time = (time.perf_counter() - start_time) * 1000
logger.info(
f"[TIMING] create_task COMPLETED in {total_time:.1f}ms; task={task_id}, session={session_id}",
extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
)
logger.debug(f"Created task {task_id} for session {session_id}")
return task
@@ -192,60 +156,26 @@ async def publish_chunk(
Returns:
The Redis Stream message ID
"""
import time
start_time = time.perf_counter()
chunk_type = type(chunk).__name__
chunk_json = chunk.model_dump_json()
message_id = "0-0"
# Build log metadata
log_meta = {
"component": "StreamRegistry",
"task_id": task_id,
"chunk_type": chunk_type,
}
try:
redis = await get_redis_async()
stream_key = _get_task_stream_key(task_id)
# Write to Redis Stream for persistence and real-time delivery
xadd_start = time.perf_counter()
raw_id = await redis.xadd(
stream_key,
{"data": chunk_json},
maxlen=config.stream_max_length,
)
xadd_time = (time.perf_counter() - xadd_start) * 1000
message_id = raw_id if isinstance(raw_id, str) else raw_id.decode()
# Set TTL on stream to match task metadata TTL
await redis.expire(stream_key, config.stream_ttl)
total_time = (time.perf_counter() - start_time) * 1000
# Only log timing for significant chunks or slow operations
if (
chunk_type
in ("StreamStart", "StreamFinish", "StreamTextStart", "StreamTextEnd")
or total_time > 50
):
logger.info(
f"[TIMING] publish_chunk {chunk_type} in {total_time:.1f}ms (xadd={xadd_time:.1f}ms)",
extra={
"json_fields": {
**log_meta,
"total_time_ms": total_time,
"xadd_time_ms": xadd_time,
"message_id": message_id,
}
},
)
except Exception as e:
elapsed = (time.perf_counter() - start_time) * 1000
logger.error(
f"[TIMING] Failed to publish chunk {chunk_type} after {elapsed:.1f}ms: {e}",
extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
f"Failed to publish chunk for task {task_id}: {e}",
exc_info=True,
)
@@ -270,61 +200,24 @@ async def subscribe_to_task(
An asyncio Queue that will receive stream chunks, or None if task not found
or user doesn't have access
"""
import time
start_time = time.perf_counter()
# Build log metadata
log_meta = {"component": "StreamRegistry", "task_id": task_id}
if user_id:
log_meta["user_id"] = user_id
logger.info(
f"[TIMING] subscribe_to_task STARTED, task={task_id}, user={user_id}, last_msg={last_message_id}",
extra={"json_fields": {**log_meta, "last_message_id": last_message_id}},
)
redis_start = time.perf_counter()
redis = await get_redis_async()
meta_key = _get_task_meta_key(task_id)
meta: dict[Any, Any] = await redis.hgetall(meta_key) # type: ignore[misc]
hgetall_time = (time.perf_counter() - redis_start) * 1000
logger.info(
f"[TIMING] Redis hgetall took {hgetall_time:.1f}ms",
extra={"json_fields": {**log_meta, "duration_ms": hgetall_time}},
)
if not meta:
elapsed = (time.perf_counter() - start_time) * 1000
logger.info(
f"[TIMING] Task not found in Redis after {elapsed:.1f}ms",
extra={
"json_fields": {
**log_meta,
"elapsed_ms": elapsed,
"reason": "task_not_found",
}
},
)
logger.debug(f"Task {task_id} not found in Redis")
return None
# Note: Redis client uses decode_responses=True, so keys are strings
task_status = meta.get("status", "")
task_user_id = meta.get("user_id", "") or None
log_meta["session_id"] = meta.get("session_id", "")
# Validate ownership - if task has an owner, requester must match
if task_user_id:
if user_id != task_user_id:
logger.warning(
f"[TIMING] Access denied: user {user_id} tried to access task owned by {task_user_id}",
extra={
"json_fields": {
**log_meta,
"task_owner": task_user_id,
"reason": "access_denied",
}
},
f"User {user_id} denied access to task {task_id} "
f"owned by {task_user_id}"
)
return None
@@ -332,19 +225,7 @@ async def subscribe_to_task(
stream_key = _get_task_stream_key(task_id)
# Step 1: Replay messages from Redis Stream
xread_start = time.perf_counter()
messages = await redis.xread({stream_key: last_message_id}, block=0, count=1000)
xread_time = (time.perf_counter() - xread_start) * 1000
logger.info(
f"[TIMING] Redis xread (replay) took {xread_time:.1f}ms, status={task_status}",
extra={
"json_fields": {
**log_meta,
"duration_ms": xread_time,
"task_status": task_status,
}
},
)
replayed_count = 0
replay_last_id = last_message_id
@@ -363,48 +244,19 @@ async def subscribe_to_task(
except Exception as e:
logger.warning(f"Failed to replay message: {e}")
logger.info(
f"[TIMING] Replayed {replayed_count} messages, last_id={replay_last_id}",
extra={
"json_fields": {
**log_meta,
"n_messages_replayed": replayed_count,
"replay_last_id": replay_last_id,
}
},
)
logger.debug(f"Task {task_id}: replayed {replayed_count} messages")
# Step 2: If task is still running, start stream listener for live updates
if task_status == "running":
logger.info(
"[TIMING] Task still running, starting _stream_listener",
extra={"json_fields": {**log_meta, "task_status": task_status}},
)
listener_task = asyncio.create_task(
_stream_listener(task_id, subscriber_queue, replay_last_id, log_meta)
_stream_listener(task_id, subscriber_queue, replay_last_id)
)
# Track listener task for cleanup on unsubscribe
_listener_tasks[id(subscriber_queue)] = (task_id, listener_task)
else:
# Task is completed/failed - add finish marker
logger.info(
f"[TIMING] Task already {task_status}, adding StreamFinish",
extra={"json_fields": {**log_meta, "task_status": task_status}},
)
await subscriber_queue.put(StreamFinish())
total_time = (time.perf_counter() - start_time) * 1000
logger.info(
f"[TIMING] subscribe_to_task COMPLETED in {total_time:.1f}ms; task={task_id}, "
f"n_messages_replayed={replayed_count}",
extra={
"json_fields": {
**log_meta,
"total_time_ms": total_time,
"n_messages_replayed": replayed_count,
}
},
)
return subscriber_queue
@@ -412,7 +264,6 @@ async def _stream_listener(
task_id: str,
subscriber_queue: asyncio.Queue[StreamBaseResponse],
last_replayed_id: str,
log_meta: dict | None = None,
) -> None:
"""Listen to Redis Stream for new messages using blocking XREAD.
@@ -423,27 +274,10 @@ async def _stream_listener(
task_id: Task ID to listen for
subscriber_queue: Queue to deliver messages to
last_replayed_id: Last message ID from replay (continue from here)
log_meta: Structured logging metadata
"""
import time
start_time = time.perf_counter()
# Use provided log_meta or build minimal one
if log_meta is None:
log_meta = {"component": "StreamRegistry", "task_id": task_id}
logger.info(
f"[TIMING] _stream_listener STARTED, task={task_id}, last_id={last_replayed_id}",
extra={"json_fields": {**log_meta, "last_replayed_id": last_replayed_id}},
)
queue_id = id(subscriber_queue)
# Track the last successfully delivered message ID for recovery hints
last_delivered_id = last_replayed_id
messages_delivered = 0
first_message_time = None
xread_count = 0
try:
redis = await get_redis_async()
@@ -453,39 +287,9 @@ async def _stream_listener(
while True:
# Block for up to 30 seconds waiting for new messages
# This allows periodic checking if task is still running
xread_start = time.perf_counter()
xread_count += 1
messages = await redis.xread(
{stream_key: current_id}, block=30000, count=100
)
xread_time = (time.perf_counter() - xread_start) * 1000
if messages:
msg_count = sum(len(msgs) for _, msgs in messages)
logger.info(
f"[TIMING] xread #{xread_count} returned {msg_count} messages in {xread_time:.1f}ms",
extra={
"json_fields": {
**log_meta,
"xread_count": xread_count,
"n_messages": msg_count,
"duration_ms": xread_time,
}
},
)
elif xread_time > 1000:
# Only log timeouts (30s blocking)
logger.info(
f"[TIMING] xread #{xread_count} timeout after {xread_time:.1f}ms",
extra={
"json_fields": {
**log_meta,
"xread_count": xread_count,
"duration_ms": xread_time,
"reason": "timeout",
}
},
)
if not messages:
# Timeout - check if task is still running
@@ -522,30 +326,10 @@ async def _stream_listener(
)
# Update last delivered ID on successful delivery
last_delivered_id = current_id
messages_delivered += 1
if first_message_time is None:
first_message_time = time.perf_counter()
elapsed = (first_message_time - start_time) * 1000
logger.info(
f"[TIMING] FIRST live message at {elapsed:.1f}ms, type={type(chunk).__name__}",
extra={
"json_fields": {
**log_meta,
"elapsed_ms": elapsed,
"chunk_type": type(chunk).__name__,
}
},
)
except asyncio.TimeoutError:
logger.warning(
f"[TIMING] Subscriber queue full, delivery timed out after {QUEUE_PUT_TIMEOUT}s",
extra={
"json_fields": {
**log_meta,
"timeout_s": QUEUE_PUT_TIMEOUT,
"reason": "queue_full",
}
},
f"Subscriber queue full for task {task_id}, "
f"message delivery timed out after {QUEUE_PUT_TIMEOUT}s"
)
# Send overflow error with recovery info
try:
@@ -567,44 +351,15 @@ async def _stream_listener(
# Stop listening on finish
if isinstance(chunk, StreamFinish):
total_time = (time.perf_counter() - start_time) * 1000
logger.info(
f"[TIMING] StreamFinish received in {total_time/1000:.1f}s; delivered={messages_delivered}",
extra={
"json_fields": {
**log_meta,
"total_time_ms": total_time,
"messages_delivered": messages_delivered,
}
},
)
return
except Exception as e:
logger.warning(
f"Error processing stream message: {e}",
extra={"json_fields": {**log_meta, "error": str(e)}},
)
logger.warning(f"Error processing stream message: {e}")
except asyncio.CancelledError:
elapsed = (time.perf_counter() - start_time) * 1000
logger.info(
f"[TIMING] _stream_listener CANCELLED after {elapsed:.1f}ms, delivered={messages_delivered}",
extra={
"json_fields": {
**log_meta,
"elapsed_ms": elapsed,
"messages_delivered": messages_delivered,
"reason": "cancelled",
}
},
)
logger.debug(f"Stream listener cancelled for task {task_id}")
raise # Re-raise to propagate cancellation
except Exception as e:
elapsed = (time.perf_counter() - start_time) * 1000
logger.error(
f"[TIMING] _stream_listener ERROR after {elapsed:.1f}ms: {e}",
extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
)
logger.error(f"Stream listener error for task {task_id}: {e}")
# On error, send finish to unblock subscriber
try:
await asyncio.wait_for(
@@ -613,24 +368,10 @@ async def _stream_listener(
)
except (asyncio.TimeoutError, asyncio.QueueFull):
logger.warning(
"Could not deliver finish event after error",
extra={"json_fields": log_meta},
f"Could not deliver finish event for task {task_id} after error"
)
finally:
# Clean up listener task mapping on exit
total_time = (time.perf_counter() - start_time) * 1000
logger.info(
f"[TIMING] _stream_listener FINISHED in {total_time/1000:.1f}s; task={task_id}, "
f"delivered={messages_delivered}, xread_count={xread_count}",
extra={
"json_fields": {
**log_meta,
"total_time_ms": total_time,
"messages_delivered": messages_delivered,
"xread_count": xread_count,
}
},
)
_listener_tasks.pop(queue_id, None)

View File

@@ -13,32 +13,10 @@ from backend.api.features.chat.tools.models import (
NoResultsResponse,
)
from backend.api.features.store.hybrid_search import unified_hybrid_search
from backend.data.block import BlockType, get_block
from backend.data.block import get_block
logger = logging.getLogger(__name__)
_TARGET_RESULTS = 10
# Over-fetch to compensate for post-hoc filtering of graph-only blocks.
# 40 is 2x current removed; speed of query 10 vs 40 is minimial
_OVERFETCH_PAGE_SIZE = 40
# Block types that only work within graphs and cannot run standalone in CoPilot.
COPILOT_EXCLUDED_BLOCK_TYPES = {
BlockType.INPUT, # Graph interface definition - data enters via chat, not graph inputs
BlockType.OUTPUT, # Graph interface definition - data exits via chat, not graph outputs
BlockType.WEBHOOK, # Wait for external events - would hang forever in CoPilot
BlockType.WEBHOOK_MANUAL, # Same as WEBHOOK
BlockType.NOTE, # Visual annotation only - no runtime behavior
BlockType.HUMAN_IN_THE_LOOP, # Pauses for human approval - CoPilot IS human-in-the-loop
BlockType.AGENT, # AgentExecutorBlock requires execution_context - use run_agent tool
}
# Specific block IDs excluded from CoPilot (STANDARD type but still require graph context)
COPILOT_EXCLUDED_BLOCK_IDS = {
# SmartDecisionMakerBlock - dynamically discovers downstream blocks via graph topology
"3b191d9f-356f-482d-8238-ba04b6d18381",
}
class FindBlockTool(BaseTool):
"""Tool for searching available blocks."""
@@ -110,7 +88,7 @@ class FindBlockTool(BaseTool):
query=query,
content_types=[ContentType.BLOCK],
page=1,
page_size=_OVERFETCH_PAGE_SIZE,
page_size=10,
)
if not results:
@@ -130,90 +108,60 @@ class FindBlockTool(BaseTool):
block = get_block(block_id)
# Skip disabled blocks
if not block or block.disabled:
continue
if block and not block.disabled:
# Get input/output schemas
input_schema = {}
output_schema = {}
try:
input_schema = block.input_schema.jsonschema()
except Exception:
pass
try:
output_schema = block.output_schema.jsonschema()
except Exception:
pass
# Skip blocks excluded from CoPilot (graph-only blocks)
if (
block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
or block.id in COPILOT_EXCLUDED_BLOCK_IDS
):
continue
# Get categories from block instance
categories = []
if hasattr(block, "categories") and block.categories:
categories = [cat.value for cat in block.categories]
# Get input/output schemas
input_schema = {}
output_schema = {}
try:
input_schema = block.input_schema.jsonschema()
except Exception as e:
logger.debug(
"Failed to generate input schema for block %s: %s",
block_id,
e,
)
try:
output_schema = block.output_schema.jsonschema()
except Exception as e:
logger.debug(
"Failed to generate output schema for block %s: %s",
block_id,
e,
)
# Get categories from block instance
categories = []
if hasattr(block, "categories") and block.categories:
categories = [cat.value for cat in block.categories]
# Extract required inputs for easier use
required_inputs: list[BlockInputFieldInfo] = []
if input_schema:
properties = input_schema.get("properties", {})
required_fields = set(input_schema.get("required", []))
# Get credential field names to exclude from required inputs
credentials_fields = set(
block.input_schema.get_credentials_fields().keys()
)
for field_name, field_schema in properties.items():
# Skip credential fields - they're handled separately
if field_name in credentials_fields:
continue
required_inputs.append(
BlockInputFieldInfo(
name=field_name,
type=field_schema.get("type", "string"),
description=field_schema.get("description", ""),
required=field_name in required_fields,
default=field_schema.get("default"),
)
# Extract required inputs for easier use
required_inputs: list[BlockInputFieldInfo] = []
if input_schema:
properties = input_schema.get("properties", {})
required_fields = set(input_schema.get("required", []))
# Get credential field names to exclude from required inputs
credentials_fields = set(
block.input_schema.get_credentials_fields().keys()
)
blocks.append(
BlockInfoSummary(
id=block_id,
name=block.name,
description=block.description or "",
categories=categories,
input_schema=input_schema,
output_schema=output_schema,
required_inputs=required_inputs,
for field_name, field_schema in properties.items():
# Skip credential fields - they're handled separately
if field_name in credentials_fields:
continue
required_inputs.append(
BlockInputFieldInfo(
name=field_name,
type=field_schema.get("type", "string"),
description=field_schema.get("description", ""),
required=field_name in required_fields,
default=field_schema.get("default"),
)
)
blocks.append(
BlockInfoSummary(
id=block_id,
name=block.name,
description=block.description or "",
categories=categories,
input_schema=input_schema,
output_schema=output_schema,
required_inputs=required_inputs,
)
)
)
if len(blocks) >= _TARGET_RESULTS:
break
if blocks and len(blocks) < _TARGET_RESULTS:
logger.debug(
"find_block returned %d/%d results for query '%s' "
"(filtered %d excluded/disabled blocks)",
len(blocks),
_TARGET_RESULTS,
query,
len(results) - len(blocks),
)
if not blocks:
return NoResultsResponse(

View File

@@ -1,139 +0,0 @@
"""Tests for block filtering in FindBlockTool."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from backend.api.features.chat.tools.find_block import (
COPILOT_EXCLUDED_BLOCK_IDS,
COPILOT_EXCLUDED_BLOCK_TYPES,
FindBlockTool,
)
from backend.api.features.chat.tools.models import BlockListResponse
from backend.data.block import BlockType
from ._test_data import make_session
_TEST_USER_ID = "test-user-find-block"
def make_mock_block(
block_id: str, name: str, block_type: BlockType, disabled: bool = False
):
"""Create a mock block for testing."""
mock = MagicMock()
mock.id = block_id
mock.name = name
mock.description = f"{name} description"
mock.block_type = block_type
mock.disabled = disabled
mock.input_schema = MagicMock()
mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
mock.input_schema.get_credentials_fields.return_value = {}
mock.output_schema = MagicMock()
mock.output_schema.jsonschema.return_value = {}
mock.categories = []
return mock
class TestFindBlockFiltering:
"""Tests for block filtering in FindBlockTool."""
def test_excluded_block_types_contains_expected_types(self):
"""Verify COPILOT_EXCLUDED_BLOCK_TYPES contains all graph-only types."""
assert BlockType.INPUT in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.OUTPUT in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.WEBHOOK in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.WEBHOOK_MANUAL in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.NOTE in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.HUMAN_IN_THE_LOOP in COPILOT_EXCLUDED_BLOCK_TYPES
assert BlockType.AGENT in COPILOT_EXCLUDED_BLOCK_TYPES
def test_excluded_block_ids_contains_smart_decision_maker(self):
"""Verify SmartDecisionMakerBlock is in COPILOT_EXCLUDED_BLOCK_IDS."""
assert "3b191d9f-356f-482d-8238-ba04b6d18381" in COPILOT_EXCLUDED_BLOCK_IDS
@pytest.mark.asyncio(loop_scope="session")
async def test_excluded_block_type_filtered_from_results(self):
"""Verify blocks with excluded BlockTypes are filtered from search results."""
session = make_session(user_id=_TEST_USER_ID)
# Mock search returns an INPUT block (excluded) and a STANDARD block (included)
search_results = [
{"content_id": "input-block-id", "score": 0.9},
{"content_id": "standard-block-id", "score": 0.8},
]
input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
standard_block = make_mock_block(
"standard-block-id", "HTTP Request", BlockType.STANDARD
)
def mock_get_block(block_id):
return {
"input-block-id": input_block,
"standard-block-id": standard_block,
}.get(block_id)
with patch(
"backend.api.features.chat.tools.find_block.unified_hybrid_search",
new_callable=AsyncMock,
return_value=(search_results, 2),
):
with patch(
"backend.api.features.chat.tools.find_block.get_block",
side_effect=mock_get_block,
):
tool = FindBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID, session=session, query="test"
)
# Should only return the standard block, not the INPUT block
assert isinstance(response, BlockListResponse)
assert len(response.blocks) == 1
assert response.blocks[0].id == "standard-block-id"
@pytest.mark.asyncio(loop_scope="session")
async def test_excluded_block_id_filtered_from_results(self):
"""Verify SmartDecisionMakerBlock is filtered from search results."""
session = make_session(user_id=_TEST_USER_ID)
smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
search_results = [
{"content_id": smart_decision_id, "score": 0.9},
{"content_id": "normal-block-id", "score": 0.8},
]
# SmartDecisionMakerBlock has STANDARD type but is excluded by ID
smart_block = make_mock_block(
smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
)
normal_block = make_mock_block(
"normal-block-id", "Normal Block", BlockType.STANDARD
)
def mock_get_block(block_id):
return {
smart_decision_id: smart_block,
"normal-block-id": normal_block,
}.get(block_id)
with patch(
"backend.api.features.chat.tools.find_block.unified_hybrid_search",
new_callable=AsyncMock,
return_value=(search_results, 2),
):
with patch(
"backend.api.features.chat.tools.find_block.get_block",
side_effect=mock_get_block,
):
tool = FindBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID, session=session, query="decision"
)
# Should only return normal block, not SmartDecisionMakerBlock
assert isinstance(response, BlockListResponse)
assert len(response.blocks) == 1
assert response.blocks[0].id == "normal-block-id"

View File

@@ -1,29 +0,0 @@
"""Shared helpers for chat tools."""
from typing import Any
def get_inputs_from_schema(
input_schema: dict[str, Any],
exclude_fields: set[str] | None = None,
) -> list[dict[str, Any]]:
"""Extract input field info from JSON schema."""
if not isinstance(input_schema, dict):
return []
exclude = exclude_fields or set()
properties = input_schema.get("properties", {})
required = set(input_schema.get("required", []))
return [
{
"name": name,
"title": schema.get("title", name),
"type": schema.get("type", "string"),
"description": schema.get("description", ""),
"required": name in required,
"default": schema.get("default"),
}
for name, schema in properties.items()
if name not in exclude
]

View File

@@ -24,7 +24,6 @@ from backend.util.timezone_utils import (
)
from .base import BaseTool
from .helpers import get_inputs_from_schema
from .models import (
AgentDetails,
AgentDetailsResponse,
@@ -262,7 +261,7 @@ class RunAgentTool(BaseTool):
),
requirements={
"credentials": requirements_creds_list,
"inputs": get_inputs_from_schema(graph.input_schema),
"inputs": self._get_inputs_list(graph.input_schema),
"execution_modes": self._get_execution_modes(graph),
},
),
@@ -370,6 +369,22 @@ class RunAgentTool(BaseTool):
session_id=session_id,
)
def _get_inputs_list(self, input_schema: dict[str, Any]) -> list[dict[str, Any]]:
"""Extract inputs list from schema."""
inputs_list = []
if isinstance(input_schema, dict) and "properties" in input_schema:
for field_name, field_schema in input_schema["properties"].items():
inputs_list.append(
{
"name": field_name,
"title": field_schema.get("title", field_name),
"type": field_schema.get("type", "string"),
"description": field_schema.get("description", ""),
"required": field_name in input_schema.get("required", []),
}
)
return inputs_list
def _get_execution_modes(self, graph: GraphModel) -> list[str]:
"""Get available execution modes for the graph."""
trigger_info = graph.trigger_setup_info
@@ -383,7 +398,7 @@ class RunAgentTool(BaseTool):
suffix: str,
) -> str:
"""Build a message describing available inputs for an agent."""
inputs_list = get_inputs_from_schema(graph.input_schema)
inputs_list = self._get_inputs_list(graph.input_schema)
required_names = [i["name"] for i in inputs_list if i["required"]]
optional_names = [i["name"] for i in inputs_list if not i["required"]]

View File

@@ -8,19 +8,14 @@ from typing import Any
from pydantic_core import PydanticUndefined
from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.tools.find_block import (
COPILOT_EXCLUDED_BLOCK_IDS,
COPILOT_EXCLUDED_BLOCK_TYPES,
)
from backend.data.block import AnyBlockSchema, get_block
from backend.data.block import get_block
from backend.data.execution import ExecutionContext
from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
from backend.data.model import CredentialsMetaInput
from backend.data.workspace import get_or_create_workspace
from backend.integrations.creds_manager import IntegrationCredentialsManager
from backend.util.exceptions import BlockError
from .base import BaseTool
from .helpers import get_inputs_from_schema
from .models import (
BlockOutputResponse,
ErrorResponse,
@@ -29,10 +24,7 @@ from .models import (
ToolResponseBase,
UserReadiness,
)
from .utils import (
build_missing_credentials_from_field_info,
match_credentials_to_requirements,
)
from .utils import build_missing_credentials_from_field_info
logger = logging.getLogger(__name__)
@@ -81,6 +73,91 @@ class RunBlockTool(BaseTool):
def requires_auth(self) -> bool:
return True
async def _check_block_credentials(
self,
user_id: str,
block: Any,
input_data: dict[str, Any] | None = None,
) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
"""
Check if user has required credentials for a block.
Args:
user_id: User ID
block: Block to check credentials for
input_data: Input data for the block (used to determine provider via discriminator)
Returns:
tuple[matched_credentials, missing_credentials]
"""
matched_credentials: dict[str, CredentialsMetaInput] = {}
missing_credentials: list[CredentialsMetaInput] = []
input_data = input_data or {}
# Get credential field info from block's input schema
credentials_fields_info = block.input_schema.get_credentials_fields_info()
if not credentials_fields_info:
return matched_credentials, missing_credentials
# Get user's available credentials
creds_manager = IntegrationCredentialsManager()
available_creds = await creds_manager.store.get_all_creds(user_id)
for field_name, field_info in credentials_fields_info.items():
effective_field_info = field_info
if field_info.discriminator and field_info.discriminator_mapping:
# Get discriminator from input, falling back to schema default
discriminator_value = input_data.get(field_info.discriminator)
if discriminator_value is None:
field = block.input_schema.model_fields.get(
field_info.discriminator
)
if field and field.default is not PydanticUndefined:
discriminator_value = field.default
if (
discriminator_value
and discriminator_value in field_info.discriminator_mapping
):
effective_field_info = field_info.discriminate(discriminator_value)
logger.debug(
f"Discriminated provider for {field_name}: "
f"{discriminator_value} -> {effective_field_info.provider}"
)
matching_cred = next(
(
cred
for cred in available_creds
if cred.provider in effective_field_info.provider
and cred.type in effective_field_info.supported_types
),
None,
)
if matching_cred:
matched_credentials[field_name] = CredentialsMetaInput(
id=matching_cred.id,
provider=matching_cred.provider, # type: ignore
type=matching_cred.type,
title=matching_cred.title,
)
else:
# Create a placeholder for the missing credential
provider = next(iter(effective_field_info.provider), "unknown")
cred_type = next(iter(effective_field_info.supported_types), "api_key")
missing_credentials.append(
CredentialsMetaInput(
id=field_name,
provider=provider, # type: ignore
type=cred_type, # type: ignore
title=field_name.replace("_", " ").title(),
)
)
return matched_credentials, missing_credentials
async def _execute(
self,
user_id: str | None,
@@ -135,24 +212,11 @@ class RunBlockTool(BaseTool):
session_id=session_id,
)
# Check if block is excluded from CoPilot (graph-only blocks)
if (
block.block_type in COPILOT_EXCLUDED_BLOCK_TYPES
or block.id in COPILOT_EXCLUDED_BLOCK_IDS
):
return ErrorResponse(
message=(
f"Block '{block.name}' cannot be run directly in CoPilot. "
"This block is designed for use within graphs only."
),
session_id=session_id,
)
logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")
creds_manager = IntegrationCredentialsManager()
matched_credentials, missing_credentials = (
await self._resolve_block_credentials(user_id, block, input_data)
matched_credentials, missing_credentials = await self._check_block_credentials(
user_id, block, input_data
)
if missing_credentials:
@@ -281,75 +345,29 @@ class RunBlockTool(BaseTool):
session_id=session_id,
)
async def _resolve_block_credentials(
self,
user_id: str,
block: AnyBlockSchema,
input_data: dict[str, Any] | None = None,
) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
"""
Resolve credentials for a block by matching user's available credentials.
Args:
user_id: User ID
block: Block to resolve credentials for
input_data: Input data for the block (used to determine provider via discriminator)
Returns:
tuple of (matched_credentials, missing_credentials) - matched credentials
are used for block execution, missing ones indicate setup requirements.
"""
input_data = input_data or {}
requirements = self._resolve_discriminated_credentials(block, input_data)
if not requirements:
return {}, []
return await match_credentials_to_requirements(user_id, requirements)
def _get_inputs_list(self, block: AnyBlockSchema) -> list[dict[str, Any]]:
def _get_inputs_list(self, block: Any) -> list[dict[str, Any]]:
"""Extract non-credential inputs from block schema."""
inputs_list = []
schema = block.input_schema.jsonschema()
properties = schema.get("properties", {})
required_fields = set(schema.get("required", []))
# Get credential field names to exclude
credentials_fields = set(block.input_schema.get_credentials_fields().keys())
return get_inputs_from_schema(schema, exclude_fields=credentials_fields)
def _resolve_discriminated_credentials(
self,
block: AnyBlockSchema,
input_data: dict[str, Any],
) -> dict[str, CredentialsFieldInfo]:
"""Resolve credential requirements, applying discriminator logic where needed."""
credentials_fields_info = block.input_schema.get_credentials_fields_info()
if not credentials_fields_info:
return {}
for field_name, field_schema in properties.items():
# Skip credential fields
if field_name in credentials_fields:
continue
resolved: dict[str, CredentialsFieldInfo] = {}
inputs_list.append(
{
"name": field_name,
"title": field_schema.get("title", field_name),
"type": field_schema.get("type", "string"),
"description": field_schema.get("description", ""),
"required": field_name in required_fields,
}
)
for field_name, field_info in credentials_fields_info.items():
effective_field_info = field_info
if field_info.discriminator and field_info.discriminator_mapping:
discriminator_value = input_data.get(field_info.discriminator)
if discriminator_value is None:
field = block.input_schema.model_fields.get(
field_info.discriminator
)
if field and field.default is not PydanticUndefined:
discriminator_value = field.default
if (
discriminator_value
and discriminator_value in field_info.discriminator_mapping
):
effective_field_info = field_info.discriminate(discriminator_value)
# For host-scoped credentials, add the discriminator value
# (e.g., URL) so _credential_is_for_host can match it
effective_field_info.discriminator_values.add(discriminator_value)
logger.debug(
f"Discriminated provider for {field_name}: "
f"{discriminator_value} -> {effective_field_info.provider}"
)
resolved[field_name] = effective_field_info
return resolved
return inputs_list

View File

@@ -1,106 +0,0 @@
"""Tests for block execution guards in RunBlockTool."""
from unittest.mock import MagicMock, patch
import pytest
from backend.api.features.chat.tools.models import ErrorResponse
from backend.api.features.chat.tools.run_block import RunBlockTool
from backend.data.block import BlockType
from ._test_data import make_session
_TEST_USER_ID = "test-user-run-block"
def make_mock_block(
block_id: str, name: str, block_type: BlockType, disabled: bool = False
):
"""Create a mock block for testing."""
mock = MagicMock()
mock.id = block_id
mock.name = name
mock.block_type = block_type
mock.disabled = disabled
mock.input_schema = MagicMock()
mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
mock.input_schema.get_credentials_fields_info.return_value = []
return mock
class TestRunBlockFiltering:
"""Tests for block execution guards in RunBlockTool."""
@pytest.mark.asyncio(loop_scope="session")
async def test_excluded_block_type_returns_error(self):
"""Attempting to execute a block with excluded BlockType returns error."""
session = make_session(user_id=_TEST_USER_ID)
input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=input_block,
):
tool = RunBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="input-block-id",
input_data={},
)
assert isinstance(response, ErrorResponse)
assert "cannot be run directly in CoPilot" in response.message
assert "designed for use within graphs only" in response.message
@pytest.mark.asyncio(loop_scope="session")
async def test_excluded_block_id_returns_error(self):
"""Attempting to execute SmartDecisionMakerBlock returns error."""
session = make_session(user_id=_TEST_USER_ID)
smart_decision_id = "3b191d9f-356f-482d-8238-ba04b6d18381"
smart_block = make_mock_block(
smart_decision_id, "Smart Decision Maker", BlockType.STANDARD
)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=smart_block,
):
tool = RunBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id=smart_decision_id,
input_data={},
)
assert isinstance(response, ErrorResponse)
assert "cannot be run directly in CoPilot" in response.message
@pytest.mark.asyncio(loop_scope="session")
async def test_non_excluded_block_passes_guard(self):
"""Non-excluded blocks pass the filtering guard (may fail later for other reasons)."""
session = make_session(user_id=_TEST_USER_ID)
standard_block = make_mock_block(
"standard-id", "HTTP Request", BlockType.STANDARD
)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=standard_block,
):
tool = RunBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="standard-id",
input_data={},
)
# Should NOT be an ErrorResponse about CoPilot exclusion
# (may be other errors like missing credentials, but not the exclusion guard)
if isinstance(response, ErrorResponse):
assert "cannot be run directly in CoPilot" not in response.message

View File

@@ -8,7 +8,6 @@ from backend.api.features.library import model as library_model
from backend.api.features.store import db as store_db
from backend.data.graph import GraphModel
from backend.data.model import (
Credentials,
CredentialsFieldInfo,
CredentialsMetaInput,
HostScopedCredentials,
@@ -118,7 +117,7 @@ def build_missing_credentials_from_graph(
preserving all supported credential types for each field.
"""
matched_keys = set(matched_credentials.keys()) if matched_credentials else set()
aggregated_fields = graph.aggregate_credentials_inputs()
aggregated_fields = graph.regular_credentials_inputs
return {
field_key: _serialize_missing_credential(field_key, field_info)
@@ -224,99 +223,6 @@ async def get_or_create_library_agent(
return library_agents[0]
async def match_credentials_to_requirements(
user_id: str,
requirements: dict[str, CredentialsFieldInfo],
) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
"""
Match user's credentials against a dictionary of credential requirements.
This is the core matching logic shared by both graph and block credential matching.
"""
matched: dict[str, CredentialsMetaInput] = {}
missing: list[CredentialsMetaInput] = []
if not requirements:
return matched, missing
available_creds = await get_user_credentials(user_id)
for field_name, field_info in requirements.items():
matching_cred = find_matching_credential(available_creds, field_info)
if matching_cred:
try:
matched[field_name] = create_credential_meta_from_match(matching_cred)
except Exception as e:
logger.error(
f"Failed to create CredentialsMetaInput for field '{field_name}': "
f"provider={matching_cred.provider}, type={matching_cred.type}, "
f"credential_id={matching_cred.id}",
exc_info=True,
)
provider = next(iter(field_info.provider), "unknown")
cred_type = next(iter(field_info.supported_types), "api_key")
missing.append(
CredentialsMetaInput(
id=field_name,
provider=provider, # type: ignore
type=cred_type, # type: ignore
title=f"{field_name} (validation failed: {e})",
)
)
else:
provider = next(iter(field_info.provider), "unknown")
cred_type = next(iter(field_info.supported_types), "api_key")
missing.append(
CredentialsMetaInput(
id=field_name,
provider=provider, # type: ignore
type=cred_type, # type: ignore
title=field_name.replace("_", " ").title(),
)
)
return matched, missing
async def get_user_credentials(user_id: str) -> list[Credentials]:
"""Get all available credentials for a user."""
creds_manager = IntegrationCredentialsManager()
return await creds_manager.store.get_all_creds(user_id)
def find_matching_credential(
available_creds: list[Credentials],
field_info: CredentialsFieldInfo,
) -> Credentials | None:
"""Find a credential that matches the required provider, type, scopes, and host."""
for cred in available_creds:
if cred.provider not in field_info.provider:
continue
if cred.type not in field_info.supported_types:
continue
if cred.type == "oauth2" and not _credential_has_required_scopes(
cred, field_info
):
continue
if cred.type == "host_scoped" and not _credential_is_for_host(cred, field_info):
continue
return cred
return None
def create_credential_meta_from_match(
matching_cred: Credentials,
) -> CredentialsMetaInput:
"""Create a CredentialsMetaInput from a matched credential."""
return CredentialsMetaInput(
id=matching_cred.id,
provider=matching_cred.provider, # type: ignore
type=matching_cred.type,
title=matching_cred.title,
)
async def match_user_credentials_to_graph(
user_id: str,
graph: GraphModel,
@@ -338,7 +244,7 @@ async def match_user_credentials_to_graph(
missing_creds: list[str] = []
# Get aggregated credentials requirements from the graph
aggregated_creds = graph.aggregate_credentials_inputs()
aggregated_creds = graph.regular_credentials_inputs
logger.debug(
f"Matching credentials for graph {graph.id}: {len(aggregated_creds)} required"
)
@@ -425,6 +331,8 @@ def _credential_has_required_scopes(
# If no scopes are required, any credential matches
if not requirements.required_scopes:
return True
# Check that credential scopes are a superset of required scopes
return set(credential.scopes).issuperset(requirements.required_scopes)

View File

@@ -0,0 +1,78 @@
"""Tests for chat tools utility functions."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from backend.data.model import CredentialsFieldInfo
def _make_regular_field() -> CredentialsFieldInfo:
return CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["github"],
"credentials_types": ["api_key"],
"is_auto_credential": False,
},
by_alias=True,
)
def test_build_missing_credentials_excludes_auto_creds():
"""
build_missing_credentials_from_graph() should use regular_credentials_inputs
and thus exclude auto_credentials from the "missing" set.
"""
from backend.api.features.chat.tools.utils import (
build_missing_credentials_from_graph,
)
regular_field = _make_regular_field()
mock_graph = MagicMock()
# regular_credentials_inputs should only return the non-auto field
mock_graph.regular_credentials_inputs = {
"github_api_key": (regular_field, {("node-1", "credentials")}, True),
}
result = build_missing_credentials_from_graph(mock_graph, matched_credentials=None)
# Should include the regular credential
assert "github_api_key" in result
# Should NOT include the auto_credential (not in regular_credentials_inputs)
assert "google_oauth2" not in result
@pytest.mark.asyncio
async def test_match_user_credentials_excludes_auto_creds():
"""
match_user_credentials_to_graph() should use regular_credentials_inputs
and thus exclude auto_credentials from matching.
"""
from backend.api.features.chat.tools.utils import match_user_credentials_to_graph
regular_field = _make_regular_field()
mock_graph = MagicMock()
mock_graph.id = "test-graph"
# regular_credentials_inputs returns only non-auto fields
mock_graph.regular_credentials_inputs = {
"github_api_key": (regular_field, {("node-1", "credentials")}, True),
}
# Mock the credentials manager to return no credentials
with patch(
"backend.api.features.chat.tools.utils.IntegrationCredentialsManager"
) as MockCredsMgr:
mock_store = AsyncMock()
mock_store.get_all_creds.return_value = []
MockCredsMgr.return_value.store = mock_store
matched, missing = await match_user_credentials_to_graph(
user_id="test-user", graph=mock_graph
)
# No credentials available, so github should be missing
assert len(matched) == 0
assert len(missing) == 1
assert "github_api_key" in missing[0]

View File

@@ -1103,7 +1103,7 @@ async def create_preset_from_graph_execution(
raise NotFoundError(
f"Graph #{graph_execution.graph_id} not found or accessible"
)
elif len(graph.aggregate_credentials_inputs()) > 0:
elif len(graph.regular_credentials_inputs) > 0:
raise ValueError(
f"Graph execution #{graph_exec_id} can't be turned into a preset "
"because it was run before this feature existed "

View File

@@ -478,7 +478,7 @@ class ExaCreateOrFindWebsetBlock(Block):
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
try:
webset = await aexa.websets.get(id=input_data.external_id)
webset = aexa.websets.get(id=input_data.external_id)
webset_result = Webset.model_validate(webset.model_dump(by_alias=True))
yield "webset", webset_result
@@ -494,7 +494,7 @@ class ExaCreateOrFindWebsetBlock(Block):
count=input_data.search_count,
)
webset = await aexa.websets.create(
webset = aexa.websets.create(
params=CreateWebsetParameters(
search=search_params,
external_id=input_data.external_id,
@@ -554,7 +554,7 @@ class ExaUpdateWebsetBlock(Block):
if input_data.metadata is not None:
payload["metadata"] = input_data.metadata
sdk_webset = await aexa.websets.update(id=input_data.webset_id, params=payload)
sdk_webset = aexa.websets.update(id=input_data.webset_id, params=payload)
status_str = (
sdk_webset.status.value
@@ -617,7 +617,7 @@ class ExaListWebsetsBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
response = await aexa.websets.list(
response = aexa.websets.list(
cursor=input_data.cursor,
limit=input_data.limit,
)
@@ -678,7 +678,7 @@ class ExaGetWebsetBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_webset = await aexa.websets.get(id=input_data.webset_id)
sdk_webset = aexa.websets.get(id=input_data.webset_id)
status_str = (
sdk_webset.status.value
@@ -748,7 +748,7 @@ class ExaDeleteWebsetBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
deleted_webset = await aexa.websets.delete(id=input_data.webset_id)
deleted_webset = aexa.websets.delete(id=input_data.webset_id)
status_str = (
deleted_webset.status.value
@@ -798,7 +798,7 @@ class ExaCancelWebsetBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
canceled_webset = await aexa.websets.cancel(id=input_data.webset_id)
canceled_webset = aexa.websets.cancel(id=input_data.webset_id)
status_str = (
canceled_webset.status.value
@@ -968,7 +968,7 @@ class ExaPreviewWebsetBlock(Block):
entity["description"] = input_data.entity_description
payload["entity"] = entity
sdk_preview = await aexa.websets.preview(params=payload)
sdk_preview = aexa.websets.preview(params=payload)
preview = PreviewWebsetModel.from_sdk(sdk_preview)
@@ -1051,7 +1051,7 @@ class ExaWebsetStatusBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
status = (
webset.status.value
@@ -1185,7 +1185,7 @@ class ExaWebsetSummaryBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
# Extract basic info
webset_id = webset.id
@@ -1211,7 +1211,7 @@ class ExaWebsetSummaryBlock(Block):
total_items = 0
if input_data.include_sample_items and input_data.sample_size > 0:
items_response = await aexa.websets.items.list(
items_response = aexa.websets.items.list(
webset_id=input_data.webset_id, limit=input_data.sample_size
)
sample_items_data = [
@@ -1362,7 +1362,7 @@ class ExaWebsetReadyCheckBlock(Block):
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
# Get webset details
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
status = (
webset.status.value

View File

@@ -202,7 +202,7 @@ class ExaCreateEnrichmentBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_enrichment = await aexa.websets.enrichments.create(
sdk_enrichment = aexa.websets.enrichments.create(
webset_id=input_data.webset_id, params=payload
)
@@ -223,7 +223,7 @@ class ExaCreateEnrichmentBlock(Block):
items_enriched = 0
while time.time() - poll_start < input_data.polling_timeout:
current_enrich = await aexa.websets.enrichments.get(
current_enrich = aexa.websets.enrichments.get(
webset_id=input_data.webset_id, id=enrichment_id
)
current_status = (
@@ -234,7 +234,7 @@ class ExaCreateEnrichmentBlock(Block):
if current_status in ["completed", "failed", "cancelled"]:
# Estimate items from webset searches
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
if webset.searches:
for search in webset.searches:
if search.progress:
@@ -329,7 +329,7 @@ class ExaGetEnrichmentBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_enrichment = await aexa.websets.enrichments.get(
sdk_enrichment = aexa.websets.enrichments.get(
webset_id=input_data.webset_id, id=input_data.enrichment_id
)
@@ -474,7 +474,7 @@ class ExaDeleteEnrichmentBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
deleted_enrichment = await aexa.websets.enrichments.delete(
deleted_enrichment = aexa.websets.enrichments.delete(
webset_id=input_data.webset_id, id=input_data.enrichment_id
)
@@ -525,13 +525,13 @@ class ExaCancelEnrichmentBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
canceled_enrichment = await aexa.websets.enrichments.cancel(
canceled_enrichment = aexa.websets.enrichments.cancel(
webset_id=input_data.webset_id, id=input_data.enrichment_id
)
# Try to estimate how many items were enriched before cancellation
items_enriched = 0
items_response = await aexa.websets.items.list(
items_response = aexa.websets.items.list(
webset_id=input_data.webset_id, limit=100
)

View File

@@ -222,7 +222,7 @@ class ExaCreateImportBlock(Block):
def _create_test_mock():
"""Create test mocks for the AsyncExa SDK."""
from datetime import datetime
from unittest.mock import AsyncMock, MagicMock
from unittest.mock import MagicMock
# Create mock SDK import object
mock_import = MagicMock()
@@ -247,7 +247,7 @@ class ExaCreateImportBlock(Block):
return {
"_get_client": lambda *args, **kwargs: MagicMock(
websets=MagicMock(
imports=MagicMock(create=AsyncMock(return_value=mock_import))
imports=MagicMock(create=lambda *args, **kwargs: mock_import)
)
)
}
@@ -294,7 +294,7 @@ class ExaCreateImportBlock(Block):
if input_data.metadata:
payload["metadata"] = input_data.metadata
sdk_import = await aexa.websets.imports.create(
sdk_import = aexa.websets.imports.create(
params=payload, csv_data=input_data.csv_data
)
@@ -360,7 +360,7 @@ class ExaGetImportBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_import = await aexa.websets.imports.get(import_id=input_data.import_id)
sdk_import = aexa.websets.imports.get(import_id=input_data.import_id)
import_obj = ImportModel.from_sdk(sdk_import)
@@ -426,7 +426,7 @@ class ExaListImportsBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
response = await aexa.websets.imports.list(
response = aexa.websets.imports.list(
cursor=input_data.cursor,
limit=input_data.limit,
)
@@ -474,9 +474,7 @@ class ExaDeleteImportBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
deleted_import = await aexa.websets.imports.delete(
import_id=input_data.import_id
)
deleted_import = aexa.websets.imports.delete(import_id=input_data.import_id)
yield "import_id", deleted_import.id
yield "success", "true"
@@ -575,14 +573,14 @@ class ExaExportWebsetBlock(Block):
}
)
# Create async iterator for list_all
async def async_item_iterator(*args, **kwargs):
for item in [mock_item1, mock_item2]:
yield item
# Create mock iterator
mock_items = [mock_item1, mock_item2]
return {
"_get_client": lambda *args, **kwargs: MagicMock(
websets=MagicMock(items=MagicMock(list_all=async_item_iterator))
websets=MagicMock(
items=MagicMock(list_all=lambda *args, **kwargs: iter(mock_items))
)
)
}
@@ -604,7 +602,7 @@ class ExaExportWebsetBlock(Block):
webset_id=input_data.webset_id, limit=input_data.max_items
)
async for sdk_item in item_iterator:
for sdk_item in item_iterator:
if len(all_items) >= input_data.max_items:
break

View File

@@ -178,7 +178,7 @@ class ExaGetWebsetItemBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_item = await aexa.websets.items.get(
sdk_item = aexa.websets.items.get(
webset_id=input_data.webset_id, id=input_data.item_id
)
@@ -269,7 +269,7 @@ class ExaListWebsetItemsBlock(Block):
response = None
while time.time() - start_time < input_data.wait_timeout:
response = await aexa.websets.items.list(
response = aexa.websets.items.list(
webset_id=input_data.webset_id,
cursor=input_data.cursor,
limit=input_data.limit,
@@ -282,13 +282,13 @@ class ExaListWebsetItemsBlock(Block):
interval = min(interval * 1.2, 10)
if not response:
response = await aexa.websets.items.list(
response = aexa.websets.items.list(
webset_id=input_data.webset_id,
cursor=input_data.cursor,
limit=input_data.limit,
)
else:
response = await aexa.websets.items.list(
response = aexa.websets.items.list(
webset_id=input_data.webset_id,
cursor=input_data.cursor,
limit=input_data.limit,
@@ -340,7 +340,7 @@ class ExaDeleteWebsetItemBlock(Block):
) -> BlockOutput:
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
deleted_item = await aexa.websets.items.delete(
deleted_item = aexa.websets.items.delete(
webset_id=input_data.webset_id, id=input_data.item_id
)
@@ -408,7 +408,7 @@ class ExaBulkWebsetItemsBlock(Block):
webset_id=input_data.webset_id, limit=input_data.max_items
)
async for sdk_item in item_iterator:
for sdk_item in item_iterator:
if len(all_items) >= input_data.max_items:
break
@@ -475,7 +475,7 @@ class ExaWebsetItemsSummaryBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
entity_type = "unknown"
if webset.searches:
@@ -495,7 +495,7 @@ class ExaWebsetItemsSummaryBlock(Block):
# Get sample items if requested
sample_items: List[WebsetItemModel] = []
if input_data.sample_size > 0:
items_response = await aexa.websets.items.list(
items_response = aexa.websets.items.list(
webset_id=input_data.webset_id, limit=input_data.sample_size
)
# Convert to our stable models
@@ -569,7 +569,7 @@ class ExaGetNewItemsBlock(Block):
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
# Get items starting from cursor
response = await aexa.websets.items.list(
response = aexa.websets.items.list(
webset_id=input_data.webset_id,
cursor=input_data.since_cursor,
limit=input_data.max_items,

View File

@@ -233,7 +233,7 @@ class ExaCreateMonitorBlock(Block):
def _create_test_mock():
"""Create test mocks for the AsyncExa SDK."""
from datetime import datetime
from unittest.mock import AsyncMock, MagicMock
from unittest.mock import MagicMock
# Create mock SDK monitor object
mock_monitor = MagicMock()
@@ -263,7 +263,7 @@ class ExaCreateMonitorBlock(Block):
return {
"_get_client": lambda *args, **kwargs: MagicMock(
websets=MagicMock(
monitors=MagicMock(create=AsyncMock(return_value=mock_monitor))
monitors=MagicMock(create=lambda *args, **kwargs: mock_monitor)
)
)
}
@@ -320,7 +320,7 @@ class ExaCreateMonitorBlock(Block):
if input_data.metadata:
payload["metadata"] = input_data.metadata
sdk_monitor = await aexa.websets.monitors.create(params=payload)
sdk_monitor = aexa.websets.monitors.create(params=payload)
monitor = MonitorModel.from_sdk(sdk_monitor)
@@ -384,7 +384,7 @@ class ExaGetMonitorBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_monitor = await aexa.websets.monitors.get(monitor_id=input_data.monitor_id)
sdk_monitor = aexa.websets.monitors.get(monitor_id=input_data.monitor_id)
monitor = MonitorModel.from_sdk(sdk_monitor)
@@ -476,7 +476,7 @@ class ExaUpdateMonitorBlock(Block):
if input_data.metadata is not None:
payload["metadata"] = input_data.metadata
sdk_monitor = await aexa.websets.monitors.update(
sdk_monitor = aexa.websets.monitors.update(
monitor_id=input_data.monitor_id, params=payload
)
@@ -522,9 +522,7 @@ class ExaDeleteMonitorBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
deleted_monitor = await aexa.websets.monitors.delete(
monitor_id=input_data.monitor_id
)
deleted_monitor = aexa.websets.monitors.delete(monitor_id=input_data.monitor_id)
yield "monitor_id", deleted_monitor.id
yield "success", "true"
@@ -581,7 +579,7 @@ class ExaListMonitorsBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
response = await aexa.websets.monitors.list(
response = aexa.websets.monitors.list(
cursor=input_data.cursor,
limit=input_data.limit,
webset_id=input_data.webset_id,

View File

@@ -121,7 +121,7 @@ class ExaWaitForWebsetBlock(Block):
WebsetTargetStatus.IDLE,
WebsetTargetStatus.ANY_COMPLETE,
]:
final_webset = await aexa.websets.wait_until_idle(
final_webset = aexa.websets.wait_until_idle(
id=input_data.webset_id,
timeout=input_data.timeout,
poll_interval=input_data.check_interval,
@@ -164,7 +164,7 @@ class ExaWaitForWebsetBlock(Block):
interval = input_data.check_interval
while time.time() - start_time < input_data.timeout:
# Get current webset status
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
current_status = (
webset.status.value
if hasattr(webset.status, "value")
@@ -209,7 +209,7 @@ class ExaWaitForWebsetBlock(Block):
# Timeout reached
elapsed = time.time() - start_time
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
final_status = (
webset.status.value
if hasattr(webset.status, "value")
@@ -345,7 +345,7 @@ class ExaWaitForSearchBlock(Block):
try:
while time.time() - start_time < input_data.timeout:
# Get current search status using SDK
search = await aexa.websets.searches.get(
search = aexa.websets.searches.get(
webset_id=input_data.webset_id, id=input_data.search_id
)
@@ -401,7 +401,7 @@ class ExaWaitForSearchBlock(Block):
elapsed = time.time() - start_time
# Get last known status
search = await aexa.websets.searches.get(
search = aexa.websets.searches.get(
webset_id=input_data.webset_id, id=input_data.search_id
)
final_status = (
@@ -503,7 +503,7 @@ class ExaWaitForEnrichmentBlock(Block):
try:
while time.time() - start_time < input_data.timeout:
# Get current enrichment status using SDK
enrichment = await aexa.websets.enrichments.get(
enrichment = aexa.websets.enrichments.get(
webset_id=input_data.webset_id, id=input_data.enrichment_id
)
@@ -548,7 +548,7 @@ class ExaWaitForEnrichmentBlock(Block):
elapsed = time.time() - start_time
# Get last known status
enrichment = await aexa.websets.enrichments.get(
enrichment = aexa.websets.enrichments.get(
webset_id=input_data.webset_id, id=input_data.enrichment_id
)
final_status = (
@@ -575,7 +575,7 @@ class ExaWaitForEnrichmentBlock(Block):
) -> tuple[list[SampleEnrichmentModel], int]:
"""Get sample enriched data and count."""
# Get a few items to see enrichment results using SDK
response = await aexa.websets.items.list(webset_id=webset_id, limit=5)
response = aexa.websets.items.list(webset_id=webset_id, limit=5)
sample_data: list[SampleEnrichmentModel] = []
enriched_count = 0

View File

@@ -317,7 +317,7 @@ class ExaCreateWebsetSearchBlock(Block):
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_search = await aexa.websets.searches.create(
sdk_search = aexa.websets.searches.create(
webset_id=input_data.webset_id, params=payload
)
@@ -350,7 +350,7 @@ class ExaCreateWebsetSearchBlock(Block):
poll_start = time.time()
while time.time() - poll_start < input_data.polling_timeout:
current_search = await aexa.websets.searches.get(
current_search = aexa.websets.searches.get(
webset_id=input_data.webset_id, id=search_id
)
current_status = (
@@ -442,7 +442,7 @@ class ExaGetWebsetSearchBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
sdk_search = await aexa.websets.searches.get(
sdk_search = aexa.websets.searches.get(
webset_id=input_data.webset_id, id=input_data.search_id
)
@@ -523,7 +523,7 @@ class ExaCancelWebsetSearchBlock(Block):
# Use AsyncExa SDK
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
canceled_search = await aexa.websets.searches.cancel(
canceled_search = aexa.websets.searches.cancel(
webset_id=input_data.webset_id, id=input_data.search_id
)
@@ -604,7 +604,7 @@ class ExaFindOrCreateSearchBlock(Block):
aexa = AsyncExa(api_key=credentials.api_key.get_secret_value())
# Get webset to check existing searches
webset = await aexa.websets.get(id=input_data.webset_id)
webset = aexa.websets.get(id=input_data.webset_id)
# Look for existing search with same query
existing_search = None
@@ -636,7 +636,7 @@ class ExaFindOrCreateSearchBlock(Block):
if input_data.entity_type != SearchEntityType.AUTO:
payload["entity"] = {"type": input_data.entity_type.value}
sdk_search = await aexa.websets.searches.create(
sdk_search = aexa.websets.searches.create(
webset_id=input_data.webset_id, params=payload
)

View File

@@ -531,12 +531,12 @@ class LLMResponse(BaseModel):
def convert_openai_tool_fmt_to_anthropic(
openai_tools: list[dict] | None = None,
) -> Iterable[ToolParam] | anthropic.Omit:
) -> Iterable[ToolParam] | anthropic.NotGiven:
"""
Convert OpenAI tool format to Anthropic tool format.
"""
if not openai_tools or len(openai_tools) == 0:
return anthropic.omit
return anthropic.NOT_GIVEN
anthropic_tools = []
for tool in openai_tools:
@@ -596,10 +596,10 @@ def extract_openai_tool_calls(response) -> list[ToolContentBlock] | None:
def get_parallel_tool_calls_param(
llm_model: LlmModel, parallel_tool_calls: bool | None
) -> bool | openai.Omit:
):
"""Get the appropriate parallel_tool_calls parameter for OpenAI-compatible APIs."""
if llm_model.startswith("o") or parallel_tool_calls is None:
return openai.omit
return openai.NOT_GIVEN
return parallel_tool_calls

View File

@@ -319,6 +319,8 @@ class BlockSchema(BaseModel):
"credentials_provider": [config.get("provider", "google")],
"credentials_types": [config.get("type", "oauth2")],
"credentials_scopes": config.get("scopes"),
"is_auto_credential": True,
"input_field_name": info["field_name"],
}
result[kwarg_name] = CredentialsFieldInfo.model_validate(
auto_schema, by_alias=True

View File

@@ -1,8 +1,9 @@
import logging
import queue
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from enum import Enum
from multiprocessing import Manager
from queue import Empty
from typing import (
TYPE_CHECKING,
Annotated,
@@ -1199,16 +1200,12 @@ class NodeExecutionEntry(BaseModel):
class ExecutionQueue(Generic[T]):
"""
Thread-safe queue for managing node execution within a single graph execution.
Note: Uses queue.Queue (not multiprocessing.Queue) since all access is from
threads within the same process. If migrating back to ProcessPoolExecutor,
replace with multiprocessing.Manager().Queue() for cross-process safety.
Queue for managing the execution of agents.
This will be shared between different processes
"""
def __init__(self):
# Thread-safe queue (not multiprocessing) — see class docstring
self.queue: queue.Queue[T] = queue.Queue()
self.queue = Manager().Queue()
def add(self, execution: T) -> T:
self.queue.put(execution)
@@ -1223,7 +1220,7 @@ class ExecutionQueue(Generic[T]):
def get_or_none(self) -> T | None:
try:
return self.queue.get_nowait()
except queue.Empty:
except Empty:
return None

View File

@@ -1,58 +0,0 @@
"""Tests for ExecutionQueue thread-safety."""
import queue
import threading
from backend.data.execution import ExecutionQueue
def test_execution_queue_uses_stdlib_queue():
"""Verify ExecutionQueue uses queue.Queue (not multiprocessing)."""
q = ExecutionQueue()
assert isinstance(q.queue, queue.Queue)
def test_basic_operations():
"""Test add, get, empty, and get_or_none."""
q = ExecutionQueue()
assert q.empty() is True
assert q.get_or_none() is None
result = q.add("item1")
assert result == "item1"
assert q.empty() is False
item = q.get()
assert item == "item1"
assert q.empty() is True
def test_thread_safety():
"""Test concurrent access from multiple threads."""
q = ExecutionQueue()
results = []
num_items = 100
def producer():
for i in range(num_items):
q.add(f"item_{i}")
def consumer():
count = 0
while count < num_items:
item = q.get_or_none()
if item is not None:
results.append(item)
count += 1
producer_thread = threading.Thread(target=producer)
consumer_thread = threading.Thread(target=consumer)
producer_thread.start()
consumer_thread.start()
producer_thread.join(timeout=5)
consumer_thread.join(timeout=5)
assert len(results) == num_items

View File

@@ -447,8 +447,7 @@ class GraphModel(Graph, GraphMeta):
@computed_field
@property
def credentials_input_schema(self) -> dict[str, Any]:
graph_credentials_inputs = self.aggregate_credentials_inputs()
graph_credentials_inputs = self.regular_credentials_inputs
logger.debug(
f"Combined credentials input fields for graph #{self.id} ({self.name}): "
f"{graph_credentials_inputs}"
@@ -604,6 +603,28 @@ class GraphModel(Graph, GraphMeta):
for key, (field_info, node_field_pairs) in combined.items()
}
@property
def regular_credentials_inputs(
self,
) -> dict[str, tuple[CredentialsFieldInfo, set[tuple[str, str]], bool]]:
"""Credentials that need explicit user mapping (CredentialsMetaInput fields)."""
return {
k: v
for k, v in self.aggregate_credentials_inputs().items()
if not v[0].is_auto_credential
}
@property
def auto_credentials_inputs(
self,
) -> dict[str, tuple[CredentialsFieldInfo, set[tuple[str, str]], bool]]:
"""Credentials embedded in file fields (_credentials_id), resolved at execution time."""
return {
k: v
for k, v in self.aggregate_credentials_inputs().items()
if v[0].is_auto_credential
}
def reassign_ids(self, user_id: str, reassign_graph_id: bool = False):
"""
Reassigns all IDs in the graph to new UUIDs.
@@ -654,6 +675,16 @@ class GraphModel(Graph, GraphMeta):
) and graph_id in graph_id_map:
node.input_default["graph_id"] = graph_id_map[graph_id]
# Clear auto-credentials references (e.g., _credentials_id in
# GoogleDriveFile fields) so the new user must re-authenticate
# with their own account
for node in graph.nodes:
if not node.input_default:
continue
for key, value in node.input_default.items():
if isinstance(value, dict) and "_credentials_id" in value:
del value["_credentials_id"]
def validate_graph(
self,
for_run: bool = False,

View File

@@ -463,3 +463,329 @@ def test_node_credentials_optional_with_other_metadata():
assert node.credentials_optional is True
assert node.metadata["position"] == {"x": 100, "y": 200}
assert node.metadata["customized_name"] == "My Custom Node"
# ============================================================================
# Tests for CredentialsFieldInfo.combine() field propagation
def test_combine_preserves_is_auto_credential_flag():
"""
CredentialsFieldInfo.combine() must propagate is_auto_credential and
input_field_name to the combined result. Regression test for reviewer
finding that combine() dropped these fields.
"""
from backend.data.model import CredentialsFieldInfo
auto_field = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["google"],
"credentials_types": ["oauth2"],
"credentials_scopes": ["drive.readonly"],
"is_auto_credential": True,
"input_field_name": "spreadsheet",
},
by_alias=True,
)
# combine() takes *args of (field_info, key) tuples
combined = CredentialsFieldInfo.combine(
(auto_field, ("node-1", "credentials")),
(auto_field, ("node-2", "credentials")),
)
assert len(combined) == 1
group_key = next(iter(combined))
combined_info, combined_keys = combined[group_key]
assert combined_info.is_auto_credential is True
assert combined_info.input_field_name == "spreadsheet"
assert combined_keys == {("node-1", "credentials"), ("node-2", "credentials")}
def test_combine_preserves_regular_credential_defaults():
"""Regular credentials should have is_auto_credential=False after combine()."""
from backend.data.model import CredentialsFieldInfo
regular_field = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["github"],
"credentials_types": ["api_key"],
"is_auto_credential": False,
},
by_alias=True,
)
combined = CredentialsFieldInfo.combine(
(regular_field, ("node-1", "credentials")),
)
group_key = next(iter(combined))
combined_info, _ = combined[group_key]
assert combined_info.is_auto_credential is False
assert combined_info.input_field_name is None
# ============================================================================
# Tests for _reassign_ids credential clearing (Fix 3: SECRT-1772)
def test_reassign_ids_clears_credentials_id():
"""
[SECRT-1772] _reassign_ids should clear _credentials_id from
GoogleDriveFile-style input_default fields so forked agents
don't retain the original creator's credential references.
"""
from backend.data.graph import GraphModel
node = Node(
id="node-1",
block_id=StoreValueBlock().id,
input_default={
"spreadsheet": {
"_credentials_id": "original-cred-id",
"id": "file-123",
"name": "test.xlsx",
"mimeType": "application/vnd.google-apps.spreadsheet",
"url": "https://docs.google.com/spreadsheets/d/file-123",
},
},
)
graph = Graph(
id="test-graph",
name="Test",
description="Test",
nodes=[node],
links=[],
)
GraphModel._reassign_ids(graph, user_id="new-user", graph_id_map={})
# _credentials_id key should be removed (not set to None) so that
# _acquire_auto_credentials correctly errors instead of treating it as chained data
assert "_credentials_id" not in graph.nodes[0].input_default["spreadsheet"]
def test_reassign_ids_preserves_non_credential_fields():
"""
Regression guard: _reassign_ids should NOT modify non-credential fields
like name, mimeType, id, url.
"""
from backend.data.graph import GraphModel
node = Node(
id="node-1",
block_id=StoreValueBlock().id,
input_default={
"spreadsheet": {
"_credentials_id": "cred-abc",
"id": "file-123",
"name": "test.xlsx",
"mimeType": "application/vnd.google-apps.spreadsheet",
"url": "https://docs.google.com/spreadsheets/d/file-123",
},
},
)
graph = Graph(
id="test-graph",
name="Test",
description="Test",
nodes=[node],
links=[],
)
GraphModel._reassign_ids(graph, user_id="new-user", graph_id_map={})
field = graph.nodes[0].input_default["spreadsheet"]
assert field["id"] == "file-123"
assert field["name"] == "test.xlsx"
assert field["mimeType"] == "application/vnd.google-apps.spreadsheet"
assert field["url"] == "https://docs.google.com/spreadsheets/d/file-123"
def test_reassign_ids_handles_no_credentials():
"""
Regression guard: _reassign_ids should not error when input_default
has no dict fields with _credentials_id.
"""
from backend.data.graph import GraphModel
node = Node(
id="node-1",
block_id=StoreValueBlock().id,
input_default={
"input": "some value",
"another_input": 42,
},
)
graph = Graph(
id="test-graph",
name="Test",
description="Test",
nodes=[node],
links=[],
)
GraphModel._reassign_ids(graph, user_id="new-user", graph_id_map={})
# Should not error, fields unchanged
assert graph.nodes[0].input_default["input"] == "some value"
assert graph.nodes[0].input_default["another_input"] == 42
def test_reassign_ids_handles_multiple_credential_fields():
"""
[SECRT-1772] When a node has multiple dict fields with _credentials_id,
ALL of them should be cleared.
"""
from backend.data.graph import GraphModel
node = Node(
id="node-1",
block_id=StoreValueBlock().id,
input_default={
"spreadsheet": {
"_credentials_id": "cred-1",
"id": "file-1",
"name": "file1.xlsx",
},
"doc_file": {
"_credentials_id": "cred-2",
"id": "file-2",
"name": "file2.docx",
},
"plain_input": "not a dict",
},
)
graph = Graph(
id="test-graph",
name="Test",
description="Test",
nodes=[node],
links=[],
)
GraphModel._reassign_ids(graph, user_id="new-user", graph_id_map={})
assert "_credentials_id" not in graph.nodes[0].input_default["spreadsheet"]
assert "_credentials_id" not in graph.nodes[0].input_default["doc_file"]
assert graph.nodes[0].input_default["plain_input"] == "not a dict"
# ============================================================================
# Tests for discriminate() field propagation
def test_discriminate_preserves_is_auto_credential_flag():
"""
CredentialsFieldInfo.discriminate() must propagate is_auto_credential and
input_field_name to the discriminated result. Regression test for
discriminate() dropping these fields (same class of bug as combine()).
"""
from backend.data.model import CredentialsFieldInfo
auto_field = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["google", "openai"],
"credentials_types": ["oauth2"],
"credentials_scopes": ["drive.readonly"],
"is_auto_credential": True,
"input_field_name": "spreadsheet",
"discriminator": "model",
"discriminator_mapping": {"gpt-4": "openai", "gemini": "google"},
},
by_alias=True,
)
discriminated = auto_field.discriminate("gemini")
assert discriminated.is_auto_credential is True
assert discriminated.input_field_name == "spreadsheet"
assert discriminated.provider == frozenset(["google"])
def test_discriminate_preserves_regular_credential_defaults():
"""Regular credentials should have is_auto_credential=False after discriminate()."""
from backend.data.model import CredentialsFieldInfo
regular_field = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["google", "openai"],
"credentials_types": ["api_key"],
"is_auto_credential": False,
"discriminator": "model",
"discriminator_mapping": {"gpt-4": "openai", "gemini": "google"},
},
by_alias=True,
)
discriminated = regular_field.discriminate("gpt-4")
assert discriminated.is_auto_credential is False
assert discriminated.input_field_name is None
assert discriminated.provider == frozenset(["openai"])
# ============================================================================
# Tests for credentials_input_schema excluding auto_credentials
def test_credentials_input_schema_excludes_auto_creds():
"""
GraphModel.credentials_input_schema should exclude auto_credentials
(is_auto_credential=True) from the schema. Auto_credentials are
transparently resolved at execution time via file picker data.
"""
from datetime import datetime, timezone
from unittest.mock import PropertyMock, patch
from backend.data.graph import GraphModel, NodeModel
from backend.data.model import CredentialsFieldInfo
regular_field_info = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["github"],
"credentials_types": ["api_key"],
"is_auto_credential": False,
},
by_alias=True,
)
graph = GraphModel(
id="test-graph",
version=1,
name="Test",
description="Test",
user_id="test-user",
created_at=datetime.now(timezone.utc),
nodes=[
NodeModel(
id="node-1",
block_id=StoreValueBlock().id,
input_default={},
graph_id="test-graph",
graph_version=1,
),
],
links=[],
)
# Mock regular_credentials_inputs to return only the non-auto field (3-tuple)
regular_only = {
"github_credentials": (
regular_field_info,
{("node-1", "credentials")},
True,
),
}
with patch.object(
type(graph),
"regular_credentials_inputs",
new_callable=PropertyMock,
return_value=regular_only,
):
schema = graph.credentials_input_schema
field_names = set(schema.get("properties", {}).keys())
# Should include regular credential but NOT auto_credential
assert "github_credentials" in field_names
assert "google_credentials" not in field_names

View File

@@ -571,6 +571,8 @@ class CredentialsFieldInfo(BaseModel, Generic[CP, CT]):
discriminator: Optional[str] = None
discriminator_mapping: Optional[dict[str, CP]] = None
discriminator_values: set[Any] = Field(default_factory=set)
is_auto_credential: bool = False
input_field_name: Optional[str] = None
@classmethod
def combine(
@@ -651,6 +653,9 @@ class CredentialsFieldInfo(BaseModel, Generic[CP, CT]):
+ "_credentials"
)
# Propagate is_auto_credential from the combined field.
# All fields in a group should share the same is_auto_credential
# value since auto and regular credentials serve different purposes.
result[group_key] = (
CredentialsFieldInfo[CP, CT](
credentials_provider=combined.provider,
@@ -659,6 +664,8 @@ class CredentialsFieldInfo(BaseModel, Generic[CP, CT]):
discriminator=combined.discriminator,
discriminator_mapping=combined.discriminator_mapping,
discriminator_values=set(all_discriminator_values),
is_auto_credential=combined.is_auto_credential,
input_field_name=combined.input_field_name,
),
combined_keys,
)
@@ -684,6 +691,8 @@ class CredentialsFieldInfo(BaseModel, Generic[CP, CT]):
discriminator=self.discriminator,
discriminator_mapping=self.discriminator_mapping,
discriminator_values=self.discriminator_values,
is_auto_credential=self.is_auto_credential,
input_field_name=self.input_field_name,
)

View File

@@ -1,4 +1,3 @@
import asyncio
import logging
from abc import ABC, abstractmethod
from enum import Enum
@@ -226,10 +225,6 @@ class SyncRabbitMQ(RabbitMQBase):
class AsyncRabbitMQ(RabbitMQBase):
"""Asynchronous RabbitMQ client"""
def __init__(self, config: RabbitMQConfig):
super().__init__(config)
self._reconnect_lock: asyncio.Lock | None = None
@property
def is_connected(self) -> bool:
return bool(self._connection and not self._connection.is_closed)
@@ -240,17 +235,7 @@ class AsyncRabbitMQ(RabbitMQBase):
@conn_retry("AsyncRabbitMQ", "Acquiring async connection")
async def connect(self):
if self.is_connected and self._channel and not self._channel.is_closed:
return
if (
self.is_connected
and self._connection
and (self._channel is None or self._channel.is_closed)
):
self._channel = await self._connection.channel()
await self._channel.set_qos(prefetch_count=1)
await self.declare_infrastructure()
if self.is_connected:
return
self._connection = await aio_pika.connect_robust(
@@ -306,46 +291,24 @@ class AsyncRabbitMQ(RabbitMQBase):
exchange, routing_key=queue.routing_key or queue.name
)
@property
def _lock(self) -> asyncio.Lock:
if self._reconnect_lock is None:
self._reconnect_lock = asyncio.Lock()
return self._reconnect_lock
async def _ensure_channel(self) -> aio_pika.abc.AbstractChannel:
"""Get a valid channel, reconnecting if the current one is stale.
Uses a lock to prevent concurrent reconnection attempts from racing.
"""
if self.is_ready:
return self._channel # type: ignore # is_ready guarantees non-None
async with self._lock:
# Double-check after acquiring lock
if self.is_ready:
return self._channel # type: ignore
self._channel = None
await self.connect()
if self._channel is None:
raise RuntimeError("Channel should be established after connect")
return self._channel
async def _publish_once(
@func_retry
async def publish_message(
self,
routing_key: str,
message: str,
exchange: Optional[Exchange] = None,
persistent: bool = True,
) -> None:
channel = await self._ensure_channel()
if not self.is_ready:
await self.connect()
if self._channel is None:
raise RuntimeError("Channel should be established after connect")
if exchange:
exchange_obj = await channel.get_exchange(exchange.name)
exchange_obj = await self._channel.get_exchange(exchange.name)
else:
exchange_obj = channel.default_exchange
exchange_obj = self._channel.default_exchange
await exchange_obj.publish(
aio_pika.Message(
@@ -359,23 +322,9 @@ class AsyncRabbitMQ(RabbitMQBase):
routing_key=routing_key,
)
@func_retry
async def publish_message(
self,
routing_key: str,
message: str,
exchange: Optional[Exchange] = None,
persistent: bool = True,
) -> None:
try:
await self._publish_once(routing_key, message, exchange, persistent)
except aio_pika.exceptions.ChannelInvalidStateError:
logger.warning(
"RabbitMQ channel invalid, forcing reconnect and retrying publish"
)
async with self._lock:
self._channel = None
await self._publish_once(routing_key, message, exchange, persistent)
async def get_channel(self) -> aio_pika.abc.AbstractChannel:
return await self._ensure_channel()
if not self.is_ready:
await self.connect()
if self._channel is None:
raise RuntimeError("Channel should be established after connect")
return self._channel

View File

@@ -92,6 +92,7 @@ from .utils import (
block_usage_cost,
create_execution_queue_config,
execution_usage_cost,
parse_auto_credential_field,
validate_exec,
)
@@ -172,6 +173,60 @@ def execute_graph(
T = TypeVar("T")
async def _acquire_auto_credentials(
input_model: type[BlockSchema],
input_data: dict[str, Any],
creds_manager: "IntegrationCredentialsManager",
user_id: str,
) -> tuple[dict[str, Any], list[AsyncRedisLock]]:
"""
Resolve auto_credentials from GoogleDriveFileField-style inputs.
Returns:
(extra_exec_kwargs, locks): kwargs to inject into block execution, and
credential locks to release after execution completes.
"""
extra_exec_kwargs: dict[str, Any] = {}
locks: list[AsyncRedisLock] = []
# NOTE: If a block ever has multiple auto-credential fields, a ValueError
# on a later field will strand locks acquired for earlier fields. They'll
# auto-expire via Redis TTL, but add a try/except to release partial locks
# if that becomes a real scenario.
for kwarg_name, info in input_model.get_auto_credentials_fields().items():
field_name = info["field_name"]
field_data = input_data.get(field_name)
parsed = parse_auto_credential_field(
field_name=field_name,
info=info,
field_data=field_data,
field_present_in_input=field_name in input_data,
)
if parsed.error:
raise ValueError(parsed.error)
if parsed.cred_id:
# Credential ID provided - acquire credentials
try:
credentials, lock = await creds_manager.acquire(user_id, parsed.cred_id)
locks.append(lock)
extra_exec_kwargs[kwarg_name] = credentials
except ValueError:
raise ValueError(
f"{parsed.provider.capitalize()} credentials for "
f"'{parsed.file_name}' in field '{parsed.field_name}' are not "
f"available in your account. "
f"This can happen if the agent was created by another "
f"user or the credentials were deleted. "
f"Please open the agent in the builder and re-select "
f"the file to authenticate with your own account."
)
return extra_exec_kwargs, locks
async def execute_node(
node: Node,
data: NodeExecutionEntry,
@@ -271,41 +326,14 @@ async def execute_node(
extra_exec_kwargs[field_name] = credentials
# Handle auto-generated credentials (e.g., from GoogleDriveFileInput)
for kwarg_name, info in input_model.get_auto_credentials_fields().items():
field_name = info["field_name"]
field_data = input_data.get(field_name)
if field_data and isinstance(field_data, dict):
# Check if _credentials_id key exists in the field data
if "_credentials_id" in field_data:
cred_id = field_data["_credentials_id"]
if cred_id:
# Credential ID provided - acquire credentials
provider = info.get("config", {}).get(
"provider", "external service"
)
file_name = field_data.get("name", "selected file")
try:
credentials, lock = await creds_manager.acquire(
user_id, cred_id
)
creds_locks.append(lock)
extra_exec_kwargs[kwarg_name] = credentials
except ValueError:
# Credential was deleted or doesn't exist
raise ValueError(
f"Authentication expired for '{file_name}' in field '{field_name}'. "
f"The saved {provider.capitalize()} credentials no longer exist. "
f"Please re-select the file to re-authenticate."
)
# else: _credentials_id is explicitly None, skip credentials (for chained data)
else:
# _credentials_id key missing entirely - this is an error
provider = info.get("config", {}).get("provider", "external service")
file_name = field_data.get("name", "selected file")
raise ValueError(
f"Authentication missing for '{file_name}' in field '{field_name}'. "
f"Please re-select the file to authenticate with {provider.capitalize()}."
)
auto_extra_kwargs, auto_locks = await _acquire_auto_credentials(
input_model=input_model,
input_data=input_data,
creds_manager=creds_manager,
user_id=user_id,
)
extra_exec_kwargs.update(auto_extra_kwargs)
creds_locks.extend(auto_locks)
output_size = 0

View File

@@ -0,0 +1,320 @@
"""
Tests for auto_credentials handling in execute_node().
These test the _acquire_auto_credentials() helper function extracted from
execute_node() (manager.py lines 273-308).
"""
import pytest
from pytest_mock import MockerFixture
@pytest.fixture
def google_drive_file_data():
return {
"valid": {
"_credentials_id": "cred-id-123",
"id": "file-123",
"name": "test.xlsx",
"mimeType": "application/vnd.google-apps.spreadsheet",
},
"chained": {
"_credentials_id": None,
"id": "file-456",
"name": "chained.xlsx",
"mimeType": "application/vnd.google-apps.spreadsheet",
},
"missing_key": {
"id": "file-789",
"name": "bad.xlsx",
"mimeType": "application/vnd.google-apps.spreadsheet",
},
}
@pytest.fixture
def mock_input_model(mocker: MockerFixture):
"""Create a mock input model with get_auto_credentials_fields() returning one field."""
input_model = mocker.MagicMock()
input_model.get_auto_credentials_fields.return_value = {
"credentials": {
"field_name": "spreadsheet",
"config": {
"provider": "google",
"type": "oauth2",
"scopes": ["https://www.googleapis.com/auth/drive.readonly"],
},
}
}
return input_model
@pytest.fixture
def mock_creds_manager(mocker: MockerFixture):
manager = mocker.AsyncMock()
mock_lock = mocker.AsyncMock()
mock_creds = mocker.MagicMock()
mock_creds.id = "cred-id-123"
mock_creds.provider = "google"
manager.acquire.return_value = (mock_creds, mock_lock)
return manager, mock_creds, mock_lock
@pytest.mark.asyncio
async def test_auto_credentials_happy_path(
mocker: MockerFixture,
google_drive_file_data,
mock_input_model,
mock_creds_manager,
):
"""When field_data has a valid _credentials_id, credentials should be acquired."""
from backend.executor.manager import _acquire_auto_credentials
manager, mock_creds, mock_lock = mock_creds_manager
input_data = {"spreadsheet": google_drive_file_data["valid"]}
extra_kwargs, locks = await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
manager.acquire.assert_called_once_with("user-1", "cred-id-123")
assert extra_kwargs["credentials"] == mock_creds
assert mock_lock in locks
@pytest.mark.asyncio
async def test_auto_credentials_field_none_static_raises(
mocker: MockerFixture,
mock_input_model,
mock_creds_manager,
):
"""
[THE BUG FIX TEST — OPEN-2895]
When field_data is None and the key IS in input_data (user didn't select a file),
should raise ValueError instead of silently skipping.
"""
from backend.executor.manager import _acquire_auto_credentials
manager, _, _ = mock_creds_manager
# Key is present but value is None = user didn't select a file
input_data = {"spreadsheet": None}
with pytest.raises(ValueError, match="No file selected"):
await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
@pytest.mark.asyncio
async def test_auto_credentials_field_absent_skips(
mocker: MockerFixture,
mock_input_model,
mock_creds_manager,
):
"""
When the field key is NOT in input_data at all (upstream connection),
should skip without error.
"""
from backend.executor.manager import _acquire_auto_credentials
manager, _, _ = mock_creds_manager
# Key not present = connected from upstream block
input_data = {}
extra_kwargs, locks = await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
manager.acquire.assert_not_called()
assert "credentials" not in extra_kwargs
assert locks == []
@pytest.mark.asyncio
async def test_auto_credentials_chained_cred_id_none(
mocker: MockerFixture,
google_drive_file_data,
mock_input_model,
mock_creds_manager,
):
"""
When _credentials_id is explicitly None (chained data from upstream),
should skip credential acquisition.
"""
from backend.executor.manager import _acquire_auto_credentials
manager, _, _ = mock_creds_manager
input_data = {"spreadsheet": google_drive_file_data["chained"]}
extra_kwargs, locks = await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
manager.acquire.assert_not_called()
assert "credentials" not in extra_kwargs
@pytest.mark.asyncio
async def test_auto_credentials_missing_cred_id_key_raises(
mocker: MockerFixture,
google_drive_file_data,
mock_input_model,
mock_creds_manager,
):
"""
When _credentials_id key is missing entirely from field_data dict,
should raise ValueError.
"""
from backend.executor.manager import _acquire_auto_credentials
manager, _, _ = mock_creds_manager
input_data = {"spreadsheet": google_drive_file_data["missing_key"]}
with pytest.raises(ValueError, match="Authentication missing"):
await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
@pytest.mark.asyncio
async def test_auto_credentials_ownership_mismatch_error(
mocker: MockerFixture,
google_drive_file_data,
mock_input_model,
mock_creds_manager,
):
"""
[SECRT-1772] When acquire() raises ValueError (credential belongs to another user),
the error message should mention 'not available' (not 'expired').
"""
from backend.executor.manager import _acquire_auto_credentials
manager, _, _ = mock_creds_manager
manager.acquire.side_effect = ValueError(
"Credentials #cred-id-123 for user #user-2 not found"
)
input_data = {"spreadsheet": google_drive_file_data["valid"]}
with pytest.raises(ValueError, match="not available in your account"):
await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-2",
)
@pytest.mark.asyncio
async def test_auto_credentials_deleted_credential_error(
mocker: MockerFixture,
google_drive_file_data,
mock_input_model,
mock_creds_manager,
):
"""
[SECRT-1772] When acquire() raises ValueError (credential was deleted),
the error message should mention 'not available' (not 'expired').
"""
from backend.executor.manager import _acquire_auto_credentials
manager, _, _ = mock_creds_manager
manager.acquire.side_effect = ValueError(
"Credentials #cred-id-123 for user #user-1 not found"
)
input_data = {"spreadsheet": google_drive_file_data["valid"]}
with pytest.raises(ValueError, match="not available in your account"):
await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
@pytest.mark.asyncio
async def test_auto_credentials_lock_appended(
mocker: MockerFixture,
google_drive_file_data,
mock_input_model,
mock_creds_manager,
):
"""Lock from acquire() should be included in returned locks list."""
from backend.executor.manager import _acquire_auto_credentials
manager, _, mock_lock = mock_creds_manager
input_data = {"spreadsheet": google_drive_file_data["valid"]}
extra_kwargs, locks = await _acquire_auto_credentials(
input_model=mock_input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
assert len(locks) == 1
assert locks[0] is mock_lock
@pytest.mark.asyncio
async def test_auto_credentials_multiple_fields(
mocker: MockerFixture,
mock_creds_manager,
):
"""When there are multiple auto_credentials fields, only valid ones should acquire."""
from backend.executor.manager import _acquire_auto_credentials
manager, mock_creds, mock_lock = mock_creds_manager
input_model = mocker.MagicMock()
input_model.get_auto_credentials_fields.return_value = {
"credentials": {
"field_name": "spreadsheet",
"config": {"provider": "google", "type": "oauth2"},
},
"credentials2": {
"field_name": "doc_file",
"config": {"provider": "google", "type": "oauth2"},
},
}
input_data = {
"spreadsheet": {
"_credentials_id": "cred-id-123",
"id": "file-1",
"name": "file1.xlsx",
},
"doc_file": {
"_credentials_id": None,
"id": "file-2",
"name": "chained.doc",
},
}
extra_kwargs, locks = await _acquire_auto_credentials(
input_model=input_model,
input_data=input_data,
creds_manager=manager,
user_id="user-1",
)
# Only the first field should have acquired credentials
manager.acquire.assert_called_once_with("user-1", "cred-id-123")
assert "credentials" in extra_kwargs
assert "credentials2" not in extra_kwargs
assert len(locks) == 1

View File

@@ -4,7 +4,7 @@ import threading
import time
from collections import defaultdict
from concurrent.futures import Future
from typing import Mapping, Optional, cast
from typing import Any, Mapping, Optional, cast
from pydantic import BaseModel, JsonValue, ValidationError
@@ -55,6 +55,87 @@ from backend.util.type import convert
config = Config()
logger = TruncatedLogger(logging.getLogger(__name__), prefix="[GraphExecutorUtil]")
# ============ Auto-Credentials Helpers ============ #
class AutoCredentialFieldInfo(BaseModel):
"""Parsed info from an auto-credential field (e.g., GoogleDriveFileField)."""
cred_id: str | None
"""The credential ID to use, or None if not provided."""
provider: str
"""The provider name (e.g., 'google')."""
file_name: str
"""The display name for error messages."""
field_name: str
"""The original field name in the schema."""
error: str | None = None
"""Validation error message, if any."""
def parse_auto_credential_field(
field_name: str,
info: dict,
field_data: Any,
*,
field_present_in_input: bool = True,
) -> AutoCredentialFieldInfo:
"""
Parse auto-credential field data and extract credential info.
This is shared logic used by both credential acquisition (manager.py)
and credential validation (utils.py).
Args:
field_name: The name of the field in the schema
info: The auto_credentials field info from get_auto_credentials_fields()
field_data: The actual field data from input
field_present_in_input: Whether the field key exists in input_data
Returns:
AutoCredentialFieldInfo with parsed data and any validation errors
"""
provider = info.get("config", {}).get("provider", "external service")
file_name = (
field_data.get("name", "selected file")
if isinstance(field_data, dict)
else "selected file"
)
result = AutoCredentialFieldInfo(
cred_id=None,
provider=provider,
file_name=file_name,
field_name=field_name,
)
if field_data and isinstance(field_data, dict):
if "_credentials_id" not in field_data:
# Key removed (e.g., on fork) — needs re-auth
result.error = (
f"Authentication missing for '{file_name}' in field "
f"'{field_name}'. Please re-select the file to authenticate "
f"with {provider.capitalize()}."
)
else:
cred_id = field_data.get("_credentials_id")
if cred_id:
result.cred_id = cred_id
# else: _credentials_id is explicitly None, skip (chained data)
elif field_data is None and not field_present_in_input:
# Field not in input_data at all = connected from upstream block, skip
pass
elif field_present_in_input:
# field_data is None/empty but key IS in input_data = user didn't select
result.error = (
f"No file selected for '{field_name}'. "
f"Please select a file to provide "
f"{provider.capitalize()} authentication."
)
return result
# ============ Resource Helpers ============ #
@@ -259,7 +340,8 @@ async def _validate_node_input_credentials(
# Find any fields of type CredentialsMetaInput
credentials_fields = block.input_schema.get_credentials_fields()
if not credentials_fields:
auto_credentials_fields = block.input_schema.get_auto_credentials_fields()
if not credentials_fields and not auto_credentials_fields:
continue
# Track if any credential field is missing for this node
@@ -339,6 +421,52 @@ async def _validate_node_input_credentials(
] = "Invalid credentials: type/provider mismatch"
continue
# Validate auto-credentials (GoogleDriveFileField-based)
# These have _credentials_id embedded in the file field data
if auto_credentials_fields:
for _kwarg_name, info in auto_credentials_fields.items():
field_name = info["field_name"]
# Check input_default and nodes_input_masks for the field value
field_value = node.input_default.get(field_name)
if nodes_input_masks and node.id in nodes_input_masks:
field_value = nodes_input_masks[node.id].get(
field_name, field_value
)
# Use shared helper to parse the field
parsed = parse_auto_credential_field(
field_name=field_name,
info=info,
field_data=field_value,
field_present_in_input=True, # For validation, assume present
)
if parsed.error:
has_missing_credentials = True
credential_errors[node.id][field_name] = parsed.error
continue
if parsed.cred_id:
# Validate that credentials exist and are accessible
try:
creds_store = get_integration_credentials_store()
creds = await creds_store.get_creds_by_id(
user_id, parsed.cred_id
)
except Exception as e:
has_missing_credentials = True
credential_errors[node.id][
field_name
] = f"Credentials not available: {e}"
continue
if not creds:
has_missing_credentials = True
credential_errors[node.id][field_name] = (
"The saved credentials are not available "
"for your account. Please re-select the file to "
"authenticate with your own account."
)
# If node has optional credentials and any are missing, mark for skipping
# But only if there are no other errors for this node
if (
@@ -370,8 +498,9 @@ def make_node_credentials_input_map(
"""
result: dict[str, dict[str, JsonValue]] = {}
# Get aggregated credentials fields for the graph
graph_cred_inputs = graph.aggregate_credentials_inputs()
# Only map regular credentials (not auto_credentials, which are resolved
# at execution time from _credentials_id in file field data)
graph_cred_inputs = graph.regular_credentials_inputs
for graph_input_name, (_, compatible_node_fields, _) in graph_cred_inputs.items():
# Best-effort map: skip missing items

View File

@@ -907,3 +907,335 @@ async def test_stop_graph_execution_cascades_to_child_with_reviews(
# Verify both parent and child status updates
assert mock_execution_db.update_graph_execution_stats.call_count >= 1
# ============================================================================
# Tests for auto_credentials validation in _validate_node_input_credentials
# (Fix 3: SECRT-1772 + Fix 4: Path 4)
# ============================================================================
@pytest.mark.asyncio
async def test_validate_node_input_credentials_auto_creds_valid(
mocker: MockerFixture,
):
"""
[SECRT-1772] When a node has auto_credentials with a valid _credentials_id
that exists in the store, validation should pass without errors.
"""
from backend.executor.utils import _validate_node_input_credentials
mock_node = mocker.MagicMock()
mock_node.id = "node-with-auto-creds"
mock_node.credentials_optional = False
mock_node.input_default = {
"spreadsheet": {
"_credentials_id": "valid-cred-id",
"id": "file-123",
"name": "test.xlsx",
}
}
mock_block = mocker.MagicMock()
# No regular credentials fields
mock_block.input_schema.get_credentials_fields.return_value = {}
# Has auto_credentials fields
mock_block.input_schema.get_auto_credentials_fields.return_value = {
"credentials": {
"field_name": "spreadsheet",
"config": {"provider": "google", "type": "oauth2"},
}
}
mock_node.block = mock_block
mock_graph = mocker.MagicMock()
mock_graph.nodes = [mock_node]
# Mock the credentials store to return valid credentials
mock_store = mocker.MagicMock()
mock_creds = mocker.MagicMock()
mock_creds.id = "valid-cred-id"
mock_store.get_creds_by_id = mocker.AsyncMock(return_value=mock_creds)
mocker.patch(
"backend.executor.utils.get_integration_credentials_store",
return_value=mock_store,
)
errors, nodes_to_skip = await _validate_node_input_credentials(
graph=mock_graph,
user_id="test-user",
nodes_input_masks=None,
)
assert mock_node.id not in errors
assert mock_node.id not in nodes_to_skip
@pytest.mark.asyncio
async def test_validate_node_input_credentials_auto_creds_missing(
mocker: MockerFixture,
):
"""
[SECRT-1772] When a node has auto_credentials with a _credentials_id
that doesn't exist for the current user, validation should report an error.
"""
from backend.executor.utils import _validate_node_input_credentials
mock_node = mocker.MagicMock()
mock_node.id = "node-with-bad-auto-creds"
mock_node.credentials_optional = False
mock_node.input_default = {
"spreadsheet": {
"_credentials_id": "other-users-cred-id",
"id": "file-123",
"name": "test.xlsx",
}
}
mock_block = mocker.MagicMock()
mock_block.input_schema.get_credentials_fields.return_value = {}
mock_block.input_schema.get_auto_credentials_fields.return_value = {
"credentials": {
"field_name": "spreadsheet",
"config": {"provider": "google", "type": "oauth2"},
}
}
mock_node.block = mock_block
mock_graph = mocker.MagicMock()
mock_graph.nodes = [mock_node]
# Mock the credentials store to return None (cred not found for this user)
mock_store = mocker.MagicMock()
mock_store.get_creds_by_id = mocker.AsyncMock(return_value=None)
mocker.patch(
"backend.executor.utils.get_integration_credentials_store",
return_value=mock_store,
)
errors, nodes_to_skip = await _validate_node_input_credentials(
graph=mock_graph,
user_id="different-user",
nodes_input_masks=None,
)
assert mock_node.id in errors
assert "spreadsheet" in errors[mock_node.id]
assert "not available" in errors[mock_node.id]["spreadsheet"].lower()
@pytest.mark.asyncio
async def test_validate_node_input_credentials_both_regular_and_auto(
mocker: MockerFixture,
):
"""
[SECRT-1772] A node that has BOTH regular credentials AND auto_credentials
should have both validated.
"""
from backend.executor.utils import _validate_node_input_credentials
mock_node = mocker.MagicMock()
mock_node.id = "node-with-both-creds"
mock_node.credentials_optional = False
mock_node.input_default = {
"credentials": {
"id": "regular-cred-id",
"provider": "github",
"type": "api_key",
},
"spreadsheet": {
"_credentials_id": "auto-cred-id",
"id": "file-123",
"name": "test.xlsx",
},
}
mock_credentials_field_type = mocker.MagicMock()
mock_credentials_meta = mocker.MagicMock()
mock_credentials_meta.id = "regular-cred-id"
mock_credentials_meta.provider = "github"
mock_credentials_meta.type = "api_key"
mock_credentials_field_type.model_validate.return_value = mock_credentials_meta
mock_block = mocker.MagicMock()
# Regular credentials field
mock_block.input_schema.get_credentials_fields.return_value = {
"credentials": mock_credentials_field_type,
}
# Auto-credentials field
mock_block.input_schema.get_auto_credentials_fields.return_value = {
"auto_credentials": {
"field_name": "spreadsheet",
"config": {"provider": "google", "type": "oauth2"},
}
}
mock_node.block = mock_block
mock_graph = mocker.MagicMock()
mock_graph.nodes = [mock_node]
# Mock the credentials store to return valid credentials for both
mock_store = mocker.MagicMock()
mock_regular_creds = mocker.MagicMock()
mock_regular_creds.id = "regular-cred-id"
mock_regular_creds.provider = "github"
mock_regular_creds.type = "api_key"
mock_auto_creds = mocker.MagicMock()
mock_auto_creds.id = "auto-cred-id"
def get_creds_side_effect(user_id, cred_id):
if cred_id == "regular-cred-id":
return mock_regular_creds
elif cred_id == "auto-cred-id":
return mock_auto_creds
return None
mock_store.get_creds_by_id = mocker.AsyncMock(side_effect=get_creds_side_effect)
mocker.patch(
"backend.executor.utils.get_integration_credentials_store",
return_value=mock_store,
)
errors, nodes_to_skip = await _validate_node_input_credentials(
graph=mock_graph,
user_id="test-user",
nodes_input_masks=None,
)
# Both should validate successfully - no errors
assert mock_node.id not in errors
assert mock_node.id not in nodes_to_skip
@pytest.mark.asyncio
async def test_validate_node_input_credentials_auto_creds_skipped_when_none(
mocker: MockerFixture,
):
"""
When a node has auto_credentials but the field value has _credentials_id=None
(e.g., from upstream connection), validation should skip it without error.
"""
from backend.executor.utils import _validate_node_input_credentials
mock_node = mocker.MagicMock()
mock_node.id = "node-with-chained-auto-creds"
mock_node.credentials_optional = False
mock_node.input_default = {
"spreadsheet": {
"_credentials_id": None,
"id": "file-123",
"name": "test.xlsx",
}
}
mock_block = mocker.MagicMock()
mock_block.input_schema.get_credentials_fields.return_value = {}
mock_block.input_schema.get_auto_credentials_fields.return_value = {
"credentials": {
"field_name": "spreadsheet",
"config": {"provider": "google", "type": "oauth2"},
}
}
mock_node.block = mock_block
mock_graph = mocker.MagicMock()
mock_graph.nodes = [mock_node]
errors, nodes_to_skip = await _validate_node_input_credentials(
graph=mock_graph,
user_id="test-user",
nodes_input_masks=None,
)
# No error - chained data with None cred_id is valid
assert mock_node.id not in errors
# ============================================================================
# Tests for CredentialsFieldInfo auto_credential tag (Fix 4: Path 4)
# ============================================================================
def test_credentials_field_info_auto_credential_tag():
"""
[Path 4] CredentialsFieldInfo should support is_auto_credential and
input_field_name fields for distinguishing auto from regular credentials.
"""
from backend.data.model import CredentialsFieldInfo
# Regular credential should have is_auto_credential=False by default
regular = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["github"],
"credentials_types": ["api_key"],
},
by_alias=True,
)
assert regular.is_auto_credential is False
assert regular.input_field_name is None
# Auto credential should have is_auto_credential=True
auto = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["google"],
"credentials_types": ["oauth2"],
"is_auto_credential": True,
"input_field_name": "spreadsheet",
},
by_alias=True,
)
assert auto.is_auto_credential is True
assert auto.input_field_name == "spreadsheet"
def test_make_node_credentials_input_map_excludes_auto_creds(
mocker: MockerFixture,
):
"""
[Path 4] make_node_credentials_input_map should only include regular credentials,
not auto_credentials (which are resolved at execution time).
"""
from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
from backend.executor.utils import make_node_credentials_input_map
from backend.integrations.providers import ProviderName
# Create a mock graph with aggregate_credentials_inputs that returns
# both regular and auto credentials
mock_graph = mocker.MagicMock()
regular_field_info = CredentialsFieldInfo.model_validate(
{
"credentials_provider": ["github"],
"credentials_types": ["api_key"],
"is_auto_credential": False,
},
by_alias=True,
)
# Mock regular_credentials_inputs property (auto_credentials are excluded)
mock_graph.regular_credentials_inputs = {
"github_creds": (regular_field_info, {("node-1", "credentials")}, True),
}
graph_credentials_input = {
"github_creds": CredentialsMetaInput(
id="cred-123",
provider=ProviderName("github"),
type="api_key",
),
}
result = make_node_credentials_input_map(mock_graph, graph_credentials_input)
# Regular credentials should be mapped
assert "node-1" in result
assert "credentials" in result["node-1"]
# Auto credentials should NOT appear in the result
# (they would have been mapped to the kwarg_name "credentials" not "spreadsheet")
for node_id, fields in result.items():
for field_name, value in fields.items():
# Verify no auto-credential phantom entries
if isinstance(value, dict):
assert "_credentials_id" not in value

View File

@@ -342,14 +342,6 @@ async def store_media_file(
if not target_path.is_file():
raise ValueError(f"Local file does not exist: {target_path}")
# Virus scan the local file before any further processing
local_content = target_path.read_bytes()
if len(local_content) > MAX_FILE_SIZE_BYTES:
raise ValueError(
f"File too large: {len(local_content)} bytes > {MAX_FILE_SIZE_BYTES} bytes"
)
await scan_content_safe(local_content, filename=sanitized_file)
# Return based on requested format
if return_format == "for_local_processing":
# Use when processing files locally with tools like ffmpeg, MoviePy, PIL

View File

@@ -247,100 +247,3 @@ class TestFileCloudIntegration:
execution_context=make_test_context(graph_exec_id=graph_exec_id),
return_format="for_local_processing",
)
@pytest.mark.asyncio
async def test_store_media_file_local_path_scanned(self):
"""Test that local file paths are scanned for viruses."""
graph_exec_id = "test-exec-123"
local_file = "test_video.mp4"
file_content = b"fake video content"
with patch(
"backend.util.file.get_cloud_storage_handler"
) as mock_handler_getter, patch(
"backend.util.file.scan_content_safe"
) as mock_scan, patch(
"backend.util.file.Path"
) as mock_path_class:
# Mock cloud storage handler - not a cloud path
mock_handler = MagicMock()
mock_handler.is_cloud_path.return_value = False
mock_handler_getter.return_value = mock_handler
# Mock virus scanner
mock_scan.return_value = None
# Mock file system operations
mock_base_path = MagicMock()
mock_target_path = MagicMock()
mock_resolved_path = MagicMock()
mock_path_class.return_value = mock_base_path
mock_base_path.mkdir = MagicMock()
mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
mock_target_path.resolve.return_value = mock_resolved_path
mock_resolved_path.is_relative_to.return_value = True
mock_resolved_path.is_file.return_value = True
mock_resolved_path.read_bytes.return_value = file_content
mock_resolved_path.relative_to.return_value = Path(local_file)
mock_resolved_path.name = local_file
result = await store_media_file(
file=MediaFileType(local_file),
execution_context=make_test_context(graph_exec_id=graph_exec_id),
return_format="for_local_processing",
)
# Verify virus scan was called for local file
mock_scan.assert_called_once_with(file_content, filename=local_file)
# Result should be the relative path
assert str(result) == local_file
@pytest.mark.asyncio
async def test_store_media_file_local_path_virus_detected(self):
"""Test that infected local files raise VirusDetectedError."""
from backend.api.features.store.exceptions import VirusDetectedError
graph_exec_id = "test-exec-123"
local_file = "infected.exe"
file_content = b"malicious content"
with patch(
"backend.util.file.get_cloud_storage_handler"
) as mock_handler_getter, patch(
"backend.util.file.scan_content_safe"
) as mock_scan, patch(
"backend.util.file.Path"
) as mock_path_class:
# Mock cloud storage handler - not a cloud path
mock_handler = MagicMock()
mock_handler.is_cloud_path.return_value = False
mock_handler_getter.return_value = mock_handler
# Mock virus scanner to detect virus
mock_scan.side_effect = VirusDetectedError(
"EICAR-Test-File", "File rejected due to virus detection"
)
# Mock file system operations
mock_base_path = MagicMock()
mock_target_path = MagicMock()
mock_resolved_path = MagicMock()
mock_path_class.return_value = mock_base_path
mock_base_path.mkdir = MagicMock()
mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
mock_target_path.resolve.return_value = mock_resolved_path
mock_resolved_path.is_relative_to.return_value = True
mock_resolved_path.is_file.return_value = True
mock_resolved_path.read_bytes.return_value = file_content
with pytest.raises(VirusDetectedError):
await store_media_file(
file=MediaFileType(local_file),
execution_context=make_test_context(graph_exec_id=graph_exec_id),
return_format="for_local_processing",
)

File diff suppressed because it is too large Load Diff

View File

@@ -12,16 +12,16 @@ python = ">=3.10,<3.14"
aio-pika = "^9.5.5"
aiohttp = "^3.10.0"
aiodns = "^3.5.0"
anthropic = "^0.79.0"
anthropic = "^0.59.0"
apscheduler = "^3.11.1"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
click = "^8.2.0"
cryptography = "^46.0"
cryptography = "^45.0"
discord-py = "^2.5.2"
e2b-code-interpreter = "^1.5.2"
elevenlabs = "^1.50.0"
fastapi = "^0.128.5"
fastapi = "^0.116.1"
feedparser = "^6.0.11"
flake8 = "^7.3.0"
google-api-python-client = "^2.177.0"
@@ -35,10 +35,10 @@ jinja2 = "^3.1.6"
jsonref = "^1.1.0"
jsonschema = "^4.25.0"
langfuse = "^3.11.0"
launchdarkly-server-sdk = "^9.14.1"
launchdarkly-server-sdk = "^9.12.0"
mem0ai = "^0.1.115"
moviepy = "^2.1.2"
ollama = "^0.6.1"
ollama = "^0.5.1"
openai = "^1.97.1"
orjson = "^3.10.0"
pika = "^1.3.2"
@@ -48,16 +48,16 @@ postmarker = "^1.0"
praw = "~7.8.1"
prisma = "^0.15.0"
rank-bm25 = "^0.2.2"
prometheus-client = "^0.24.1"
prometheus-client = "^0.22.1"
prometheus-fastapi-instrumentator = "^7.0.0"
psutil = "^7.0.0"
psycopg2-binary = "^2.9.10"
pydantic = { extras = ["email"], version = "^2.12.5" }
pydantic-settings = "^2.12.0"
pydantic = { extras = ["email"], version = "^2.11.7" }
pydantic-settings = "^2.10.1"
pytest = "^8.4.1"
pytest-asyncio = "^1.1.0"
python-dotenv = "^1.1.1"
python-multipart = "^0.0.22"
python-multipart = "^0.0.20"
redis = "^6.2.0"
regex = "^2025.9.18"
replicate = "^1.0.6"
@@ -65,11 +65,11 @@ sentry-sdk = {extras = ["anthropic", "fastapi", "launchdarkly", "openai", "sqlal
sqlalchemy = "^2.0.40"
strenum = "^0.4.9"
stripe = "^11.5.0"
supabase = "2.27.3"
tenacity = "^9.1.4"
todoist-api-python = "^3.2.1"
supabase = "2.17.0"
tenacity = "^9.1.2"
todoist-api-python = "^2.1.7"
tweepy = "^4.16.0"
uvicorn = { extras = ["standard"], version = "^0.40.0" }
uvicorn = { extras = ["standard"], version = "^0.35.0" }
websockets = "^15.0"
youtube-transcript-api = "^1.2.1"
yt-dlp = "2025.12.08"
@@ -77,7 +77,7 @@ zerobouncesdk = "^1.1.2"
# NOTE: please insert new dependencies in their alphabetical location
pytest-snapshot = "^0.9.0"
aiofiles = "^24.1.0"
tiktoken = "^0.12.0"
tiktoken = "^0.9.0"
aioclamd = "^1.0.0"
setuptools = "^80.9.0"
gcloud-aio-storage = "^9.5.0"
@@ -95,13 +95,13 @@ black = "^24.10.0"
faker = "^38.2.0"
httpx = "^0.28.1"
isort = "^5.13.2"
poethepoet = "^0.41.0"
poethepoet = "^0.37.0"
pre-commit = "^4.4.0"
pyright = "^1.1.407"
pytest-mock = "^3.15.1"
pytest-watcher = "^0.6.3"
pytest-watcher = "^0.4.2"
requests = "^2.32.5"
ruff = "^0.15.0"
ruff = "^0.14.5"
# NOTE: please insert new dependencies in their alphabetical location
[build-system]

View File

@@ -102,7 +102,7 @@
"react-markdown": "9.0.3",
"react-modal": "3.16.3",
"react-shepherd": "6.1.9",
"react-window": "2.2.0",
"react-window": "1.8.11",
"recharts": "3.3.0",
"rehype-autolink-headings": "7.1.0",
"rehype-highlight": "7.0.2",
@@ -140,7 +140,7 @@
"@types/react": "18.3.17",
"@types/react-dom": "18.3.5",
"@types/react-modal": "3.16.3",
"@types/react-window": "2.0.0",
"@types/react-window": "1.8.8",
"@vitejs/plugin-react": "5.1.2",
"axe-playwright": "2.2.2",
"chromatic": "13.3.3",

View File

@@ -228,8 +228,8 @@ importers:
specifier: 6.1.9
version: 6.1.9(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(typescript@5.9.3)
react-window:
specifier: 2.2.0
version: 2.2.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
specifier: 1.8.11
version: 1.8.11(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
recharts:
specifier: 3.3.0
version: 3.3.0(@types/react@18.3.17)(react-dom@18.3.1(react@18.3.1))(react-is@18.3.1)(react@18.3.1)(redux@5.0.1)
@@ -337,8 +337,8 @@ importers:
specifier: 3.16.3
version: 3.16.3
'@types/react-window':
specifier: 2.0.0
version: 2.0.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
specifier: 1.8.8
version: 1.8.8
'@vitejs/plugin-react':
specifier: 5.1.2
version: 5.1.2(vite@7.3.1(@types/node@24.10.0)(jiti@2.6.1)(terser@5.44.1)(yaml@2.8.2))
@@ -3469,9 +3469,8 @@ packages:
'@types/react-modal@3.16.3':
resolution: {integrity: sha512-xXuGavyEGaFQDgBv4UVm8/ZsG+qxeQ7f77yNrW3n+1J6XAstUy5rYHeIHPh1KzsGc6IkCIdu6lQ2xWzu1jBTLg==}
'@types/react-window@2.0.0':
resolution: {integrity: sha512-E8hMDtImEpMk1SjswSvqoSmYvk7GEtyVaTa/GJV++FdDNuMVVEzpAClyJ0nqeKYBrMkGiyH6M1+rPLM0Nu1exQ==}
deprecated: This is a stub types definition. react-window provides its own type definitions, so you do not need this installed.
'@types/react-window@1.8.8':
resolution: {integrity: sha512-8Ls660bHR1AUA2kuRvVG9D/4XpRC6wjAaPT9dil7Ckc76eP9TKWZwwmgfq8Q1LANX3QNDnoU4Zp48A3w+zK69Q==}
'@types/react@18.3.17':
resolution: {integrity: sha512-opAQ5no6LqJNo9TqnxBKsgnkIYHozW9KSTlFVoSUJYh1Fl/sswkEoqIugRSm7tbh6pABtYjGAjW+GOS23j8qbw==}
@@ -5977,6 +5976,9 @@ packages:
resolution: {integrity: sha512-UERzLsxzllchadvbPs5aolHh65ISpKpM+ccLbOJ8/vvpBKmAWf+la7dXFy7Mr0ySHbdHrFv5kGFCUHHe6GFEmw==}
engines: {node: '>= 4.0.0'}
memoize-one@5.2.1:
resolution: {integrity: sha512-zYiwtZUcYyXKo/np96AGZAckk+FWWsUdJ3cHGGmld7+AhvcWmQyGCYUh1hc4Q/pkOhb65dQR/pqCyK0cOaHz4Q==}
merge-stream@2.0.0:
resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==}
@@ -6889,11 +6891,12 @@ packages:
'@types/react':
optional: true
react-window@2.2.0:
resolution: {integrity: sha512-Y2L7yonHq6K1pQA2P98wT5QdIsEcjBTB7T8o6Mub12hH9eYppXoYu6vgClmcjlh3zfNcW2UrXiJJJqDxUY7GVw==}
react-window@1.8.11:
resolution: {integrity: sha512-+SRbUVT2scadgFSWx+R1P754xHPEqvcfSfVX10QYg6POOz+WNgkN48pS+BtZNIMGiL1HYrSEiCkwsMS15QogEQ==}
engines: {node: '>8.0.0'}
peerDependencies:
react: ^18.0.0 || ^19.0.0
react-dom: ^18.0.0 || ^19.0.0
react: ^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
react-dom: ^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
react@18.3.1:
resolution: {integrity: sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==}
@@ -11600,12 +11603,9 @@ snapshots:
dependencies:
'@types/react': 18.3.17
'@types/react-window@2.0.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)':
'@types/react-window@1.8.8':
dependencies:
react-window: 2.2.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
transitivePeerDependencies:
- react
- react-dom
'@types/react': 18.3.17
'@types/react@18.3.17':
dependencies:
@@ -14545,6 +14545,8 @@ snapshots:
dependencies:
fs-monkey: 1.1.0
memoize-one@5.2.1: {}
merge-stream@2.0.0: {}
merge2@1.4.1: {}
@@ -15590,8 +15592,10 @@ snapshots:
optionalDependencies:
'@types/react': 18.3.17
react-window@2.2.0(react-dom@18.3.1(react@18.3.1))(react@18.3.1):
react-window@1.8.11(react-dom@18.3.1(react@18.3.1))(react@18.3.1):
dependencies:
'@babel/runtime': 7.28.4
memoize-one: 5.2.1
react: 18.3.1
react-dom: 18.3.1(react@18.3.1)

View File

@@ -12307,9 +12307,7 @@
"title": "Location"
},
"msg": { "type": "string", "title": "Message" },
"type": { "type": "string", "title": "Error Type" },
"input": { "title": "Input" },
"ctx": { "type": "object", "title": "Context" }
"type": { "type": "string", "title": "Error Type" }
},
"type": "object",
"required": ["loc", "msg", "type"],

View File

@@ -104,31 +104,7 @@ export function FileInput(props: Props) {
return false;
}
const getFileLabelFromValue = (val: unknown): string => {
// Handle object format from external API: { name, type, size, data }
if (val && typeof val === "object") {
const obj = val as Record<string, unknown>;
if (typeof obj.name === "string") {
return getFileLabel(
obj.name,
typeof obj.type === "string" ? obj.type : "",
);
}
if (typeof obj.type === "string") {
const mimeParts = obj.type.split("/");
if (mimeParts.length > 1) {
return `${mimeParts[1].toUpperCase()} file`;
}
return `${obj.type} file`;
}
return "File";
}
// Handle string values (data URIs or file paths)
if (typeof val !== "string") {
return "File";
}
const getFileLabelFromValue = (val: string) => {
if (val.startsWith("data:")) {
const matches = val.match(/^data:([^;]+);/);
if (matches?.[1]) {

View File

@@ -4,7 +4,9 @@ import { loadScript } from "@/services/scripts/scripts";
export async function loadGoogleAPIPicker(): Promise<void> {
validateWindow();
await loadScript("https://apis.google.com/js/api.js");
await loadScript("https://apis.google.com/js/api.js", {
referrerPolicy: "no-referrer-when-downgrade",
});
const googleAPI = window.gapi;
if (!googleAPI) {
@@ -27,7 +29,9 @@ export async function loadGoogleIdentityServices(): Promise<void> {
throw new Error("Google Identity Services cannot load on server");
}
await loadScript("https://accounts.google.com/gsi/client");
await loadScript("https://accounts.google.com/gsi/client", {
referrerPolicy: "no-referrer-when-downgrade",
});
const google = window.google;
if (!google?.accounts?.oauth2) {

View File

@@ -4,7 +4,7 @@ import { Button } from "@/components/atoms/Button/Button";
import { Input } from "@/components/atoms/Input/Input";
import { Text } from "@/components/atoms/Text/Text";
import { Bell, MagnifyingGlass, X } from "@phosphor-icons/react";
import { List, type RowComponentProps } from "react-window";
import { FixedSizeList as List } from "react-window";
import { AgentExecutionWithInfo } from "../../helpers";
import { ActivityItem } from "../ActivityItem";
import styles from "./styles.module.css";
@@ -19,16 +19,14 @@ interface Props {
recentFailures: AgentExecutionWithInfo[];
}
interface ActivityRowProps {
executions: AgentExecutionWithInfo[];
interface VirtualizedItemProps {
index: number;
style: React.CSSProperties;
data: AgentExecutionWithInfo[];
}
function VirtualizedActivityItem({
index,
style,
executions,
}: RowComponentProps<ActivityRowProps>) {
const execution = executions[index];
function VirtualizedActivityItem({ index, style, data }: VirtualizedItemProps) {
const execution = data[index];
return (
<div style={style}>
<ActivityItem execution={execution} />
@@ -131,13 +129,14 @@ export function ActivityDropdown({
>
{filteredExecutions.length > 0 ? (
<List
defaultHeight={listHeight}
rowCount={filteredExecutions.length}
rowHeight={itemHeight}
rowProps={{ executions: filteredExecutions }}
rowComponent={VirtualizedActivityItem}
style={{ width: 320, height: listHeight }}
/>
height={listHeight}
width={320} // Match dropdown width (w-80 = 20rem = 320px)
itemCount={filteredExecutions.length}
itemSize={itemHeight}
itemData={filteredExecutions}
>
{VirtualizedActivityItem}
</List>
) : (
<div className="flex h-full flex-col items-center justify-center gap-5 pb-8 pt-6">
<div className="mx-auto inline-flex flex-col items-center justify-center rounded-full bg-bgLightGrey p-6">