Compare commits

...

108 Commits

Author SHA1 Message Date
Zamil Majdy
60c6c14211 fix(chat/tools): remove hard output truncation from sandbox and web_fetch
The SDK already handles oversized tool output by writing to tool-results
files and reading back via MCP. Our 50K char truncation was cutting off
output before the agent could see it — the SDK's mechanism is the proper
way to handle large results.
2026-02-13 16:56:05 +04:00
Zamil Majdy
a86878ae6f fix(chat/sdk): align read_transcript_file min lines with validate_transcript
Both now accept >=2 lines (a valid 1-turn transcript is user+assistant).
2026-02-13 16:39:32 +04:00
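
A minimal sketch of the shared minimum-line rule described in the commit above; the real `validate_transcript` / `read_transcript_file` do more than count lines, so treat this as illustration only.

```python
# Assumed helper, not the shipped code: a valid one-turn transcript is just
# a user line plus an assistant line, hence the minimum of 2.
MIN_TRANSCRIPT_LINES = 2

def has_enough_lines(jsonl_text: str) -> bool:
    lines = [line for line in jsonl_text.splitlines() if line.strip()]
    return len(lines) >= MIN_TRANSCRIPT_LINES
```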
Zamil Majdy
80e413b969 fix(chat/sdk): address PR #12103 review comments
- Remove duplicate security check in _cleanup_sdk_tool_results (copy-paste)
- Don't delete transcript on transient errors — only the current
  turn failed, the transcript is still valid for future resume
- Add post-construction realpath check in write_transcript_to_tempfile
  to satisfy CodeQL taint analysis
2026-02-13 16:38:03 +04:00
Zamil Majdy
52c8a25531 fix(chat/sdk): fix transcript validation and type captured_transcript properly
- Replace dict[str,str] with CapturedTranscript dataclass for type safety
- Fix validate_transcript requiring >=3 lines — after stripping metadata,
  a valid 1-turn conversation is just user+assistant (2 lines)
- Apply CodeQL autofix: internalize max_len in _sanitize_id, add fallback
2026-02-13 16:32:06 +04:00
Zamil Majdy
d0f0c32e70 fix(chat/sdk): validate cwd against sandbox prefix to fix CodeQL alert
CodeQL traces session_id → cwd → os.makedirs/open as uncontrolled path.
Add realpath + startswith check against /tmp/copilot- prefix directly in
write_transcript_to_tempfile so CodeQL recognizes the sanitization.

Also resolve the prefix with realpath for macOS where /tmp → /private/tmp.
2026-02-13 15:49:30 +04:00
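
A minimal sketch of the realpath + prefix check described in the commit above; the constant and function names here are assumptions, not the actual `write_transcript_to_tempfile` code.

```python
import os

# realpath on the prefix handles macOS, where /tmp resolves to /private/tmp.
SANDBOX_PREFIX = os.path.realpath("/tmp/copilot-")

def ensure_within_sandbox(cwd: str) -> str:
    resolved = os.path.realpath(cwd)
    if not resolved.startswith(SANDBOX_PREFIX):
        raise ValueError(f"cwd {resolved!r} is outside sandbox prefix {SANDBOX_PREFIX!r}")
    return resolved
```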
Zamil Majdy
8dfd0a77a0 fix(chat/sdk): sanitize IDs in transcript paths to fix CodeQL alert
Add _sanitize_id() that strips non-hex characters from session/user IDs
before using them in file paths. Also add realpath containment check in
write_transcript_to_tempfile as defence-in-depth.
2026-02-13 15:44:29 +04:00
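
A hedged sketch in the spirit of the `_sanitize_id()` described above: hex digits are kept (plus hyphens, assumed here for UUID-style IDs), the length cap is internal, and a fallback covers fully rejected input. The cap value and fallback string are assumptions.

```python
import re

_MAX_ID_LEN = 64  # assumed cap; the commit only says max_len was internalized

def _sanitize_id(value: str) -> str:
    # Keep hex digits and hyphens; drop everything else before path use.
    cleaned = re.sub(r"[^0-9a-fA-F-]", "", value)[:_MAX_ID_LEN]
    return cleaned or "invalid"  # fallback when nothing survives sanitization
```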
Zamil Majdy
4bfd6c8870 fix(chat/sdk): address additional PR review feedback
- transcript: compare bytes-to-bytes in size guard (not str vs bytes)
- service: move user message preview from INFO to DEBUG level (PII)
2026-02-13 15:41:45 +04:00
Zamil Majdy
1918828405 fix(chat/sdk): flatten transcript storage path, remove duplicate session_id
Before: chat-transcripts/{user_id}/{session_id}/{session_id}.jsonl
After:  chat-transcripts/{user_id}/{session_id}.jsonl
2026-02-13 15:39:43 +04:00
Zamil Majdy
9c855b501b fix(chat/sdk): address PR review feedback on security and robustness
- security_hooks: use realpath instead of normpath to resolve symlinks
- security_hooks: check tool-results as path segment, not substring
- response_adapter: emit StreamFinish for unknown ResultMessage subtypes
- tool_adapter: delete file after read (prevent accumulation in pods)
- check_operation_status: guard against None.strip() from LLM null args
- service: remove redundant ".." check (realpath already resolves)
2026-02-13 15:37:22 +04:00
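
A small sketch of the "path segment, not substring" review fix listed above; the helper name is an assumption.

```python
from pathlib import PurePosixPath

def in_tool_results_dir(path: str) -> bool:
    # "/a/tool-results/x" passes; "/a/not-tool-results-dir/x" does not,
    # which a plain substring check would wrongly accept.
    return "tool-results" in PurePosixPath(path).parts
```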
Zamil Majdy
5c9d0577c0 poetry lock 2026-02-13 15:34:01 +04:00
Zamil Majdy
a79bd88e7c feat(chat/sdk): move transcript storage from DB column to bucket
Replace the sdkTranscript TEXT column with WorkspaceStorageBackend
(GCS/local) for persisting Claude Code JSONL transcripts.  This removes
the implicit 512KB cap that caused --resume to degrade after a few
tool-heavy turns (JSONL is append-only and never shrinks).

Key changes:
- Strip progress/metadata entries before storing (~30% size reduction)
  with parentUuid reparenting for orphaned children
- Upload in background (asyncio.create_task) to avoid blocking SSE
- Size-based conflict guard: never overwrite a larger (newer) transcript
- Validate stripped content before upload
- Log warning when falling back to compression approach
- Enable claude_agent_use_resume by default
- Remove sdkTranscript column from schema, model, and DB layer
- Storage path: chat-transcripts/{user_id}/{session_id}/{session_id}.jsonl
2026-02-13 15:26:53 +04:00
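
A hedged sketch of the background upload with the size-based conflict guard described above; the storage method names (`get_size`, `write`) and the exact wiring are assumptions, only the key layout follows the commit message.

```python
import asyncio

async def upload_transcript(storage, user_id: str, session_id: str, data: bytes) -> None:
    key = f"chat-transcripts/{user_id}/{session_id}/{session_id}.jsonl"
    existing_size = await storage.get_size(key)  # assumed: None if the object is missing
    if existing_size is not None and existing_size > len(data):
        return  # never overwrite a larger (newer) transcript
    await storage.write(key, data)

# Fired in the background so the SSE stream is not blocked on the upload:
# asyncio.create_task(upload_transcript(storage, user_id, session_id, data))
```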
Zamil Majdy
28c1121a8f fix(chat/sdk): block built-in Bash via disallowed_tools and resolve merge conflicts
- Add disallowed_tools=["Bash"] to SDK options so the model never tries
  the built-in Bash tool (previously it tried Bash, got blocked by the
  security hook, then fell back to bash_exec — wasting a turn)
- Resolve merge conflicts in tools/models.py (keep both HEAD additions
  and incoming BlockDetails/BlockDetailsResponse)
- Fix pyright error in find_block.py (pass categories to BlockInfoSummary)
2026-02-13 14:44:42 +04:00
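
A hedged sketch of the option described above; whether `ClaudeAgentOptions` / `disallowed_tools` are the exact claude-agent-sdk names should be treated as an assumption here, but the intent matches the commit: never offer the built-in Bash tool, so the model goes straight to the sandboxed `bash_exec` MCP tool.

```python
from claude_agent_sdk import ClaudeAgentOptions  # assumed import path

options = ClaudeAgentOptions(
    disallowed_tools=["Bash"],  # built-in Bash is always denied by the security hook anyway
)
```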
Zamil Majdy
cb3839198c conflict resolve 2026-02-13 14:35:41 +04:00
Zamil Majdy
80804986b0 Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/copilot-claude-code-continue-session 2026-02-13 14:30:09 +04:00
Reinier van der Leer
43b25b5e2f ci(frontend): Speed up E2E test job (#12090)
The frontend `e2e_test` job doesn't have a working build cache setup,
causing really slow builds and therefore slow test jobs. These changes
reduce total test runtime from ~12 minutes to ~5 minutes.

### Changes 🏗️

- Inject build cache config into docker compose config; let `buildx
bake` use GHA cache directly
  - Add `docker-ci-fix-compose-build-cache.py` script
- Optimize `backend/Dockerfile` + root `.dockerignore`
- Replace broken DIY pnpm store caching with `actions/setup-node`
built-in cache management
- Add caching for test seed data created in DB

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - CI
2026-02-13 11:09:41 +01:00
Swifty
ab0b537cc7 refactor(backend): optimize find_block response size by removing raw JSON schemas (#12020)
### Changes 🏗️

The `find_block` AutoPilot tool was returning ~90K characters per
response (10 blocks). The bloat came from including full JSON Schema
objects (`input_schema`, `output_schema`) with all nested `$defs`,
`anyOf`, and type definitions for every block.

**What changed:**

- **`BlockInfoSummary` model**: Removed `input_schema` (raw JSON
Schema), `output_schema` (raw JSON Schema), and `categories`. Added
`output_fields` (compact field-level summaries matching the existing
`required_inputs` format).
- **`BlockListResponse` model**: Removed `usage_hint` (info now in
`message`).
- **`FindBlockTool._execute()`**: Now extracts compact `output_fields`
from output schema properties instead of including the entire raw
schema. Credentials handling is unchanged.
- **Test**: Added `test_response_size_average_chars_per_block` with
realistic block schemas (HTTP, Email, Claude Code) to measure and assert
response size stays under 2K chars/block.
- **`CLAUDE.md`**: Clarified `dev` vs `master` branching strategy.

**Result:** Average response size reduced from ~9,000 to ~1,300 chars
per block (~85% reduction). This directly reduces LLM token consumption,
latency, and API costs for AutoPilot interactions.

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Verified models import and serialize correctly
- [x] Verified response size: 3,970 chars for 3 realistic blocks (avg
1,323/block)
- [x] Lint (`ruff check`) and type check (`pyright`) pass on changed
files
- [x] Frontend compatibility preserved: `blocks[].name` and `count`
fields retained for `block_list` handler

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Toran Bruce Richards <toran.richards@gmail.com>
2026-02-13 11:08:51 +01:00
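
An illustrative sketch of deriving compact `output_fields` from a block's output JSON Schema, per the PR above; the exact fields kept per entry are an assumption modeled on the existing `required_inputs` format.

```python
def summarize_output_fields(output_schema: dict) -> list[dict]:
    # Keep only field-level summaries instead of the full schema with $defs/anyOf.
    fields = []
    for name, prop in (output_schema.get("properties") or {}).items():
        fields.append({
            "name": name,
            "type": prop.get("type", "any"),
            "description": prop.get("description", ""),
        })
    return fields
```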
Zamil Majdy
b915e67a9b feat(chat/sdk): add stateless multi-turn resume via JSONL transcripts
Capture Claude Code CLI session transcripts via the Stop hook and persist
them in the DB. On subsequent turns, write the transcript to a temp file
and pass --resume so the CLI restores full conversation context without
lossy history compression.

Key changes:
- transcript.py: read/write/validate JSONL transcript utilities
- security_hooks: register Stop hook to capture transcript_path
- service.py: resume strategy with fallback to compression
- schema.prisma: add sdkTranscript column to ChatSession
- Feature flag: CLAUDE_AGENT_USE_RESUME (default off)
2026-02-13 13:48:04 +04:00
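
A hedged sketch of the temp-file half of the resume flow described above; how the file and session ID are ultimately handed to `--resume` is not shown here, and all names are assumptions rather than the actual service code.

```python
import os
import tempfile

def write_transcript_to_tempfile(transcript_jsonl: str, sdk_cwd: str) -> str:
    # Restore the stored JSONL transcript on disk so the CLI can resume
    # with full conversation context instead of lossy history compression.
    fd, path = tempfile.mkstemp(suffix=".jsonl", dir=sdk_cwd)
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        f.write(transcript_jsonl)
    return path
```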
dependabot[bot]
9a8c6ad609 chore(libs/deps): bump the production-dependencies group across 1 directory with 4 updates (#12056)
Bumps the production-dependencies group with 4 updates in the
/autogpt_platform/autogpt_libs directory:
[cryptography](https://github.com/pyca/cryptography),
[fastapi](https://github.com/fastapi/fastapi),
[launchdarkly-server-sdk](https://github.com/launchdarkly/python-server-sdk)
and [supabase](https://github.com/supabase/supabase-py).

Updates `cryptography` from 46.0.4 to 46.0.5
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst">cryptography's
changelog</a>.</em></p>
<blockquote>
<p>46.0.5 - 2026-02-10</p>
<ul>
<li>An attacker could create a malicious public key that reveals portions of
your private key when using certain uncommon elliptic curves (binary curves).
This version now includes additional security checks to prevent this attack.
This issue only affects binary elliptic curves, which are rarely used in
real-world applications. Credit to <strong>XlabAI Team of Tencent Xuanwu Lab and
Atuin Automated Vulnerability Discovery Engine</strong> for reporting the issue.
<strong>CVE-2026-26007</strong></li>
<li>Support for <code>SECT*</code> binary elliptic curves is deprecated and will be
removed in the next release.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="06e120e682"><code>06e120e</code></a>
bump version for 46.0.5 release (<a
href="https://redirect.github.com/pyca/cryptography/issues/14289">#14289</a>)</li>
<li><a
href="0eebb9dbb6"><code>0eebb9d</code></a>
EC check key on cofactor &gt; 1 (<a
href="https://redirect.github.com/pyca/cryptography/issues/14287">#14287</a>)</li>
<li><a
href="bedf6e186b"><code>bedf6e1</code></a>
fix openssl version on 46 branch (<a
href="https://redirect.github.com/pyca/cryptography/issues/14220">#14220</a>)</li>
<li>See full diff in <a
href="https://github.com/pyca/cryptography/compare/46.0.4...46.0.5">compare
view</a></li>
</ul>
</details>
<br />

Updates `fastapi` from 0.128.0 to 0.128.7
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/fastapi/fastapi/releases">fastapi's
releases</a>.</em></p>
<blockquote>
<h2>0.128.7</h2>
<h3>Features</h3>
<ul>
<li> Show a clear error on attempt to include router into itself. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14258">#14258</a>
by <a
href="https://github.com/JavierSanchezCastro"><code>@​JavierSanchezCastro</code></a>.</li>
<li> Replace <code>dict</code> by <code>Mapping</code> on
<code>HTTPException.headers</code>. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/12997">#12997</a>
by <a
href="https://github.com/rijenkii"><code>@​rijenkii</code></a>.</li>
</ul>
<h3>Refactors</h3>
<ul>
<li>♻️ Simplify reading files in memory, do it sequentially instead of
(fake) parallel. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14884">#14884</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
</ul>
<h3>Docs</h3>
<ul>
<li>📝 Use <code>dfn</code> tag for definitions instead of
<code>abbr</code> in docs. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14744">#14744</a>
by <a
href="https://github.com/YuriiMotov"><code>@​YuriiMotov</code></a>.</li>
</ul>
<h3>Internal</h3>
<ul>
<li> Tweak comment in test to reference PR. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14885">#14885</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
<li>🔧 Update LLM-prompt for <code>abbr</code> and <code>dfn</code> tags.
PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14747">#14747</a>
by <a
href="https://github.com/YuriiMotov"><code>@​YuriiMotov</code></a>.</li>
<li> Test order for the submitted byte Files. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14828">#14828</a>
by <a
href="https://github.com/valentinDruzhinin"><code>@​valentinDruzhinin</code></a>.</li>
<li>🔧 Configure <code>test</code> workflow to run tests with
<code>inline-snapshot=review</code>. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14876">#14876</a>
by <a
href="https://github.com/YuriiMotov"><code>@​YuriiMotov</code></a>.</li>
</ul>
<h2>0.128.6</h2>
<h3>Fixes</h3>
<ul>
<li>🐛 Fix <code>on_startup</code> and <code>on_shutdown</code>
parameters of <code>APIRouter</code>. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14873">#14873</a>
by <a
href="https://github.com/YuriiMotov"><code>@​YuriiMotov</code></a>.</li>
</ul>
<h3>Translations</h3>
<ul>
<li>🌐 Update translations for zh (update-outdated). PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14843">#14843</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
</ul>
<h3>Internal</h3>
<ul>
<li> Fix parameterized tests with snapshots. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14875">#14875</a>
by <a
href="https://github.com/YuriiMotov"><code>@​YuriiMotov</code></a>.</li>
</ul>
<h2>0.128.5</h2>
<h3>Refactors</h3>
<ul>
<li>♻️ Refactor and simplify Pydantic v2 (and v1) compatibility internal
utils. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14862">#14862</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
</ul>
<h3>Internal</h3>
<ul>
<li> Add inline snapshot tests for OpenAPI before changes from Pydantic
v2. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14864">#14864</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
</ul>
<h2>0.128.4</h2>
<h3>Refactors</h3>
<ul>
<li>♻️ Refactor internals, simplify Pydantic v2/v1 utils,
<code>create_model_field</code>, better types for
<code>lenient_issubclass</code>. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14860">#14860</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
<li>♻️ Simplify internals, remove Pydantic v1 only logic, no longer
needed. PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14857">#14857</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
<li>♻️ Refactor internals, cleanup unneeded Pydantic v1 specific logic.
PR <a
href="https://redirect.github.com/fastapi/fastapi/pull/14856">#14856</a>
by <a
href="https://github.com/tiangolo"><code>@​tiangolo</code></a>.</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="8f82c94de0"><code>8f82c94</code></a>
🔖 Release version 0.128.7</li>
<li><a
href="5bb3423205"><code>5bb3423</code></a>
📝 Update release notes</li>
<li><a
href="6ce5e3e961"><code>6ce5e3e</code></a>
 Tweak comment in test to reference PR (<a
href="https://redirect.github.com/fastapi/fastapi/issues/14885">#14885</a>)</li>
<li><a
href="65da3dde12"><code>65da3dd</code></a>
📝 Update release notes</li>
<li><a
href="81f82fd955"><code>81f82fd</code></a>
🔧 Update LLM-prompt for <code>abbr</code> and <code>dfn</code> tags (<a
href="https://redirect.github.com/fastapi/fastapi/issues/14747">#14747</a>)</li>
<li><a
href="ff721017df"><code>ff72101</code></a>
📝 Update release notes</li>
<li><a
href="ca76a4eba9"><code>ca76a4e</code></a>
📝 Use <code>dfn</code> tag for definitions instead of <code>abbr</code>
in docs (<a
href="https://redirect.github.com/fastapi/fastapi/issues/14744">#14744</a>)</li>
<li><a
href="1133a4594d"><code>1133a45</code></a>
📝 Update release notes</li>
<li><a
href="38f965985e"><code>38f9659</code></a>
 Test order for the submitted byte Files (<a
href="https://redirect.github.com/fastapi/fastapi/issues/14828">#14828</a>)</li>
<li><a
href="3f1cc8f8f5"><code>3f1cc8f</code></a>
📝 Update release notes</li>
<li>Additional commits viewable in <a
href="https://github.com/fastapi/fastapi/compare/0.128.0...0.128.7">compare
view</a></li>
</ul>
</details>
<br />

Updates `launchdarkly-server-sdk` from 9.14.1 to 9.15.0
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/launchdarkly/python-server-sdk/releases">launchdarkly-server-sdk's
releases</a>.</em></p>
<blockquote>
<h2>v9.15.0</h2>
<h2><a
href="https://github.com/launchdarkly/python-server-sdk/compare/9.14.1...9.15.0">9.15.0</a>
(2026-02-10)</h2>
<h3>Features</h3>
<ul>
<li>Drop support for python 3.9 (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/393">#393</a>)
(<a
href="5b761bd306">5b761bd</a>)</li>
<li>Update ChangeSet to always require a Selector (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/405">#405</a>)
(<a
href="5dc4f81688">5dc4f81</a>)</li>
</ul>
<h3>Bug Fixes</h3>
<ul>
<li>Add context manager for clearer, safer locks (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/396">#396</a>)
(<a
href="beca0fa498">beca0fa</a>)</li>
<li>Address potential race condition in FeatureStore update_availability
(<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/391">#391</a>)
(<a
href="31cf4875c3">31cf487</a>)</li>
<li>Allow modifying fdv2 data source options independent of main config
(<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/403">#403</a>)
(<a
href="d78079e7f3">d78079e</a>)</li>
<li>Mark copy_with_new_sdk_key method as deprecated (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/353">#353</a>)
(<a
href="e471ccc3d5">e471ccc</a>)</li>
<li>Prevent immediate polling on recoverable error (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/399">#399</a>)
(<a
href="da565a2dce">da565a2</a>)</li>
<li>Redis store is considered initialized when <code>$inited</code> key
is written (<a
href="e99a27d48f">e99a27d</a>)</li>
<li>Stop FeatureStoreClientWrapper poller on close (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/397">#397</a>)
(<a
href="468afdfef3">468afdf</a>)</li>
<li>Update DataSystemConfig to accept list of synchronizers (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/404">#404</a>)
(<a
href="c73ad14090">c73ad14</a>)</li>
<li>Update reason documentation with inExperiment value (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/401">#401</a>)
(<a
href="cbfc3dd887">cbfc3dd</a>)</li>
<li>Update Redis to write missing <code>$inited</code> key (<a
href="e99a27d48f">e99a27d</a>)</li>
</ul>
<hr />
<p>This PR was generated with <a
href="https://github.com/googleapis/release-please">Release Please</a>.
See <a
href="https://github.com/googleapis/release-please#release-please">documentation</a>.</p>
<!-- raw HTML omitted -->
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/launchdarkly/python-server-sdk/blob/main/CHANGELOG.md">launchdarkly-server-sdk's
changelog</a>.</em></p>
<blockquote>
<h2><a
href="https://github.com/launchdarkly/python-server-sdk/compare/9.14.1...9.15.0">9.15.0</a>
(2026-02-10)</h2>
<h3>⚠ BREAKING CHANGES</h3>
<p><strong>Note:</strong> The following breaking changes apply only to
FDv2 (Flag Delivery v2) early access features, which are not subject to
semantic versioning and may change without a major version bump.</p>
<ul>
<li>Update ChangeSet to always require a Selector (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/405">#405</a>)
(<a
href="5dc4f81688">5dc4f81</a>)
<ul>
<li>The <code>ChangeSetBuilder.finish()</code> method now requires a
<code>Selector</code> parameter.</li>
</ul>
</li>
<li>Update DataSystemConfig to accept list of synchronizers (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/404">#404</a>)
(<a
href="c73ad14090">c73ad14</a>)
<ul>
<li>The <code>DataSystemConfig.synchronizers</code> field now accepts a
list of synchronizers, and the
<code>ConfigBuilder.synchronizers()</code> method accepts variadic
arguments.</li>
</ul>
</li>
</ul>
<h3>Features</h3>
<ul>
<li>Drop support for python 3.9 (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/393">#393</a>)
(<a
href="5b761bd306">5b761bd</a>)</li>
</ul>
<h3>Bug Fixes</h3>
<ul>
<li>Add context manager for clearer, safer locks (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/396">#396</a>)
(<a
href="beca0fa498">beca0fa</a>)</li>
<li>Address potential race condition in FeatureStore update_availability
(<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/391">#391</a>)
(<a
href="31cf4875c3">31cf487</a>)</li>
<li>Allow modifying fdv2 data source options independent of main config
(<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/403">#403</a>)
(<a
href="d78079e7f3">d78079e</a>)</li>
<li>Mark copy_with_new_sdk_key method as deprecated (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/353">#353</a>)
(<a
href="e471ccc3d5">e471ccc</a>)</li>
<li>Prevent immediate polling on recoverable error (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/399">#399</a>)
(<a
href="da565a2dce">da565a2</a>)</li>
<li>Redis store is considered initialized when <code>$inited</code> key
is written (<a
href="e99a27d48f">e99a27d</a>)</li>
<li>Stop FeatureStoreClientWrapper poller on close (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/397">#397</a>)
(<a
href="468afdfef3">468afdf</a>)</li>
<li>Update reason documentation with inExperiment value (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/401">#401</a>)
(<a
href="cbfc3dd887">cbfc3dd</a>)</li>
<li>Update Redis to write missing <code>$inited</code> key (<a
href="e99a27d48f">e99a27d</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="e542f737a6"><code>e542f73</code></a>
chore(main): release 9.15.0 (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/394">#394</a>)</li>
<li><a
href="e471ccc3d5"><code>e471ccc</code></a>
fix: Mark copy_with_new_sdk_key method as deprecated (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/353">#353</a>)</li>
<li><a
href="5dc4f81688"><code>5dc4f81</code></a>
feat: Update ChangeSet to always require a Selector (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/405">#405</a>)</li>
<li><a
href="f20fffeb1e"><code>f20fffe</code></a>
chore: Remove dead code, clarify names, other cleanup (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/398">#398</a>)</li>
<li><a
href="c73ad14090"><code>c73ad14</code></a>
fix: Update DataSystemConfig to accept list of synchronizers (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/404">#404</a>)</li>
<li><a
href="d78079e7f3"><code>d78079e</code></a>
fix: Allow modifying fdv2 data source options independent of main config
(<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/403">#403</a>)</li>
<li><a
href="e99a27d48f"><code>e99a27d</code></a>
chore: Support persistent data store verification in contract tests (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/402">#402</a>)</li>
<li><a
href="cbfc3dd887"><code>cbfc3dd</code></a>
fix: Update reason documentation with inExperiment value (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/401">#401</a>)</li>
<li><a
href="5a1adbb2de"><code>5a1adbb</code></a>
chore: Update sdk_metadata features (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/400">#400</a>)</li>
<li><a
href="da565a2dce"><code>da565a2</code></a>
fix: Prevent immediate polling on recoverable error (<a
href="https://redirect.github.com/launchdarkly/python-server-sdk/issues/399">#399</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/launchdarkly/python-server-sdk/compare/9.14.1...9.15.0">compare
view</a></li>
</ul>
</details>
<br />

Updates `supabase` from 2.27.2 to 2.28.0
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/supabase/supabase-py/releases">supabase's
releases</a>.</em></p>
<blockquote>
<h2>v2.28.0</h2>
<h2><a
href="https://github.com/supabase/supabase-py/compare/v2.27.3...v2.28.0">2.28.0</a>
(2026-02-10)</h2>
<h3>Features</h3>
<ul>
<li><strong>storage:</strong> add list_v2 method to file_api client (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1377">#1377</a>)
(<a
href="259f4ad42d">259f4ad</a>)</li>
</ul>
<h3>Bug Fixes</h3>
<ul>
<li><strong>auth:</strong> add missing is_sso_user, deleted_at,
banned_until to User model (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1375">#1375</a>)
(<a
href="7f84a62996">7f84a62</a>)</li>
<li><strong>realtime:</strong> ensure remove_channel removes channel
from channels dict (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1373">#1373</a>)
(<a
href="0923314039">0923314</a>)</li>
<li><strong>realtime:</strong> use pop with default in _handle_message
to prevent KeyError (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1388">#1388</a>)
(<a
href="baea26f7ce">baea26f</a>)</li>
<li><strong>storage3:</strong> replace print() with warnings.warn() for
trailing slash notice (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1380">#1380</a>)
(<a
href="50b099fa06">50b099f</a>)</li>
</ul>
<h2>v2.27.3</h2>
<h2><a
href="https://github.com/supabase/supabase-py/compare/v2.27.2...v2.27.3">2.27.3</a>
(2026-02-03)</h2>
<h3>Bug Fixes</h3>
<ul>
<li>deprecate python 3.9 in all packages (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1365">#1365</a>)
(<a
href="cc72ed75d4">cc72ed7</a>)</li>
<li>ensure storage_url has trailing slash to prevent warning (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1367">#1367</a>)
(<a
href="4267ff1345">4267ff1</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/supabase/supabase-py/blob/main/CHANGELOG.md">supabase's
changelog</a>.</em></p>
<blockquote>
<h2><a
href="https://github.com/supabase/supabase-py/compare/v2.27.3...v2.28.0">2.28.0</a>
(2026-02-10)</h2>
<h3>Features</h3>
<ul>
<li><strong>storage:</strong> add list_v2 method to file_api client (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1377">#1377</a>)
(<a
href="259f4ad42d">259f4ad</a>)</li>
</ul>
<h3>Bug Fixes</h3>
<ul>
<li><strong>auth:</strong> add missing is_sso_user, deleted_at,
banned_until to User model (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1375">#1375</a>)
(<a
href="7f84a62996">7f84a62</a>)</li>
<li><strong>realtime:</strong> ensure remove_channel removes channel
from channels dict (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1373">#1373</a>)
(<a
href="0923314039">0923314</a>)</li>
<li><strong>realtime:</strong> use pop with default in _handle_message
to prevent KeyError (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1388">#1388</a>)
(<a
href="baea26f7ce">baea26f</a>)</li>
<li><strong>storage3:</strong> replace print() with warnings.warn() for
trailing slash notice (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1380">#1380</a>)
(<a
href="50b099fa06">50b099f</a>)</li>
</ul>
<h2><a
href="https://github.com/supabase/supabase-py/compare/v2.27.2...v2.27.3">2.27.3</a>
(2026-02-03)</h2>
<h3>Bug Fixes</h3>
<ul>
<li>deprecate python 3.9 in all packages (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1365">#1365</a>)
(<a
href="cc72ed75d4">cc72ed7</a>)</li>
<li>ensure storage_url has trailing slash to prevent warning (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1367">#1367</a>)
(<a
href="4267ff1345">4267ff1</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="59e338400b"><code>59e3384</code></a>
chore(main): release 2.28.0 (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1378">#1378</a>)</li>
<li><a
href="baea26f7ce"><code>baea26f</code></a>
fix(realtime): use pop with default in _handle_message to prevent
KeyError (#...</li>
<li><a
href="259f4ad42d"><code>259f4ad</code></a>
feat(storage): add list_v2 method to file_api client (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1377">#1377</a>)</li>
<li><a
href="50b099fa06"><code>50b099f</code></a>
fix(storage3): replace print() with warnings.warn() for trailing slash
notice...</li>
<li><a
href="0923314039"><code>0923314</code></a>
fix(realtime): ensure remove_channel removes channel from channels dict
(<a
href="https://redirect.github.com/supabase/supabase-py/issues/1373">#1373</a>)</li>
<li><a
href="7f84a62996"><code>7f84a62</code></a>
fix(auth): add missing is_sso_user, deleted_at, banned_until to User
model (#...</li>
<li><a
href="57dd6e2195"><code>57dd6e2</code></a>
chore(deps): bump the uv group across 1 directory with 3 updates (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1369">#1369</a>)</li>
<li><a
href="c357def670"><code>c357def</code></a>
chore(main): release 2.27.3 (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1368">#1368</a>)</li>
<li><a
href="4267ff1345"><code>4267ff1</code></a>
fix: ensure storage_url has trailing slash to prevent warning (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1367">#1367</a>)</li>
<li><a
href="cc72ed75d4"><code>cc72ed7</code></a>
fix: deprecate python 3.9 in all packages (<a
href="https://redirect.github.com/supabase/supabase-py/issues/1365">#1365</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/supabase/supabase-py/compare/v2.27.2...v2.28.0">compare
view</a></li>
</ul>
</details>
<br />


Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.


---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore <dependency name> major version` will close this
group update PR and stop Dependabot creating any more for the specific
dependency's major version (unless you unignore this specific
dependency's major version or upgrade to it yourself)
- `@dependabot ignore <dependency name> minor version` will close this
group update PR and stop Dependabot creating any more for the specific
dependency's minor version (unless you unignore this specific
dependency's minor version or upgrade to it yourself)
- `@dependabot ignore <dependency name>` will close this group update PR
and stop Dependabot creating any more for the specific dependency
(unless you unignore this specific dependency or upgrade to it yourself)
- `@dependabot unignore <dependency name>` will remove all of the ignore
conditions of the specified dependency
- `@dependabot unignore <dependency name> <ignore condition>` will
remove the ignore condition of the specified dependency and ignore
conditions


</details>


<h2>Greptile Overview</h2>

<details><summary><h3>Greptile Summary</h3></summary>

Dependency update bumps 4 packages in the production-dependencies group,
including a **critical security patch for `cryptography`**
(CVE-2026-26007) that prevents malicious public key attacks on binary
elliptic curves. The update also includes bug fixes for `fastapi`,
`launchdarkly-server-sdk`, and `supabase`.

- **cryptography** 46.0.4 → 46.0.5: patches CVE-2026-26007, deprecates
SECT* binary curves
- **fastapi** 0.128.0 → 0.128.7: bug fixes, improved error handling,
relaxed Starlette constraint
- **launchdarkly-server-sdk** 9.14.1 → 9.15.0: drops Python 3.9 support
(requires >=3.10), fixes race conditions
- **supabase** 2.27.2/2.27.3 → 2.28.0: realtime fixes, new User model
fields

The lock files correctly resolve all dependencies. Python 3.10+
requirement is already enforced in both packages. However, backend's
`pyproject.toml` still specifies `launchdarkly-server-sdk = "^9.14.1"`
while the lock file uses 9.15.0 (pulled from autogpt_libs dependency),
creating a minor version constraint inconsistency.
</details>


<details><summary><h3>Confidence Score: 4/5</h3></summary>

- This PR is safe to merge with one minor style suggestion
- Automated dependency update with critical security patch for
cryptography. All updates are backwards-compatible within semver
constraints. Lock files correctly resolve all dependencies. Python 3.10+
is already enforced. Only minor issue is version constraint
inconsistency in backend's pyproject.toml for launchdarkly-server-sdk,
which doesn't affect functionality but should be aligned for clarity.
- autogpt_platform/backend/pyproject.toml needs launchdarkly-server-sdk
version constraint updated to ^9.15.0
</details>



---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Otto <otto@agpt.co>
2026-02-13 09:10:11 +00:00
Zamil Majdy
32e9dda30d fix(chat/sdk): resolve relative paths in security hooks and unify workspace access
The security hook's path validation blocked SDK Read/Write tools because
it didn't resolve relative paths against sdk_cwd. Since the SDK sets cwd,
Claude naturally uses relative paths like "test.txt" which failed the
absolute path prefix check. Now relative paths are joined with sdk_cwd
before validation, and denial messages include the allowed workspace path.

Also clarifies the workspace model: SDK Read/Write + bash_exec share the
same ephemeral session directory, while workspace_file tools provide
persistent cloud storage across sessions.
2026-02-13 10:40:41 +04:00
Ubbe
e8c50b96d1 fix(frontend): improve CoPilot chat table styling (#12094)
## Summary
- Remove left and right borders from tables rendered in CoPilot chat
- Increase cell padding (py-3 → py-3.5) for better spacing between text
and lines
- Applies to both Streamdown (main chat) and MarkdownRenderer (tool
outputs)

Design feedback from Olivia to make tables "breathe" more.

## Test plan
- [ ] Open CoPilot chat and trigger a response containing a table
- [ ] Verify tables no longer have left/right borders
- [ ] Verify increased spacing between rows
- [ ] Check both light and dark modes

🤖 Generated with [Claude Code](https://claude.com/claude-code)


<h2>Greptile Overview</h2>

<details><summary><h3>Greptile Summary</h3></summary>

Improved CoPilot chat table styling by removing left and right borders
and increasing vertical padding from `py-3` to `py-3.5`. Changes apply
to both:
- Streamdown-rendered tables (via CSS selector in `globals.css`)  
- MarkdownRenderer tables (via Tailwind classes)

The changes make tables "breathe" more per design feedback from Olivia.

**Issue Found:**
- The CSS padding value in `globals.css:192` is `0.625rem` (`py-2.5`)
but should be `0.875rem` (`py-3.5`) to match the PR description and the
MarkdownRenderer implementation.
</details>


<details><summary><h3>Confidence Score: 2/5</h3></summary>

- This PR has a logical error that will cause inconsistent table styling
between Streamdown and MarkdownRenderer tables
- The implementation has an inconsistency where the CSS file uses
`py-2.5` padding while the PR description and MarkdownRenderer use
`py-3.5`. This will result in different table padding between the two
rendering systems, contradicting the goal of consistent styling
improvements.
- Pay close attention to `autogpt_platform/frontend/src/app/globals.css`
- the padding value needs to be corrected to match the intended design
</details>



---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
2026-02-13 09:38:59 +08:00
Ubbe
30e854569a feat(frontend): add exact timestamp tooltip on run timestamps (#12087)
Resolves OPEN-2693: Make exact timestamp of runs accessible through UI.

The NewAgentLibraryView shows relative timestamps ("2 days ago") for
runs and schedules, but unlike the OldAgentLibraryView it didn't show
the exact timestamp on hover. This PR adds a native `title` tooltip so
users can see the full date/time by hovering.

### Changes 🏗️

- Added `descriptionTitle` prop to `SidebarItemCard` that renders as a
`title` attribute on the description text
- `TaskListItem` now passes the exact `run.started_at` timestamp via
`descriptionTitle`
- `ScheduleListItem` now passes the exact `schedule.next_run_time`
timestamp via `descriptionTitle`

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [ ] Open an agent in the library view
- [ ] Hover over a run's relative timestamp (e.g. "2 days ago") and
confirm the full date/time tooltip appears
- [ ] Hover over a schedule's relative timestamp and confirm the full
date/time tooltip appears

🤖 Generated with [Claude Code](https://claude.com/claude-code)


<h2>Greptile Overview</h2>

<details><summary><h3>Greptile Summary</h3></summary>

Added native tooltip functionality to show exact timestamps in the
library view. The implementation adds a `descriptionTitle` prop to
`SidebarItemCard` that renders as a `title` attribute on the description
text. This allows users to hover over relative timestamps (e.g., "2 days
ago") to see the full date/time.

**Changes:**
- Added optional `descriptionTitle` prop to `SidebarItemCard` component
(SidebarItemCard.tsx:10)
- `TaskListItem` passes `run.started_at` as the tooltip value
(TaskListItem.tsx:84-86)
- `ScheduleListItem` passes `schedule.next_run_time` as the tooltip
value (ScheduleListItem.tsx:32)
- Unrelated fix included: Sentry configuration updated to suppress
cross-origin stylesheet errors (instrumentation-client.ts:25-28)

**Note:** The PR includes two separate commits - the main timestamp
tooltip feature and a Sentry error suppression fix. The PR description
only documents the timestamp feature.
</details>


<details><summary><h3>Confidence Score: 5/5</h3></summary>

- This PR is safe to merge with minimal risk
- The changes are straightforward and limited in scope - adding an
optional prop that forwards a native HTML attribute for tooltip
functionality. The Text component already supports forwarding arbitrary
HTML attributes through its spread operator (...rest), ensuring the
`title` attribute works correctly. Both the timestamp tooltip feature
and the Sentry configuration fix are low-risk improvements with no
breaking changes.
- No files require special attention
</details>


<details><summary><h3>Sequence Diagram</h3></summary>

```mermaid
sequenceDiagram
    participant User
    participant TaskListItem
    participant ScheduleListItem
    participant SidebarItemCard
    participant Text
    participant Browser

    User->>TaskListItem: Hover over run timestamp
    TaskListItem->>SidebarItemCard: Pass descriptionTitle (run.started_at)
    SidebarItemCard->>Text: Render with title attribute
    Text->>Browser: Forward title attribute to DOM
    Browser->>User: Display native tooltip with exact timestamp

    User->>ScheduleListItem: Hover over schedule timestamp
    ScheduleListItem->>SidebarItemCard: Pass descriptionTitle (schedule.next_run_time)
    SidebarItemCard->>Text: Render with title attribute
    Text->>Browser: Forward title attribute to DOM
    Browser->>User: Display native tooltip with exact timestamp
```
</details>



Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 09:38:16 +08:00
Ubbe
301d7cbada fix(frontend): suppress cross-origin stylesheet security error (#12086)
## Summary
- Adds `ignoreErrors` to the Sentry client configuration
(`instrumentation-client.ts`) to filter out `SecurityError:
CSSStyleSheet.cssRules getter: Not allowed to access cross-origin
stylesheet` errors
- These errors are caused by Sentry Replay (rrweb) attempting to
serialize DOM snapshots that include cross-origin stylesheets (from
browser extensions or CDN-loaded CSS)
- This was reported via Sentry on production, occurring on any page when
logged in

## Changes
- **`frontend/instrumentation-client.ts`**: Added `ignoreErrors: [/Not
allowed to access cross-origin stylesheet/]` to `Sentry.init()` config

## Test plan
- [ ] Verify the error no longer appears in Sentry after deployment
- [ ] Verify Sentry Replay still works correctly for other errors
- [ ] Verify no regressions in error tracking (other errors should still
be captured)

🤖 Generated with [Claude Code](https://claude.com/claude-code)


<h2>Greptile Overview</h2>

<details><summary><h3>Greptile Summary</h3></summary>

Adds error filtering to Sentry client configuration to suppress
cross-origin stylesheet security errors that occur when Sentry Replay
(rrweb) attempts to serialize DOM snapshots containing stylesheets from
browser extensions or CDN-loaded CSS. This prevents noise in Sentry
error logs without affecting the capture of legitimate errors.
</details>


<details><summary><h3>Confidence Score: 5/5</h3></summary>

- This PR is safe to merge with minimal risk
- The change adds a simple error filter to suppress benign cross-origin
stylesheet errors that are caused by Sentry Replay itself. The regex
pattern is specific and only affects client-side error reporting, with
no impact on application functionality or legitimate error capture
- No files require special attention
</details>



Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 09:37:54 +08:00
Ubbe
d95aef7665 fix(copilot): stream timeout, long-running tool polling, and CreateAgent UI refresh (#12070)
Agent generation completes on the backend but the UI does not
update/refresh to show the result.

### Changes 🏗️

- **Stream start timeout (12s):** If the backend doesn't begin streaming
within 12 seconds of submitting a message, the stream is aborted and a
destructive toast is shown to the user.
- **Long-running tool polling:** Added `useLongRunningToolPolling` hook
that polls the session endpoint every 1.5s while a tool output is in an
operating state (`operation_started` / `operation_pending` /
`operation_in_progress`). When the backend completes, messages are
refreshed so the UI reflects the final result.
- **CreateAgent UI improvements:** Replaced the orbit loader / progress
bar with a mini-game, added expanded accordion for saved agents, and
improved the saved-agent card with image, icons, and links that open in
new tabs.
- **Backend tweaks:** Added `image_url` to `CreateAgentToolOutput`,
minor model/service updates for the dummy agent generator.

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
- [x] Send a message and verify the stream starts within 12s or a toast
appears
- [x] Trigger agent creation and verify the UI updates when the backend
completes
- [x] Verify the saved-agent card renders correctly with image, links,
and icons

---------

Co-authored-by: Otto <otto@agpt.co>
Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 20:06:40 +00:00
Zamil Majdy
cb45e7957b feat: fix openapi.json 2026-02-12 23:39:47 +04:00
Zamil Majdy
f1d02fb8f3 fix(chat/sdk): move cwd setup inside try block to ensure cleanup
Move _make_sdk_cwd() and os.makedirs() inside the try block so
the finally cleanup always runs, preventing /tmp dir leaks if
setup fails.
2026-02-12 23:32:26 +04:00
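
A minimal sketch of the ordering fix described above, with the cwd construction simplified to a stand-in for `_make_sdk_cwd()`: creating the directory inside the try block means the finally cleanup always runs, so a failed setup can't leak directories under /tmp.

```python
import os
import shutil

def run_sdk_session(session_id: str) -> None:
    sdk_cwd = None
    try:
        sdk_cwd = os.path.join("/tmp", f"copilot-{session_id}")  # simplified stand-in
        os.makedirs(sdk_cwd, exist_ok=True)
        ...  # run the SDK turn inside sdk_cwd
    finally:
        if sdk_cwd and os.path.isdir(sdk_cwd):
            shutil.rmtree(sdk_cwd, ignore_errors=True)
```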
Zamil Majdy
47de6b6420 feat(chat): add check_operation_status tool for long-running ops
Lets the CoPilot agent query whether a create_agent/edit_agent
operation is still running, completed, or failed. Accepts operation_id
or task_id from a previous operation_started response and looks up
the task status in Redis via stream_registry.
2026-02-12 23:30:51 +04:00
Zamil Majdy
62cd2eea89 fix(chat/sandbox): use --symlink for compat paths on Debian 13
On Debian 13 (bookworm+), /bin, /lib, /sbin, /lib64 are symlinks to
/usr/*.  bwrap --ro-bind cannot create a symlink as a mount target
inside the sandbox, causing "execvp: No such file or directory" because
the ELF dynamic linker at /lib64/ld-linux-x86-64.so.2 is unreachable.

Detect symlinks at runtime with os.path.islink() and use bwrap
--symlink instead of --ro-bind.  Falls back to --ro-bind on older
distros where these are real directories.
2026-02-12 22:55:29 +04:00
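
A sketch of the runtime symlink detection described above; how the real sandbox assembles its bwrap argv is an assumption, but the `--symlink` / `--ro-bind` distinction follows the commit.

```python
import os

def bind_compat_path(argv: list[str], path: str) -> None:
    if os.path.islink(path):
        # Debian 13: /bin -> usr/bin etc. Recreate the symlink inside the
        # sandbox instead of trying to mount onto a symlink target.
        argv += ["--symlink", os.readlink(path), path]
    else:
        argv += ["--ro-bind", path, path]  # real directory on older distros

bwrap_argv = ["bwrap"]
for compat in ("/bin", "/lib", "/lib64", "/sbin"):
    if os.path.exists(compat):
        bind_compat_path(bwrap_argv, compat)
```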
Zamil Majdy
ae61ec692e Merge branch 'dev' into feat/copitlot-claude-code 2026-02-12 22:27:50 +04:00
Zamil Majdy
9296bd8736 fix(chat/sandbox): fix bwrap inside Docker containers
Three fixes for bubblewrap sandbox:
- Fix --tmpdir (invalid) to --tmpfs (correct bwrap option)
- Add --unshare-user so bwrap can create namespaces inside
  unprivileged Docker containers (no CAP_SYS_ADMIN needed)
- Reorder mounts: --tmpfs /tmp first, then --bind workspace on top,
  so the workspace directory is visible through the fresh tmpfs
2026-02-12 22:22:39 +04:00
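
A sketch of the three fixes described above, shown as the relevant slice of a bwrap argv; the workspace path is illustrative.

```python
workspace = "/tmp/copilot-example-session"  # illustrative session workspace

bwrap_argv = [
    "bwrap",
    "--unshare-user",                # create namespaces without CAP_SYS_ADMIN
    "--tmpfs", "/tmp",               # fresh tmpfs first (--tmpfs, not the invalid --tmpdir)
    "--bind", workspace, workspace,  # then bind the workspace on top so it stays visible
]
```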
Zamil Majdy
308113c03d fix(chat/sdk): remove obsolete Bash allowlist tests
The SDK built-in Bash tool is now unconditionally blocked (bash_exec
MCP tool with bubblewrap is used instead). Remove tests that expected
safe Bash commands to be allowed and replace with a single test that
verifies Bash is always denied.
2026-02-12 22:19:30 +04:00
Zamil Majdy
51abf13254 feat(chat): use LaunchDarkly flag for copilot SDK rollout
Replace static CHAT_USE_CLAUDE_AGENT_SDK env var with a LaunchDarkly
feature flag (copilot-sdk) for per-user rollout control. The env var
value serves as the default when LD is not configured or the flag
doesn't exist yet.
2026-02-12 22:02:28 +04:00
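
A hedged sketch of the LaunchDarkly-gated rollout described above: the `copilot-sdk` flag key comes from the commit; the helper name and how the client and env-var default are obtained are illustrative.

```python
import ldclient
from ldclient import Context

def use_claude_agent_sdk(user_id: str, env_default: bool) -> bool:
    client = ldclient.get()
    if not client.is_initialized():
        return env_default  # LD not configured: fall back to the env var default
    context = Context.builder(user_id).build()
    return bool(client.variation("copilot-sdk", context, env_default))
```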
Zamil Majdy
54b03d3a29 fix(frontend): remove python_exec from openapi.json ResponseType enum
The python_exec tool was removed from the backend but the generated
openapi.json still referenced the enum value.
2026-02-12 21:55:25 +04:00
Zamil Majdy
239dff5ebd feat(chat/sandbox): add resource limits to bubblewrap sandbox
Add ulimit-based resource caps inside the bwrap sandbox to prevent
fork bombs and resource exhaustion:
- max 64 processes (stops fork bombs)
- 512 MB virtual memory
- 50 MB max file size
- 256 open file descriptors

Limits are applied via `sh -c 'ulimit ...; exec "$@"'` wrapper inside
the sandbox, so they're inherited by all child processes.
2026-02-12 21:47:49 +04:00
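
A hedged sketch of the ulimit wrapper described above; the limit values mirror the commit message, while the unit conversions and shell specifics are assumptions.

```python
ULIMIT_PREFIX = (
    "ulimit -u 64; "      # max 64 processes (stops fork bombs)
    "ulimit -v 524288; "  # 512 MB virtual memory, in KB
    "ulimit -f 102400; "  # 50 MB max file size, in 512-byte blocks
    "ulimit -n 256; "     # 256 open file descriptors
)

command = ["python3", "-c", "print('hello')"]  # illustrative sandboxed command
argv = ["sh", "-c", ULIMIT_PREFIX + 'exec "$@"', "sh", *command]
```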
Zamil Majdy
1dd53db21c feat(chat/sandbox): bubblewrap sandbox for bash_exec, remove python_exec
- Replace `--ro-bind / /` with whitelist-only filesystem: only /usr, /etc,
  /bin, /lib, /sbin mounted read-only. /app, /root, /home, /opt, /var are
  completely invisible inside the sandbox.
- Add `--clearenv` to wipe all inherited env vars (API keys, DB passwords).
  Only safe vars (PATH, HOME=workspace, LANG) are explicitly set.
- Remove python_exec tool — bash_exec can run `python3 -c` or heredocs with
  identical bubblewrap protection, reducing attack surface.
- Remove all fallback security code (import hooks, blocked modules, network
  command lists). Tools now hard-require bubblewrap — disabled on platforms
  without bwrap.
- Clean up security_hooks.py: remove ~200 lines of dead bash validation code,
  add Bash to BLOCKED_TOOLS as defence-in-depth.
- Wire up long-running tool callback in SDK service for create_agent/edit_agent
  delegation to Redis Streams background infrastructure.
2026-02-12 21:44:40 +04:00
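
A hedged sketch of the whitelist-only invocation described above; the mount list, `--clearenv`, and the safe env vars follow the commit message, while the workspace path and command are illustrative.

```python
workspace = "/tmp/copilot-example-session"  # illustrative session workspace

argv = ["bwrap", "--clearenv"]               # wipe all inherited env vars
for ro_path in ("/usr", "/etc", "/bin", "/lib", "/sbin"):
    argv += ["--ro-bind", ro_path, ro_path]  # whitelist-only read-only mounts
argv += [
    "--bind", workspace, workspace,
    "--setenv", "PATH", "/usr/bin:/bin",
    "--setenv", "HOME", workspace,           # HOME points at the session workspace
    "--setenv", "LANG", "C.UTF-8",
    "bash", "-c", "echo sandboxed",
]
```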
Zamil Majdy
06c16ee2fe fix(chat/sdk): non-blocking long-running tools, tighten security
- Long-running tools (create_agent) now run in background and return
  immediately with an operation_id. Add check_operation MCP tool for
  polling results. Prevents 3+ min blocking and survives page refresh.
- Fix CodeQL path traversal alert: use normpath+startswith sanitizer
  in _make_sdk_cwd() instead of assert.
- Tighten _read_file_handler: restrict from ~/.claude/ to only
  ~/.claude/projects/**/tool-results/ (sentry review feedback).
- Fix bash redirect bypass: strip quoted strings before checking for
  unquoted > operator, catches `echo hello>file` (sentry review).
2026-02-12 20:39:33 +04:00
Zamil Majdy
8d2a649ee5 refactor(chat/sdk): remove Langfuse tracing — OpenRouter handles observability
Delete tracing.py (~408 lines) and all TracedSession/hook references from the
SDK path. OpenRouter already provides token usage, cost tracking, and request
logging, making manual Langfuse integration redundant. This also fixes the
broken 'Langfuse' object has no attribute 'trace' warning on every request.
2026-02-12 20:24:27 +04:00
Nicholas Tindle
cb166dd6fb feat(blocks): Store sandbox files to workspace (#12073)
Store files created by sandbox blocks (Claude Code, Code Executor) to
the user's workspace for persistence across runs.

### Changes 🏗️

- **New `sandbox_files.py` utility** (`backend/util/sandbox_files.py`)
  - Shared module for extracting files from E2B sandboxes
- Stores files to workspace via `store_media_file()` (includes virus
scanning, size limits)
  - Returns `SandboxFileOutput` with path, content, and `workspace_ref`

- **Claude Code block** (`backend/blocks/claude_code.py`)
  - Added `workspace_ref` field to `FileOutput` schema
  - Replaced inline `_extract_files()` with shared utility
  - Files from working directory now stored to workspace automatically

- **Code Executor block** (`backend/blocks/code_executor.py`)
  - Added `files` output field to `ExecuteCodeBlock.Output`
  - Creates `/output` directory in sandbox before execution
  - Extracts all files (text + binary) from `/output` after execution
- Updated `execute_code()` to support file extraction with
`extract_files` param

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
- [x] Create agent with Claude Code block, have it create a file, verify
`workspace_ref` in output
- [x] Create agent with Code Executor block, write file to `/output`,
verify `workspace_ref` in output
  - [x] Verify files persist in workspace after sandbox disposal
- [x] Verify binary files (images, etc.) work correctly in Code Executor
- [x] Verify existing graphs using `content` field still work (backward
compat)

#### For configuration changes:
- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my
changes
- [x] I have included a list of my configuration changes in the PR
description (under **Changes**)

No configuration changes required - this is purely additive backend
code.

---

**Related:** Closes SECRT-1931

---

> [!NOTE]
> **Medium Risk**
> Adds automatic extraction and workspace storage of sandbox-written
files (including binaries for code execution), which can affect output
payload size, performance, and file-handling edge cases.
> 
> **Overview**
> **Sandbox blocks now persist generated files to workspace.** A new
shared utility (`backend/util/sandbox_files.py`) extracts files from an
E2B sandbox (scoped by a start timestamp) and stores them via
`store_media_file`, returning `SandboxFileOutput` with `workspace_ref`.
> 
> `ClaudeCodeBlock` replaces its inline file-scraping logic with this
utility and updates the `files` output schema to include
`workspace_ref`.
> 
> `ExecuteCodeBlock` adds a `files` output and extends the executor
mixin to optionally extract/store files (text + binary) when an
`execution_context` is provided; related mocks/tests and docs are
updated accordingly.
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
343854c0cf. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 15:56:59 +00:00
Zamil Majdy
9589474709 Merge branch 'dev' into feat/copitlot-claude-code 2026-02-12 19:40:32 +04:00
Swifty
3d31f62bf1 Revert "added feature request tooling"
This reverts commit b8b6c9de23.
2026-02-12 16:39:24 +01:00
Swifty
b8b6c9de23 added feature request tooling 2026-02-12 16:38:17 +01:00
Zamil Majdy
749a78723a refactor(chat/sdk): deduplicate code and remove anthropic fallback
- Extract shared `make_session_path()` into sandbox.py (single source of
  truth for workspace path sanitization), replace duplicate in service.py
- Delete anthropic_fallback.py (~360 lines) — redundant third code path;
  routes.py already falls back to non-SDK service
- Remove dead `traced_session()`, `get_tool_definitions()`,
  `get_tool_handlers()`, `_current_tool_call_id` ContextVar
- Fix hardcoded model in tracing — pass actual resolved model
- Fix inconsistent model name splitting in anthropic fallback
2026-02-12 19:26:29 +04:00
Zamil Majdy
bec2e1ddee fix(chat/tools): sanitize session_id in sandbox workspace path
Align with SDK's _make_sdk_cwd() to prevent path traversal and ensure
python_exec/bash_exec share the same workspace as SDK file tools.
2026-02-12 19:08:47 +04:00
Zamil Majdy
ec1ab06e0d chore(chat): bump default max_subtasks from 3 to 10 2026-02-12 19:07:42 +04:00
Zamil Majdy
f31cb49557 feat(chat/tools): add sandboxed python_exec, bash_exec, web_fetch tools and enable Task
- Add sandbox.py with network-isolated execution via unshare --net (Linux)
  and import/command blocklist fallback (macOS dev)
- Add python_exec tool: runs Python in subprocess with no network, workspace-scoped
- Add bash_exec tool: full Bash scripting with no network, workspace-scoped
- Add web_fetch tool: SSRF-protected URL fetching via backend Requests utility
- Remove SDK built-in Bash from allowlist (replaced by sandboxed bash_exec)
- Enable SDK built-in Task (sub-agents) with per-session rate limit (default 3)
- Add claude_agent_max_subtasks config field
2026-02-12 19:07:19 +04:00
Zamil Majdy
fd28c386f4 Merge branch 'dev' into feat/copitlot-claude-code 2026-02-12 18:50:11 +04:00
Zamil Majdy
3bea584659 feat(chat/sdk): route SDK through OpenRouter with observability (#12084)
## Summary
- Routes Claude Agent SDK API calls through OpenRouter via
`ANTHROPIC_BASE_URL` / `ANTHROPIC_AUTH_TOKEN` env vars, enabling
per-call token and cost tracking on the OpenRouter dashboard
- Adds `sdk_model` and `sdk_max_budget_usd` config fields for
SDK-specific model selection and budget control
- Emits `StreamUsage` from SDK `ResultMessage` so the frontend receives
token counts, and persists usage to `session.usage`
- Fixes Langfuse tracing to use the configured model name instead of a
hardcoded default
- Updates Anthropic fallback to use `config.api_key` / `config.base_url`
(OpenRouter routing) instead of raw `ANTHROPIC_API_KEY` env var

## Test plan
- [ ] Deploy and send a CoPilot message — verify the API call appears on
the OpenRouter dashboard
- [ ] Check Langfuse trace shows correct model name (e.g.
`claude-opus-4.6` not hardcoded `claude-sonnet-4-20250514`)
- [ ] Verify frontend receives `StreamUsage` with `promptTokens` /
`completionTokens` values
- [ ] Set `CHAT_SDK_MAX_BUDGET_USD` and verify budget is respected
- [ ] Test fallback path (without `claude-agent-sdk` installed) still
works via OpenRouter


<h2>Greptile Overview</h2>

<details><summary><h3>Greptile Summary</h3></summary>

Routes Claude Agent SDK API calls through OpenRouter for enhanced
observability and cost tracking. The PR enables per-call token tracking
on the OpenRouter dashboard by configuring the SDK to use
`ANTHROPIC_BASE_URL` and `ANTHROPIC_AUTH_TOKEN` environment variables
derived from the chat configuration.

Key changes:
- Added `sdk_model` and `sdk_max_budget_usd` configuration fields for
SDK-specific control
- Implemented automatic model name resolution that strips OpenRouter
provider prefixes
- Updated SDK client initialization to route through OpenRouter with
proper environment variables
- Emits `StreamUsage` events from SDK `ResultMessage` for frontend token
visibility
- Persists usage data to `session.usage` for historical tracking
- Fixed Langfuse tracing to use the configured model name instead of
hardcoded defaults
- Updated fallback path to use OpenRouter routing instead of direct
Anthropic API

### Confidence Score: 4/5

- Safe to merge with minor observations - the implementation is solid
and the changes are well-structured
- The code quality is high with proper error handling, clear separation
of concerns, and good defensive coding practices. The changes integrate
cleanly with existing patterns. Minor observations include missing
validation for sdk_max_budget_usd and a potential edge case in model
name resolution, but these don't block merging
- No files require special attention - all changes follow existing
patterns and maintain consistency

### Sequence Diagram

```mermaid
sequenceDiagram
    participant Frontend
    participant Backend
    participant SDK as Claude Agent SDK
    participant OpenRouter
    participant Anthropic
    participant Langfuse

    Frontend->>Backend: POST /chat/completions
    Backend->>Backend: Load config (api_key, base_url)
    Backend->>Backend: Resolve SDK model (strip OpenRouter prefix)
    Backend->>Backend: Build SDK env vars (ANTHROPIC_BASE_URL, ANTHROPIC_AUTH_TOKEN)
    
    Backend->>Langfuse: Initialize TracedSession with model name
    Backend->>SDK: ClaudeSDKClient(model, env, max_budget_usd)
    
    SDK->>SDK: Use ANTHROPIC_BASE_URL from env
    SDK->>OpenRouter: POST /messages (via configured base_url)
    OpenRouter->>Anthropic: Forward request with routing
    Anthropic-->>OpenRouter: Stream response chunks
    OpenRouter-->>SDK: Stream response with usage data
    
    loop For each SDK message
        SDK-->>Backend: AssistantMessage/UserMessage/ResultMessage
        Backend->>Langfuse: log_sdk_message()
        Backend->>Backend: SDKResponseAdapter.convert_message()
        Backend->>Backend: Extract usage from ResultMessage
        Backend->>Backend: Persist Usage to session.usage
        Backend-->>Frontend: StreamUsage(promptTokens, completionTokens)
        Backend-->>Frontend: StreamTextDelta/StreamToolInput/etc
    end
    
    Backend->>Langfuse: Log final generation with model name
    Backend->>Backend: Save session with usage data
    Backend-->>Frontend: StreamFinish
```
2026-02-12 21:47:39 +07:00
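A hedged sketch of the routing setup this PR describes: the SDK reads `ANTHROPIC_BASE_URL` / `ANTHROPIC_AUTH_TOKEN` from its environment, so pointing them at OpenRouter redirects every API call. The env var names and the prefix-stripping behaviour come from the PR text; the config attribute names are illustrative:

```python
def build_sdk_env(config) -> dict[str, str]:
    """Environment overrides that make the SDK send requests via OpenRouter."""
    return {
        "ANTHROPIC_BASE_URL": config.base_url,   # OpenRouter endpoint from chat config
        "ANTHROPIC_AUTH_TOKEN": config.api_key,  # OpenRouter key, not a raw Anthropic key
    }

def resolve_sdk_model(model: str) -> str:
    """Strip an OpenRouter provider prefix, e.g. 'anthropic/claude-opus-4.6' -> 'claude-opus-4.6'."""
    return model.split("/", 1)[-1]
```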
Abhimanyu Yadav
4f6055f494 refactor(frontend): remove default expiration date from API key credentials form (#12092)
### Changes 🏗️

Removed the default expiration date for API keys in the credentials
modal. Previously, API keys were set to expire the next day by default,
but now the expiration date field starts empty, allowing users to
explicitly choose whether they want to set an expiration date.

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
- [x] Open the API key credentials modal and verify the expiration date
field is empty by default
  - [x] Test creating an API key with and without an expiration date
  - [x] Verify both scenarios work correctly

## Greptile Overview

### Greptile Summary

Removed the default expiration date for API key credentials in the
credentials modal. Previously, API keys were automatically set to expire
the next day at midnight. Now the expiration date field starts empty,
allowing users to explicitly choose whether to set an expiration.

- Removed `getDefaultExpirationDate()` helper function that calculated
tomorrow's date
- Changed default `expiresAt` value from calculated date to empty string
- Backend already supports optional expiration (`expires_at?: number`),
so no backend changes needed
- Form submission correctly handles empty expiration by passing
`undefined` to the API

### Confidence Score: 5/5

- This PR is safe to merge with minimal risk
- The changes are straightforward and well-contained. The refactor
removes a helper function and changes a default value. The backend API
already supports optional expiration dates, and the form submission
logic correctly handles empty values by passing undefined. The change
improves UX by not forcing a default expiration date on users.
- No files require special attention
2026-02-12 12:57:06 +00:00
Otto
695a185fa1 fix(frontend): remove fixed min-height from CoPilot message container (#12091)
## Summary

Removes the `min-h-screen` class from `ConversationContent` in
ChatMessagesContainer, which was causing fixed height layout issues in
the CoPilot chat interface.

## Changes

- Removed `min-h-screen` from ConversationContent className

## Linear

Fixes [SECRT-1944](https://linear.app/autogpt/issue/SECRT-1944)

## Greptile Overview

### Greptile Summary

Removes the `min-h-screen` (100vh) class from `ConversationContent` that
was causing the chat message container to enforce a minimum viewport
height. The parent container already handles height constraints with
`h-full min-h-0` and flexbox layout, so the fixed minimum height was
creating layout conflicts. The component now properly grows within its
flex container using `flex-1`.

### Confidence Score: 5/5

- This PR is safe to merge with minimal risk
- The change removes a single problematic CSS class that was causing
fixed height layout issues. The parent container already handles height
constraints properly with flexbox, and removing min-h-screen allows the
component to size correctly within its flex parent. This is a targeted,
low-risk bug fix with no logic changes.
- No files require special attention
2026-02-12 12:46:29 +00:00
Zamil Majdy
d7f7a2747f fix(backend/chat): Atomic message append to prevent race condition
Replace the read-modify-write pattern in stream_chat_post with an
atomic append_and_save_message helper that acquires the session lock
before re-fetching and appending. This prevents message loss when
concurrent requests modify the same session.
2026-02-12 09:10:43 +04:00
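A simplified sketch of the atomic append pattern; the lock-manager and storage APIs shown here are assumptions, not the platform's actual interfaces:

```python
async def append_and_save_message(session_id: str, message, *, locks, storage):
    """Append a message under the session lock to avoid lost updates.

    The old read-modify-write flow fetched the session, appended in memory and
    saved, so two concurrent requests could each persist a copy missing the
    other's message.  Re-fetching *after* acquiring the lock closes that race.
    """
    async with locks.acquire(session_id):                 # hypothetical lock manager
        session = await storage.get_session(session_id)   # re-fetch inside the lock
        session.messages.append(message)
        await storage.save_session(session)
        return session
```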
Zamil Majdy
68849e197c format 2026-02-12 08:26:26 +04:00
Zamil Majdy
211478bb29 Revert "style: run ruff format and isort"
This reverts commit 40b58807ab.
2026-02-12 08:25:22 +04:00
Zamil Majdy
0e88dd15b2 feat(chat): add hook-based tracing integration for Claude Agent SDK
- Add create_tracing_hooks() for fine-grained tool timing
- Add merge_hooks() utility to combine security + tracing hooks
- Captures precise pre/post timing for tool executions
- Tracks tool failures via PostToolUseFailure hook
- Integrates seamlessly with existing security hooks
2026-02-12 03:35:16 +00:00
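Conceptually, `merge_hooks()` just concatenates the per-event hook lists from each source so security and tracing hooks both fire. A minimal sketch; the event-name keys and hook shapes are illustrative, not the SDK's actual types:

```python
from collections import defaultdict

def merge_hooks(*hook_maps: dict[str, list]) -> dict[str, list]:
    """Combine several {event_name: [hook, ...]} mappings into one.

    Hooks for the same event keep the order the maps are passed in, so
    security hooks can be registered ahead of tracing hooks.
    """
    merged: dict[str, list] = defaultdict(list)
    for hooks in hook_maps:
        for event, callbacks in hooks.items():
            merged[event].extend(callbacks)
    return dict(merged)

# e.g. options.hooks = merge_hooks(create_security_hooks(...), create_tracing_hooks(...))
```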
Zamil Majdy
7f3c227f0a feat(chat): add modular Langfuse tracing for Claude Agent SDK
- Create tracing.py with TracedSession context manager
- Automatically trace user messages, SDK messages, and results
- Capture tool calls with input/output and timing
- Log usage and cost from SDK ResultMessage
- No-op when Langfuse not configured (zero overhead)
- Clean integration into service.py via context manager
2026-02-12 03:33:37 +00:00
Zamil Majdy
40b58807ab style: run ruff format and isort 2026-02-12 03:25:19 +00:00
Zamil Majdy
d0e2e6f013 security(service): strengthen path validation for SDK cleanup
- Add empty check after session_id sanitization
- Add assertion for defense-in-depth
- Add explicit '..' traversal check in cleanup
- Replace glob with os.listdir to avoid glob injection
- Add validation that project_dir stays under ~/.claude/projects
- Add warning logs for rejected paths

Addresses CodeQL alert about uncontrolled data in path expression
2026-02-12 03:07:08 +00:00
Zamil Majdy
efdc8d73cc fix(security_hooks): use json.dumps for pattern matching and log warning
- Use json.dumps instead of str() for more predictable pattern matching
- Log warning when SDK not available and security hooks are disabled

Addresses CodeRabbit review feedback
2026-02-12 02:55:04 +00:00
Zamil Majdy
a34810d8a2 revert: remove Bash command extraction from GenericTool
Keep it simple - just show 'Bash completed' instead of special
handling to extract command names like 'jq completed'
2026-02-12 02:53:37 +00:00
Zamil Majdy
038b7d5841 feat(copilot): show specific command name for Bash tool
- Extract command name (jq, grep, etc.) from Bash tool input
- Display 'jq completed' instead of 'Bash completed'
- Add ripgrep and tree to Dockerfile (match ALLOWED_BASH_COMMANDS)
2026-02-12 02:48:19 +00:00
Zamil Majdy
cac93b0cc9 fix(chat): increase SDK buffer limit and add jq
- Add sdk_max_buffer_size config option (default 10MB, was 1MB)
- Pass max_buffer_size to ClaudeAgentOptions to prevent crashes on large tool outputs
- Install jq in Dockerfile for JSON processing capabilities

Fixes AUTOGPT-SERVER-7V2
2026-02-12 02:41:12 +00:00
Zamil Majdy
2025aaf5f2 fix(backend/chat): Preserve full MCP tool output for frontend widgets
The SDK CLI truncates large tool results (writing them to disk),
which breaks frontend widget rendering (e.g., find_block's block
list cards). Stash the full MCP tool output before the SDK sees it,
then use the stash in the response adapter so the frontend always
receives the complete JSON for proper widget parsing.
2026-02-11 23:13:42 +04:00
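The stash described above is essentially a lookup keyed by the tool invocation; a sketch under assumed names (`tool_use_id`, the module-level dict) rather than the actual implementation:

```python
# Full MCP tool outputs, keyed by tool invocation ID, recorded before the SDK
# CLI gets a chance to truncate them to disk.
_full_tool_outputs: dict[str, str] = {}

def stash_tool_output(tool_use_id: str, output: str) -> None:
    """Record the complete MCP tool result before the SDK may truncate it."""
    _full_tool_outputs[tool_use_id] = output

def pop_tool_output(tool_use_id: str, truncated: str) -> str:
    """Prefer the stashed full output; fall back to whatever the SDK returned."""
    return _full_tool_outputs.pop(tool_use_id, truncated)
```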
Zamil Majdy
ae9bce3bae feat(backend/chat): Add sandboxed Bash and notify SDK of restrictions
- Allow Bash tool with command allowlist (jq, grep, head, tail, etc.)
  validated via shlex.split for proper quote handling
- Add workspace path validation for Bash absolute paths
- Add SDK built-in tools (Read/Write/Edit/Glob/Grep/Bash) to allowed_tools
- Append Bash restrictions to system prompt (SDK doesn't know our allowlist)
- Add default_factory to BlockInfoSummary schema fields
- Add 12 Bash sandbox tests covering safe/dangerous commands, substitution,
  redirection, /dev/ access, path escaping
2026-02-11 22:35:39 +04:00
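A reduced sketch of the allowlist check: `shlex.split` handles quoting properly, so dangerous arguments cannot hide behind naive string matching. The allowed-command set and rejection rules below are only a subset of what the commit adds:

```python
import shlex

ALLOWED_BASH_COMMANDS = {"jq", "grep", "head", "tail", "cat", "wc", "sort"}

def is_command_allowed(command: str) -> bool:
    """Allow only plain invocations of allowlisted binaries."""
    try:
        tokens = shlex.split(command)
    except ValueError:      # unbalanced quotes and similar parse errors
        return False
    if not tokens:
        return False
    # Reject pipes, redirection, command substitution, etc. anywhere in the line.
    if any(ch in token for token in tokens for ch in "|&;<>`$"):
        return False
    return tokens[0] in ALLOWED_BASH_COMMANDS
```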
Zamil Majdy
3107d889fc feat(frontend/copilot): Add generic tool widget for unrecognized tools
SDK built-in tools (Read, Glob, Grep, etc.) have no dedicated frontend
widget, so tool calls silently disappeared. Add a GenericTool component
that shows a spinning gear + "Running {tool}…" for any tool-* part
type that doesn't match a known case.
2026-02-11 22:08:03 +04:00
Zamil Majdy
f174fb6303 fix(backend/chat): Strip MCP prefix from SDK tool names for frontend rendering
The Vercel AI SDK frontend renders tool widgets based on tool name
(e.g. "tool-find_block", "tool-run_agent"). The SDK sends tool names
with the MCP prefix (mcp__copilot__find_block) which didn't match
any frontend switch case, causing tool execution to be invisible.

Strip the mcp__copilot__ prefix in the response adapter so tool events
reach the correct frontend widget handlers.
2026-02-11 22:01:59 +04:00
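The fix itself is essentially a one-line mapping in the response adapter; a sketch, with the prefix constant taken from the commit message:

```python
MCP_TOOL_PREFIX = "mcp__copilot__"

def frontend_tool_name(sdk_tool_name: str) -> str:
    """Map an SDK/MCP tool name to the name the frontend switch expects.

    e.g. "mcp__copilot__find_block" -> "find_block", so the stream part becomes
    "tool-find_block" and matches the existing widget handler.
    """
    return sdk_tool_name.removeprefix(MCP_TOOL_PREFIX)
```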
Zamil Majdy
920a4c5f15 feat(backend/chat): Allow Read/Write/Edit/Glob/Grep in SDK within workspace
Move these tools from fully-blocked to workspace-scoped: they are now
allowed when the file path stays within the SDK working directory
(/tmp/copilot-<session>/) or the tool-results directory
(~/.claude/projects/…/tool-results/). This enables the SDK's built-in
oversized tool result handling and workspace file operations.

- Add _validate_workspace_path() with normpath-based path validation
- Pass sdk_cwd from service.py into create_security_hooks()
- Add 20 unit tests covering allowed/denied paths, traversal attacks
2026-02-11 20:39:33 +04:00
Zamil Majdy
e95fadbb86 Merge branch 'dev' into feat/copitlot-claude-code 2026-02-11 20:23:56 +04:00
Zamil Majdy
b14b3803ad feat(backend/chat): Add StreamStartStep/StreamFinishStep to SDK adapter
The non-SDK path emits step boundaries (StartStep/FinishStep) around
each LLM turn and tool cycle. The SDK adapter was missing these,
causing the frontend to lack visual step framing for tool calls.

Now the SDK adapter emits:
- StreamStartStep after init and before each new LLM turn
- StreamFinishStep after tool results and before final finish
2026-02-11 20:18:27 +04:00
Zamil Majdy
82c483d6c8 Merge branch 'dev' into feat/copitlot-claude-code 2026-02-11 07:17:38 +04:00
Zamil Majdy
7cffa1895f fix(backend/chat): Filter duplicate StreamStart from non-SDK path
Routes.py already publishes a StreamStart before calling the service.
The SDK path filters the duplicate internally, but the non-SDK path
did not, causing two StreamStart events to reach the frontend.
2026-02-11 06:52:47 +04:00
Zamil Majdy
9791bdd724 fix(backend/chat): Use normpath+startswith pattern for CodeQL path sanitization
CodeQL doesn't recognize re.sub as a path sanitizer. Switch to the
os.path.normpath + startswith prefix check pattern that CodeQL's
taint model explicitly recognizes as breaking the taint chain.
2026-02-11 06:45:12 +04:00
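The pattern in question, sketched with an assumed base directory; normalize (or resolve) first, then prove containment with a prefix check against a trusted root:

```python
import os

def resolve_under(base_dir: str, untrusted_relative: str) -> str:
    """Join and normalize a path, then prove containment with a prefix check.

    CodeQL's taint model treats normpath/realpath followed by a startswith
    check against a trusted prefix as breaking the path-injection chain,
    whereas regex-based cleaning alone is not recognized as a sanitizer.
    """
    base = os.path.realpath(base_dir)
    candidate = os.path.realpath(os.path.join(base, untrusted_relative))
    if not candidate.startswith(base + os.sep):   # reject anything escaping base
        raise ValueError(f"path escapes {base}: {candidate}")
    return candidate
```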
Zamil Majdy
750a674c78 fix lock 2026-02-11 06:39:03 +04:00
Zamil Majdy
960c7980a3 fix(backend/chat): Use named helper for session_id sanitization to satisfy CodeQL
Replace inline comprehension with _sanitize_session_id() using re.sub
so CodeQL recognizes the path-traversal sanitization barrier.
2026-02-11 06:32:16 +04:00
Zamil Majdy
e85d437bb2 fix(backend/chat): Sanitize session_id in SDK cwd path to prevent path traversal 2026-02-11 06:26:48 +04:00
Zamil Majdy
44f9536bd6 fix lock 2026-02-11 06:24:41 +04:00
Zamil Majdy
1c1085a227 Merge remote-tracking branch 'origin/dev' into feat/copitlot-claude-code
# Conflicts:
#	autogpt_platform/backend/backend/api/features/chat/config.py
#	autogpt_platform/backend/poetry.lock
2026-02-11 05:30:46 +04:00
Zamil Majdy
d7ef70469e fix(backend/chat): Fix cleanup race condition and move to outer finally
- Use session-specific temp dir (/tmp/copilot-{session_id}) as SDK cwd
  to prevent concurrent sessions from deleting each other's tool-result
  files during cleanup
- Move _cleanup_sdk_tool_results() to outer finally block so it runs
  even when the outer except Exception fires
- Clean up the temp cwd directory after each session
- Remove unnecessary inner try/finally nesting
2026-02-11 05:13:02 +04:00
Zamil Majdy
1926127ddd fix(backend/chat): Fix bugs and remove dead code in SDK integration
- Fix message accumulation bug: reset has_appended_assistant when
  creating new post-tool assistant message to prevent lost text deltas
- Fix hardcoded model in anthropic_fallback.py: use config.model instead
  of hardcoded "claude-sonnet-4-20250514"
- Fix _SDK_TOOL_RESULTS_DIR using hardcoded /root/ path: use expanduser
- Remove unused create_strict_security_hooks (~75 lines)
- Remove unused create_heartbeat/create_usage from response adapter
- Remove unused RAW_TOOL_NAMES from tool_adapter
- Extract _MAX_TOOL_ITERATIONS constant from magic number
2026-02-11 04:42:05 +04:00
Zamil Majdy
8b509e56de refactor(backend/chat): Replace --resume with conversation context, add compaction and dedup
- Remove broken --resume/session file approach (CLI v2.1.38 can't load
  >2 message session files) and delete session_file.py + tests
- Embed prior conversation turns as <conversation_history> context in
  the user message for multi-turn memory
- Add context compaction using shared compress_context() from prompt.py
  with LLM summarization + truncation fallback for long conversations
- Reuse _build_system_prompt and _generate_session_title from parent
  service.py instead of duplicating (gains Langfuse prompt support)
- Add has_conversation_history param to _build_system_prompt to avoid
  greeting on multi-turn conversations
- Fix _SDK_TOOL_RESULTS_GLOB from hardcoded /root/ to expanduser ~/
2026-02-11 04:22:11 +04:00
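A rough sketch of the history embedding described above. The `<conversation_history>` tag comes from the commit message; the formatting is illustrative, and the real code additionally compacts long histories via the shared `compress_context()` helper:

```python
def build_prompt_with_history(user_message: str, prior_turns: list[dict]) -> str:
    """Prepend prior turns as a <conversation_history> block for multi-turn memory."""
    if not prior_turns:
        return user_message
    history_lines = [
        f"{turn['role']}: {turn['content']}"
        for turn in prior_turns
        if turn.get("content")          # skip tool-call noise / empty messages
    ]
    history = "\n".join(history_lines)
    return (
        "<conversation_history>\n"
        f"{history}\n"
        "</conversation_history>\n\n"
        f"{user_message}"
    )
```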
Zamil Majdy
acb2d0bd1b fix(backend/chat): Resolve symlinks in session file path for --resume
The CLI resolves symlinks when computing its project directory (e.g.
/tmp -> /private/tmp on macOS), so our session file writes must use
the resolved path to match. Also adds cwd to ClaudeAgentOptions and
debug logging for SDK messages.
2026-02-10 20:11:16 +04:00
Zamil Majdy
51aa369c80 fix(backend): Restore PyYAML cp38 wheel entries in poetry.lock
Re-add Python 3.8 wheel entries for PyYAML that were dropped by
poetry lock resolution, keeping the lockfile consistent with dev.
2026-02-10 20:06:45 +04:00
Zamil Majdy
6403ffe353 fix(backend/chat): Use --resume with session files for multi-turn conversations
Replace broken AsyncIterable approach (CLI rejects assistant-type stdin
messages) with JSONL session files written to the CLI's storage directory.
This enables --resume to load full user+assistant context with turn-level
compaction support for long conversations.
2026-02-10 18:46:33 +04:00
Zamil Majdy
c40a98ba3c Merge branches 'feat/copitlot-claude-code' and 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/copitlot-claude-code 2026-02-10 18:19:23 +04:00
Zamil Majdy
a31fc8b162 refactor(backend/chat): Use proper SDK types and in-memory conversation history
Replace duck typing (class name checks, getattr) with isinstance() using
SDK-exported dataclasses. Replace file-based --resume with AsyncIterable
message injection for conversation history, eliminating disk I/O. Add 15
unit tests for the response adapter.
2026-02-10 18:17:00 +04:00
Zamil Majdy
0f2d1a6553 Merge branch 'dev' into feat/copitlot-claude-code 2026-02-10 17:23:06 +04:00
Zamil Majdy
87d817b83b fix(backend/chat): Allow MCP-registered tools through security hook and fix title generation
- Skip BLOCKED_TOOLS check for tools with mcp__copilot__ prefix since they
  are already sandboxed by tool_adapter (fixes Read tool being blocked)
- Fall back to session.messages for title generation when message=None
2026-02-10 17:15:42 +04:00
Zamil Majdy
acf932bf4f refactor(backend/chat): Move glob/os imports to top-level in SDK service 2026-02-10 16:57:11 +04:00
Zamil Majdy
f562d9a277 fix(backend/chat): Add Read tool for SDK oversized tool results
The Claude Agent SDK saves tool results exceeding its token limit to
files and instructs the agent to read them back with a Read tool. Our
MCP server didn't have this tool, breaking the agent on large results
like run_block output (117K+ chars).

Changes:
- Add a Read tool to the MCP server (restricted to /root/.claude/)
- Register it in COPILOT_TOOL_NAMES so the SDK can use it
- Add safety-net truncation at 500K chars for extreme cases
- Clean up SDK tool-result files after each client session
2026-02-10 16:53:04 +04:00
Zamil Majdy
3c92a96504 fix(backend/chat): Publish StreamError before StreamFinish on error paths
When run_ai_generation() or event_generator() encounter errors, they
were only publishing StreamFinish without a preceding StreamError. The
frontend treats finish-without-error as normal completion, leaving the
user with an apparently stuck/empty response requiring a page refresh.
2026-02-10 15:49:23 +04:00
Zamil Majdy
8b8e1df739 fix(backend/chat): Auto-expire stale running tasks to unblock sessions
Tasks stuck in "running" status beyond stream_timeout (300s) are now
auto-marked as failed when looked up, preventing zombie tasks from
blocking the session indefinitely.
2026-02-10 15:35:43 +04:00
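An illustrative sketch of the auto-expiry check; the field names are assumptions, and only the 300-second stream timeout is taken from the message:

```python
import time

STREAM_TIMEOUT_SECONDS = 300  # matches the stream_timeout mentioned above

def resolve_task_status(task: dict) -> dict:
    """Mark tasks stuck in 'running' beyond the stream timeout as failed on lookup."""
    started_at = task.get("started_at", 0)
    if task.get("status") == "running" and time.time() - started_at > STREAM_TIMEOUT_SECONDS:
        task["status"] = "failed"
        task["error"] = "expired: exceeded stream timeout"
    return task
```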
Zamil Majdy
602a0a4fb1 fix(backend/chat): Strip tool call noise from conversation history context 2026-02-10 14:11:27 +04:00
Zamil Majdy
8d7d531ae0 refactor(backend/chat): Remove unused max_context_messages config 2026-02-10 13:57:33 +04:00
Zamil Majdy
43153a12e0 fix(backend/chat): Remove manual context truncation from SDK path, let SDK handle compaction 2026-02-10 13:52:49 +04:00
Zamil Majdy
587e11c60a refactor(backend/chat): Extract MCP server name constants to avoid hardcoded strings 2026-02-10 12:12:08 +04:00
Zamil Majdy
57da545e02 Merge branch 'dev' into feat/copitlot-claude-code 2026-02-10 12:10:35 +04:00
Zamil Majdy
626980bf27 Merge branch 'dev' into feat/copitlot-claude-code 2026-02-09 19:26:52 +04:00
Swifty
e42b27af3c Merge branch 'dev' into feat/copitlot-claude-code 2026-02-09 09:12:23 +01:00
Zamil Majdy
34face15d2 fix lock 2026-02-09 11:45:59 +04:00
Zamil Majdy
7d32c83f95 fix(backend/chat): Handle non-serializable SDK objects in tool result output 2026-02-09 10:59:50 +04:00
Zamil Majdy
6e2a45b84e style(backend): Remove unused pytest import in execution_queue_test 2026-02-09 10:14:20 +04:00
Zamil Majdy
32f6532e9c Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/copitlot-claude-code 2026-02-09 10:10:32 +04:00
Zamil Majdy
0bbe8a184d Merge dev and resolve poetry.lock conflict 2026-02-08 19:40:17 +04:00
Zamil Majdy
7592deed63 fix(backend/chat): Address remaining PR review comments
- Fix tool_call_id always being "sdk-call" by generating unique IDs per invocation
- Fix validation using original tool_name instead of clean_name in security hooks
- Fix duplicate StreamFinish in Anthropic fallback path
- Fix ImportError fallback returning plain dict instead of re-raising
- Extract _build_input_schema helper to deduplicate schema construction
- Add else branch for unhandled SDK message types for observability
- Truncate large tool results in conversation history to prevent context overflow
2026-02-08 19:39:10 +04:00
Zamil Majdy
b9c759ce4f fix(backend/chat): Address additional PR review comments
- Add terminal StreamFinish in adapt_sdk_stream if SDK ends without one
- Sanitize error message in adapt_sdk_stream exception handler
- Pass full JSON schema (type, properties, required) to tool decorator
2026-02-08 07:14:45 +04:00
Zamil Majdy
5efb80d47b fix(backend/chat): Address PR review comments for Claude SDK integration
- Add StreamFinish after ErrorMessage in response adapter
- Fix str.replace to removeprefix in security hooks
- Apply max_context_messages limit as safety guard in history formatting
- Add empty prompt guard before sending to SDK
- Sanitize error messages to avoid exposing internal details
- Fix fire-and-forget asyncio.create_task by storing task reference
- Fix tool_calls population on assistant messages
- Rewrite Anthropic fallback to persist messages and merge consecutive roles
- Only use ANTHROPIC_API_KEY for fallback (not OpenRouter keys)
- Fix IndexError when tool result content list is empty
2026-02-06 13:25:10 +04:00
Zamil Majdy
b49d8e2cba fix lock 2026-02-06 13:19:53 +04:00
Zamil Majdy
452544530d feat(chat/sdk): Enable native SDK context compaction
- Remove manual truncation in conversation history formatting
- SDK's automatic compaction handles context limits intelligently
- Add observability hooks:
  - PreCompact: Log when SDK triggers context compaction
  - PostToolUse: Log successful tool executions
  - PostToolUseFailure: Log and debug failed tool executions
- Update config: increase max_context_messages (SDK handles compaction)
2026-02-06 12:44:48 +04:00
Zamil Majdy
32ee7e6cf8 fix(chat): Remove aggressive stale task detection
The 60-second timeout was too aggressive and could incorrectly mark
legitimate long-running tool calls as stale. Relying on Redis TTL
(1 hour) for cleanup is sufficient and more reliable.
2026-02-06 11:45:54 +04:00
Zamil Majdy
670663c406 Merge dev and resolve poetry.lock conflict 2026-02-06 11:40:41 +04:00
Zamil Majdy
0dbe4cf51e feat(backend/chat): Add Claude Agent SDK integration for CoPilot
This PR adds Claude Agent SDK as the default backend for CoPilot chat completions,
replacing the direct OpenAI API integration.

Key changes:
- Add Claude Agent SDK service layer with MCP tool adapter
- Fix message persistence after tool calls (messages no longer disappear on refresh)
- Add OpenRouter tracing for session title generation
- Add security hooks for user context validation
- Add Anthropic fallback when SDK is not available
- Clean up excessive debug logging
2026-02-06 11:38:17 +04:00
70 changed files with 7321 additions and 672 deletions

View File

@@ -5,42 +5,13 @@
!docs/ !docs/
# Platform - Libs # Platform - Libs
!autogpt_platform/autogpt_libs/autogpt_libs/ !autogpt_platform/autogpt_libs/
!autogpt_platform/autogpt_libs/pyproject.toml
!autogpt_platform/autogpt_libs/poetry.lock
!autogpt_platform/autogpt_libs/README.md
# Platform - Backend # Platform - Backend
!autogpt_platform/backend/backend/ !autogpt_platform/backend/
!autogpt_platform/backend/test/e2e_test_data.py
!autogpt_platform/backend/migrations/
!autogpt_platform/backend/schema.prisma
!autogpt_platform/backend/pyproject.toml
!autogpt_platform/backend/poetry.lock
!autogpt_platform/backend/README.md
!autogpt_platform/backend/.env
!autogpt_platform/backend/gen_prisma_types_stub.py
# Platform - Market
!autogpt_platform/market/market/
!autogpt_platform/market/scripts.py
!autogpt_platform/market/schema.prisma
!autogpt_platform/market/pyproject.toml
!autogpt_platform/market/poetry.lock
!autogpt_platform/market/README.md
# Platform - Frontend # Platform - Frontend
!autogpt_platform/frontend/src/ !autogpt_platform/frontend/
!autogpt_platform/frontend/public/
!autogpt_platform/frontend/scripts/
!autogpt_platform/frontend/package.json
!autogpt_platform/frontend/pnpm-lock.yaml
!autogpt_platform/frontend/tsconfig.json
!autogpt_platform/frontend/README.md
## config
!autogpt_platform/frontend/*.config.*
!autogpt_platform/frontend/.env.*
!autogpt_platform/frontend/.env
# Classic - AutoGPT # Classic - AutoGPT
!classic/original_autogpt/autogpt/ !classic/original_autogpt/autogpt/
@@ -64,6 +35,38 @@
# Classic - Frontend # Classic - Frontend
!classic/frontend/build/web/ !classic/frontend/build/web/
# Explicitly re-ignore some folders # Explicitly re-ignore unwanted files from whitelisted directories
.* # Note: These patterns MUST come after the whitelist rules to take effect
**/__pycache__
# Hidden files and directories (but keep frontend .env files needed for build)
**/.*
!autogpt_platform/frontend/.env
!autogpt_platform/frontend/.env.default
!autogpt_platform/frontend/.env.production
# Python artifacts
**/__pycache__/
**/*.pyc
**/*.pyo
**/.venv/
**/.ruff_cache/
**/.pytest_cache/
**/.coverage
**/htmlcov/
# Node artifacts
**/node_modules/
**/.next/
**/storybook-static/
**/playwright-report/
**/test-results/
# Build artifacts
**/dist/
**/build/
!autogpt_platform/frontend/src/**/build/
**/target/
# Logs and temp files
**/*.log
**/*.tmp

View File

@@ -26,7 +26,6 @@ jobs:
setup: setup:
runs-on: ubuntu-latest runs-on: ubuntu-latest
outputs: outputs:
cache-key: ${{ steps.cache-key.outputs.key }}
components-changed: ${{ steps.filter.outputs.components }} components-changed: ${{ steps.filter.outputs.components }}
steps: steps:
@@ -41,28 +40,17 @@ jobs:
components: components:
- 'autogpt_platform/frontend/src/components/**' - 'autogpt_platform/frontend/src/components/**'
- name: Set up Node.js
uses: actions/setup-node@v6
with:
node-version: "22.18.0"
- name: Enable corepack - name: Enable corepack
run: corepack enable run: corepack enable
- name: Generate cache key - name: Set up Node
id: cache-key uses: actions/setup-node@v6
run: echo "key=${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/package.json') }}" >> $GITHUB_OUTPUT
- name: Cache dependencies
uses: actions/cache@v5
with: with:
path: ~/.pnpm-store node-version: "22.18.0"
key: ${{ steps.cache-key.outputs.key }} cache: "pnpm"
restore-keys: | cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
${{ runner.os }}-pnpm-
- name: Install dependencies - name: Install dependencies to populate cache
run: pnpm install --frozen-lockfile run: pnpm install --frozen-lockfile
lint: lint:
@@ -73,22 +61,15 @@ jobs:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v6 uses: actions/checkout@v6
- name: Set up Node.js
uses: actions/setup-node@v6
with:
node-version: "22.18.0"
- name: Enable corepack - name: Enable corepack
run: corepack enable run: corepack enable
- name: Restore dependencies cache - name: Set up Node
uses: actions/cache@v5 uses: actions/setup-node@v6
with: with:
path: ~/.pnpm-store node-version: "22.18.0"
key: ${{ needs.setup.outputs.cache-key }} cache: "pnpm"
restore-keys: | cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
${{ runner.os }}-pnpm-
- name: Install dependencies - name: Install dependencies
run: pnpm install --frozen-lockfile run: pnpm install --frozen-lockfile
@@ -111,22 +92,15 @@ jobs:
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Set up Node.js
uses: actions/setup-node@v6
with:
node-version: "22.18.0"
- name: Enable corepack - name: Enable corepack
run: corepack enable run: corepack enable
- name: Restore dependencies cache - name: Set up Node
uses: actions/cache@v5 uses: actions/setup-node@v6
with: with:
path: ~/.pnpm-store node-version: "22.18.0"
key: ${{ needs.setup.outputs.cache-key }} cache: "pnpm"
restore-keys: | cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
${{ runner.os }}-pnpm-
- name: Install dependencies - name: Install dependencies
run: pnpm install --frozen-lockfile run: pnpm install --frozen-lockfile
@@ -141,10 +115,8 @@ jobs:
exitOnceUploaded: true exitOnceUploaded: true
e2e_test: e2e_test:
name: end-to-end tests
runs-on: big-boi runs-on: big-boi
needs: setup
strategy:
fail-fast: false
steps: steps:
- name: Checkout repository - name: Checkout repository
@@ -152,19 +124,11 @@ jobs:
with: with:
submodules: recursive submodules: recursive
- name: Set up Node.js - name: Set up Platform - Copy default supabase .env
uses: actions/setup-node@v6
with:
node-version: "22.18.0"
- name: Enable corepack
run: corepack enable
- name: Copy default supabase .env
run: | run: |
cp ../.env.default ../.env cp ../.env.default ../.env
- name: Copy backend .env and set OpenAI API key - name: Set up Platform - Copy backend .env and set OpenAI API key
run: | run: |
cp ../backend/.env.default ../backend/.env cp ../backend/.env.default ../backend/.env
echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env
@@ -172,77 +136,125 @@ jobs:
# Used by E2E test data script to generate embeddings for approved store agents # Used by E2E test data script to generate embeddings for approved store agents
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- name: Set up Docker Buildx - name: Set up Platform - Set up Docker Buildx
uses: docker/setup-buildx-action@v3 uses: docker/setup-buildx-action@v3
with:
driver: docker-container
driver-opts: network=host
- name: Cache Docker layers - name: Set up Platform - Expose GHA cache to docker buildx CLI
uses: crazy-max/ghaction-github-runtime@v3
- name: Set up Platform - Build Docker images (with cache)
working-directory: autogpt_platform
run: |
pip install pyyaml
# Resolve extends and generate a flat compose file that bake can understand
docker compose -f docker-compose.yml config > docker-compose.resolved.yml
# Add cache configuration to the resolved compose file
python ../.github/workflows/scripts/docker-ci-fix-compose-build-cache.py \
--source docker-compose.resolved.yml \
--cache-from "type=gha" \
--cache-to "type=gha,mode=max" \
--backend-hash "${{ hashFiles('autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/poetry.lock', 'autogpt_platform/backend/backend') }}" \
--frontend-hash "${{ hashFiles('autogpt_platform/frontend/Dockerfile', 'autogpt_platform/frontend/pnpm-lock.yaml', 'autogpt_platform/frontend/src') }}" \
--git-ref "${{ github.ref }}"
# Build with bake using the resolved compose file (now includes cache config)
docker buildx bake --allow=fs.read=.. -f docker-compose.resolved.yml --load
env:
NEXT_PUBLIC_PW_TEST: true
- name: Set up tests - Cache E2E test data
id: e2e-data-cache
uses: actions/cache@v5 uses: actions/cache@v5
with: with:
path: /tmp/.buildx-cache path: /tmp/e2e_test_data.sql
key: ${{ runner.os }}-buildx-frontend-test-${{ hashFiles('autogpt_platform/docker-compose.yml', 'autogpt_platform/backend/Dockerfile', 'autogpt_platform/backend/pyproject.toml', 'autogpt_platform/backend/poetry.lock') }} key: e2e-test-data-${{ hashFiles('autogpt_platform/backend/test/e2e_test_data.py', 'autogpt_platform/backend/migrations/**', '.github/workflows/platform-frontend-ci.yml') }}
restore-keys: |
${{ runner.os }}-buildx-frontend-test-
- name: Run docker compose - name: Set up Platform - Start Supabase DB + Auth
run: | run: |
NEXT_PUBLIC_PW_TEST=true docker compose -f ../docker-compose.yml up -d docker compose -f ../docker-compose.resolved.yml up -d db auth --no-build
echo "Waiting for database to be ready..."
timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done'
echo "Waiting for auth service to be ready..."
timeout 60 sh -c 'until docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -c "SELECT 1 FROM auth.users LIMIT 1" 2>/dev/null; do sleep 2; done' || echo "Auth schema check timeout, continuing..."
- name: Set up Platform - Run migrations
run: |
echo "Running migrations..."
docker compose -f ../docker-compose.resolved.yml run --rm migrate
echo "✅ Migrations completed"
env: env:
DOCKER_BUILDKIT: 1 NEXT_PUBLIC_PW_TEST: true
BUILDX_CACHE_FROM: type=local,src=/tmp/.buildx-cache
BUILDX_CACHE_TO: type=local,dest=/tmp/.buildx-cache-new,mode=max
- name: Move cache - name: Set up tests - Load cached E2E test data
if: steps.e2e-data-cache.outputs.cache-hit == 'true'
run: | run: |
rm -rf /tmp/.buildx-cache echo "✅ Found cached E2E test data, restoring..."
if [ -d "/tmp/.buildx-cache-new" ]; then {
mv /tmp/.buildx-cache-new /tmp/.buildx-cache echo "SET session_replication_role = 'replica';"
fi cat /tmp/e2e_test_data.sql
echo "SET session_replication_role = 'origin';"
} | docker compose -f ../docker-compose.resolved.yml exec -T db psql -U postgres -d postgres -b
# Refresh materialized views after restore
docker compose -f ../docker-compose.resolved.yml exec -T db \
psql -U postgres -d postgres -b -c "SET search_path TO platform; SELECT refresh_store_materialized_views();" || true
- name: Wait for services to be ready echo "✅ E2E test data restored from cache"
- name: Set up Platform - Start (all other services)
run: | run: |
docker compose -f ../docker-compose.resolved.yml up -d --no-build
echo "Waiting for rest_server to be ready..." echo "Waiting for rest_server to be ready..."
timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..." timeout 60 sh -c 'until curl -f http://localhost:8006/health 2>/dev/null; do sleep 2; done' || echo "Rest server health check timeout, continuing..."
echo "Waiting for database to be ready..." env:
timeout 60 sh -c 'until docker compose -f ../docker-compose.yml exec -T db pg_isready -U postgres 2>/dev/null; do sleep 2; done' || echo "Database ready check timeout, continuing..." NEXT_PUBLIC_PW_TEST: true
- name: Create E2E test data - name: Set up tests - Create E2E test data
if: steps.e2e-data-cache.outputs.cache-hit != 'true'
run: | run: |
echo "Creating E2E test data..." echo "Creating E2E test data..."
# First try to run the script from inside the container docker cp ../backend/test/e2e_test_data.py $(docker compose -f ../docker-compose.resolved.yml ps -q rest_server):/tmp/e2e_test_data.py
if docker compose -f ../docker-compose.yml exec -T rest_server test -f /app/autogpt_platform/backend/test/e2e_test_data.py; then docker compose -f ../docker-compose.resolved.yml exec -T rest_server sh -c "cd /app/autogpt_platform && python /tmp/e2e_test_data.py" || {
echo "✅ Found e2e_test_data.py in container, running it..." echo "❌ E2E test data creation failed!"
docker compose -f ../docker-compose.yml exec -T rest_server sh -c "cd /app/autogpt_platform && python backend/test/e2e_test_data.py" || { docker compose -f ../docker-compose.resolved.yml logs --tail=50 rest_server
echo "❌ E2E test data creation failed!" exit 1
docker compose -f ../docker-compose.yml logs --tail=50 rest_server }
exit 1
}
else
echo "⚠️ e2e_test_data.py not found in container, copying and running..."
# Copy the script into the container and run it
docker cp ../backend/test/e2e_test_data.py $(docker compose -f ../docker-compose.yml ps -q rest_server):/tmp/e2e_test_data.py || {
echo "❌ Failed to copy script to container"
exit 1
}
docker compose -f ../docker-compose.yml exec -T rest_server sh -c "cd /app/autogpt_platform && python /tmp/e2e_test_data.py" || {
echo "❌ E2E test data creation failed!"
docker compose -f ../docker-compose.yml logs --tail=50 rest_server
exit 1
}
fi
- name: Restore dependencies cache # Dump auth.users + platform schema for cache (two separate dumps)
uses: actions/cache@v5 echo "Dumping database for cache..."
{
docker compose -f ../docker-compose.resolved.yml exec -T db \
pg_dump -U postgres --data-only --column-inserts \
--table='auth.users' postgres
docker compose -f ../docker-compose.resolved.yml exec -T db \
pg_dump -U postgres --data-only --column-inserts \
--schema=platform \
--exclude-table='platform._prisma_migrations' \
--exclude-table='platform.apscheduler_jobs' \
--exclude-table='platform.apscheduler_jobs_batched_notifications' \
postgres
} > /tmp/e2e_test_data.sql
echo "✅ Database dump created for caching ($(wc -l < /tmp/e2e_test_data.sql) lines)"
- name: Set up tests - Enable corepack
run: corepack enable
- name: Set up tests - Set up Node
uses: actions/setup-node@v6
with: with:
path: ~/.pnpm-store node-version: "22.18.0"
key: ${{ needs.setup.outputs.cache-key }} cache: "pnpm"
restore-keys: | cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
${{ runner.os }}-pnpm-
- name: Install dependencies - name: Set up tests - Install dependencies
run: pnpm install --frozen-lockfile run: pnpm install --frozen-lockfile
- name: Install Browser 'chromium' - name: Set up tests - Install browser 'chromium'
run: pnpm playwright install --with-deps chromium run: pnpm playwright install --with-deps chromium
- name: Run Playwright tests - name: Run Playwright tests
@@ -269,7 +281,7 @@ jobs:
- name: Print Final Docker Compose logs - name: Print Final Docker Compose logs
if: always() if: always()
run: docker compose -f ../docker-compose.yml logs run: docker compose -f ../docker-compose.resolved.yml logs
integration_test: integration_test:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@@ -281,22 +293,15 @@ jobs:
with: with:
submodules: recursive submodules: recursive
- name: Set up Node.js
uses: actions/setup-node@v6
with:
node-version: "22.18.0"
- name: Enable corepack - name: Enable corepack
run: corepack enable run: corepack enable
- name: Restore dependencies cache - name: Set up Node
uses: actions/cache@v5 uses: actions/setup-node@v6
with: with:
path: ~/.pnpm-store node-version: "22.18.0"
key: ${{ needs.setup.outputs.cache-key }} cache: "pnpm"
restore-keys: | cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml
${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
${{ runner.os }}-pnpm-
- name: Install dependencies - name: Install dependencies
run: pnpm install --frozen-lockfile run: pnpm install --frozen-lockfile

View File

@@ -0,0 +1,195 @@
#!/usr/bin/env python3
"""
Add cache configuration to a resolved docker-compose file for all services
that have a build key, and ensure image names match what docker compose expects.
"""
import argparse
import yaml
DEFAULT_BRANCH = "dev"
CACHE_BUILDS_FOR_COMPONENTS = ["backend", "frontend"]
def main():
parser = argparse.ArgumentParser(
description="Add cache config to a resolved compose file"
)
parser.add_argument(
"--source",
required=True,
help="Source compose file to read (should be output of `docker compose config`)",
)
parser.add_argument(
"--cache-from",
default="type=gha",
help="Cache source configuration",
)
parser.add_argument(
"--cache-to",
default="type=gha,mode=max",
help="Cache destination configuration",
)
for component in CACHE_BUILDS_FOR_COMPONENTS:
parser.add_argument(
f"--{component}-hash",
default="",
help=f"Hash for {component} cache scope (e.g., from hashFiles())",
)
parser.add_argument(
"--git-ref",
default="",
help="Git ref for branch-based cache scope (e.g., refs/heads/master)",
)
args = parser.parse_args()
# Normalize git ref to a safe scope name (e.g., refs/heads/master -> master)
git_ref_scope = ""
if args.git_ref:
git_ref_scope = args.git_ref.replace("refs/heads/", "").replace("/", "-")
with open(args.source, "r") as f:
compose = yaml.safe_load(f)
# Get project name from compose file or default
project_name = compose.get("name", "autogpt_platform")
def get_image_name(dockerfile: str, target: str) -> str:
"""Generate image name based on Dockerfile folder and build target."""
dockerfile_parts = dockerfile.replace("\\", "/").split("/")
if len(dockerfile_parts) >= 2:
folder_name = dockerfile_parts[-2] # e.g., "backend" or "frontend"
else:
folder_name = "app"
return f"{project_name}-{folder_name}:{target}"
def get_build_key(dockerfile: str, target: str) -> str:
"""Generate a unique key for a Dockerfile+target combination."""
return f"{dockerfile}:{target}"
def get_component(dockerfile: str) -> str | None:
"""Get component name (frontend/backend) from dockerfile path."""
for component in CACHE_BUILDS_FOR_COMPONENTS:
if component in dockerfile:
return component
return None
# First pass: collect all services with build configs and identify duplicates
# Track which (dockerfile, target) combinations we've seen
build_key_to_first_service: dict[str, str] = {}
services_to_build: list[str] = []
services_to_dedupe: list[str] = []
for service_name, service_config in compose.get("services", {}).items():
if "build" not in service_config:
continue
build_config = service_config["build"]
dockerfile = build_config.get("dockerfile", "Dockerfile")
target = build_config.get("target", "default")
build_key = get_build_key(dockerfile, target)
if build_key not in build_key_to_first_service:
# First service with this build config - it will do the actual build
build_key_to_first_service[build_key] = service_name
services_to_build.append(service_name)
else:
# Duplicate - will just use the image from the first service
services_to_dedupe.append(service_name)
# Second pass: configure builds and deduplicate
modified_services = []
for service_name, service_config in compose.get("services", {}).items():
if "build" not in service_config:
continue
build_config = service_config["build"]
dockerfile = build_config.get("dockerfile", "Dockerfile")
target = build_config.get("target", "latest")
image_name = get_image_name(dockerfile, target)
# Set image name for all services (needed for both builders and deduped)
service_config["image"] = image_name
if service_name in services_to_dedupe:
# Remove build config - this service will use the pre-built image
del service_config["build"]
continue
# This service will do the actual build - add cache config
cache_from_list = []
cache_to_list = []
component = get_component(dockerfile)
if not component:
# Skip services that don't clearly match frontend/backend
continue
# Get the hash for this component
component_hash = getattr(args, f"{component}_hash")
# Scope format: platform-{component}-{target}-{hash|ref}
# Example: platform-backend-server-abc123
if "type=gha" in args.cache_from:
# 1. Primary: exact hash match (most specific)
if component_hash:
hash_scope = f"platform-{component}-{target}-{component_hash}"
cache_from_list.append(f"{args.cache_from},scope={hash_scope}")
# 2. Fallback: branch-based cache
if git_ref_scope:
ref_scope = f"platform-{component}-{target}-{git_ref_scope}"
cache_from_list.append(f"{args.cache_from},scope={ref_scope}")
# 3. Fallback: dev branch cache (for PRs/feature branches)
if git_ref_scope and git_ref_scope != DEFAULT_BRANCH:
master_scope = f"platform-{component}-{target}-{DEFAULT_BRANCH}"
cache_from_list.append(f"{args.cache_from},scope={master_scope}")
if "type=gha" in args.cache_to:
# Write to both hash-based and branch-based scopes
if component_hash:
hash_scope = f"platform-{component}-{target}-{component_hash}"
cache_to_list.append(f"{args.cache_to},scope={hash_scope}")
if git_ref_scope:
ref_scope = f"platform-{component}-{target}-{git_ref_scope}"
cache_to_list.append(f"{args.cache_to},scope={ref_scope}")
# Ensure we have at least one cache source/target
if not cache_from_list:
cache_from_list.append(args.cache_from)
if not cache_to_list:
cache_to_list.append(args.cache_to)
build_config["cache_from"] = cache_from_list
build_config["cache_to"] = cache_to_list
modified_services.append(service_name)
# Write back to the same file
with open(args.source, "w") as f:
yaml.dump(compose, f, default_flow_style=False, sort_keys=False)
print(f"Added cache config to {len(modified_services)} services in {args.source}:")
for svc in modified_services:
svc_config = compose["services"][svc]
build_cfg = svc_config.get("build", {})
cache_from_list = build_cfg.get("cache_from", ["none"])
cache_to_list = build_cfg.get("cache_to", ["none"])
print(f" - {svc}")
print(f" image: {svc_config.get('image', 'N/A')}")
print(f" cache_from: {cache_from_list}")
print(f" cache_to: {cache_to_list}")
if services_to_dedupe:
print(
f"Deduplicated {len(services_to_dedupe)} services (will use pre-built images):"
)
for svc in services_to_dedupe:
print(f" - {svc} -> {compose['services'][svc].get('image', 'N/A')}")
if __name__ == "__main__":
main()

View File

@@ -45,6 +45,11 @@ AutoGPT Platform is a monorepo containing:
 - Backend/Frontend services use YAML anchors for consistent configuration
 - Supabase services (`db/docker/docker-compose.yml`) follow the same pattern
+### Branching Strategy
+- **`dev`** is the main development branch. All PRs should target `dev`.
+- **`master`** is the production branch. Only used for production releases.
 ### Creating Pull Requests
 - Create the PR against the `dev` branch of the repository.

View File

@@ -448,61 +448,61 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
[[package]] [[package]]
name = "cryptography" name = "cryptography"
version = "46.0.4" version = "46.0.5"
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
optional = false optional = false
python-versions = "!=3.9.0,!=3.9.1,>=3.8" python-versions = "!=3.9.0,!=3.9.1,>=3.8"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "cryptography-46.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:281526e865ed4166009e235afadf3a4c4cba6056f99336a99efba65336fd5485"}, {file = "cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5aa3e463596b0087b3da0dbe2b2487e9fc261d25da85754e30e3b40637d61f81"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:eeeb2e33d8dbcccc34d64651f00a98cb41b2dc69cef866771a5717e6734dfa32"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0"},
{file = "cryptography-46.0.4-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616"}, {file = "cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731"},
{file = "cryptography-46.0.4-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0"}, {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82"},
{file = "cryptography-46.0.4-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0"}, {file = "cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1"},
{file = "cryptography-46.0.4-cp311-abi3-win32.whl", hash = "sha256:82a62483daf20b8134f6e92898da70d04d0ef9a75829d732ea1018678185f4f5"}, {file = "cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48"},
{file = "cryptography-46.0.4-cp311-abi3-win_amd64.whl", hash = "sha256:6225d3ebe26a55dbc8ead5ad1265c0403552a63336499564675b29eb3184c09b"}, {file = "cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4"},
{file = "cryptography-46.0.4-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:485e2b65d25ec0d901bca7bcae0f53b00133bf3173916d8e421f6fddde103908"}, {file = "cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:c92010b58a51196a5f41c3795190203ac52edfd5dc3ff99149b4659eba9d2085"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:9b34d8ba84454641a6bf4d6762d15847ecbd85c1316c0a7984e6e4e9f748ec2e"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0"},
{file = "cryptography-46.0.4-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f"}, {file = "cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663"},
{file = "cryptography-46.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82"}, {file = "cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826"},
{file = "cryptography-46.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c"}, {file = "cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d"},
{file = "cryptography-46.0.4-cp314-cp314t-win32.whl", hash = "sha256:c411f16275b0dea722d76544a61d6421e2cc829ad76eec79280dbdc9ddf50061"}, {file = "cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a"},
{file = "cryptography-46.0.4-cp314-cp314t-win_amd64.whl", hash = "sha256:728fedc529efc1439eb6107b677f7f7558adab4553ef8669f0d02d42d7b959a7"}, {file = "cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4"},
{file = "cryptography-46.0.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a9556ba711f7c23f77b151d5798f3ac44a13455cc68db7697a1096e6d0563cab"}, {file = "cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:a90e43e3ef65e6dcf969dfe3bb40cbf5aef0d523dff95bfa24256be172a845f4"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d5a45ddc256f492ce42a4e35879c5e5528c09cd9ad12420828c972951d8e016b"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d"},
{file = "cryptography-46.0.4-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019"}, {file = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c"},
{file = "cryptography-46.0.4-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4"}, {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4"},
{file = "cryptography-46.0.4-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b"}, {file = "cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9"},
{file = "cryptography-46.0.4-cp38-abi3-win32.whl", hash = "sha256:0563655cb3c6d05fb2afe693340bc050c30f9f34e15763361cf08e94749401fc"}, {file = "cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72"},
{file = "cryptography-46.0.4-cp38-abi3-win_amd64.whl", hash = "sha256:fa0900b9ef9c49728887d1576fd8d9e7e3ea872fa9b25ef9b64888adc434e976"}, {file = "cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595"},
{file = "cryptography-46.0.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:766330cce7416c92b5e90c3bb71b1b79521760cdcfc3a6a1a182d4c9fab23d2b"}, {file = "cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c"},
{file = "cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c236a44acfb610e70f6b3e1c3ca20ff24459659231ef2f8c48e879e2d32b73da"}, {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a"},
{file = "cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8a15fb869670efa8f83cbffbc8753c1abf236883225aed74cd179b720ac9ec80"}, {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356"},
{file = "cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:fdc3daab53b212472f1524d070735b2f0c214239df131903bae1d598016fa822"}, {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da"},
{file = "cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:44cc0675b27cadb71bdbb96099cca1fa051cd11d2ade09e5cd3a2edb929ed947"}, {file = "cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257"},
{file = "cryptography-46.0.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be8c01a7d5a55f9a47d1888162b76c8f49d62b234d88f0ff91a9fbebe32ffbc3"}, {file = "cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7"},
{file = "cryptography-46.0.4.tar.gz", hash = "sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59"}, {file = "cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d"},
] ]
[package.dependencies] [package.dependencies]
@@ -516,7 +516,7 @@ nox = ["nox[uv] (>=2024.4.15)"]
pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.14)", "ruff (>=0.11.11)"] pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.14)", "ruff (>=0.11.11)"]
sdist = ["build (>=1.0.0)"] sdist = ["build (>=1.0.0)"]
ssh = ["bcrypt (>=3.1.5)"] ssh = ["bcrypt (>=3.1.5)"]
test = ["certifi (>=2024)", "cryptography-vectors (==46.0.4)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test = ["certifi (>=2024)", "cryptography-vectors (==46.0.5)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
test-randomorder = ["pytest-randomly"] test-randomorder = ["pytest-randomly"]
[[package]] [[package]]
@@ -570,24 +570,25 @@ tests = ["coverage", "coveralls", "dill", "mock", "nose"]
[[package]] [[package]]
name = "fastapi" name = "fastapi"
version = "0.128.0" version = "0.128.7"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d"}, {file = "fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662"},
{file = "fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a"}, {file = "fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24"},
] ]
[package.dependencies] [package.dependencies]
annotated-doc = ">=0.0.2" annotated-doc = ">=0.0.2"
pydantic = ">=2.7.0" pydantic = ">=2.7.0"
starlette = ">=0.40.0,<0.51.0" starlette = ">=0.40.0,<1.0.0"
typing-extensions = ">=4.8.0" typing-extensions = ">=4.8.0"
typing-inspection = ">=0.4.2"
[package.extras] [package.extras]
all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "orjson (>=3.9.3)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "ujson (>=5.8.0)", "uvicorn[standard] (>=0.12.0)"]
standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"] standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"]
standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[standard-no-fastapi-cloud-cli] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"] standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[standard-no-fastapi-cloud-cli] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"]
@@ -1062,14 +1063,14 @@ urllib3 = ">=1.26.0,<3"
[[package]] [[package]]
name = "launchdarkly-server-sdk" name = "launchdarkly-server-sdk"
version = "9.14.1" version = "9.15.0"
description = "LaunchDarkly SDK for Python" description = "LaunchDarkly SDK for Python"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.10"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "launchdarkly_server_sdk-9.14.1-py3-none-any.whl", hash = "sha256:a9e2bd9ecdef845cd631ae0d4334a1115e5b44257c42eb2349492be4bac7815c"}, {file = "launchdarkly_server_sdk-9.15.0-py3-none-any.whl", hash = "sha256:c267e29bfa3fb5e2a06a208448ada6ed5557a2924979b8d79c970b45d227c668"},
{file = "launchdarkly_server_sdk-9.14.1.tar.gz", hash = "sha256:1df44baf0a0efa74d8c1dad7a00592b98bce7d19edded7f770da8dbc49922213"}, {file = "launchdarkly_server_sdk-9.15.0.tar.gz", hash = "sha256:f31441b74bc1a69c381db57c33116509e407a2612628ad6dff0a7dbb39d5020b"},
] ]
[package.dependencies] [package.dependencies]
@@ -1478,14 +1479,14 @@ testing = ["coverage", "pytest", "pytest-benchmark"]
[[package]] [[package]]
name = "postgrest" name = "postgrest"
version = "2.27.2" version = "2.28.0"
description = "PostgREST client for Python. This library provides an ORM interface to PostgREST." description = "PostgREST client for Python. This library provides an ORM interface to PostgREST."
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "postgrest-2.27.2-py3-none-any.whl", hash = "sha256:1666fef3de05ca097a314433dd5ae2f2d71c613cb7b233d0f468c4ffe37277da"}, {file = "postgrest-2.28.0-py3-none-any.whl", hash = "sha256:7bca2f24dd1a1bf8a3d586c7482aba6cd41662da6733045fad585b63b7f7df75"},
{file = "postgrest-2.27.2.tar.gz", hash = "sha256:55407d530b5af3d64e883a71fec1f345d369958f723ce4a8ab0b7d169e313242"}, {file = "postgrest-2.28.0.tar.gz", hash = "sha256:c36b38646d25ea4255321d3d924ce70f8d20ec7799cb42c1221d6a818d4f6515"},
] ]
[package.dependencies] [package.dependencies]
@@ -2248,14 +2249,14 @@ cli = ["click (>=5.0)"]
[[package]] [[package]]
name = "realtime" name = "realtime"
version = "2.27.2" version = "2.28.0"
description = "" description = ""
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "realtime-2.27.2-py3-none-any.whl", hash = "sha256:34a9cbb26a274e707e8fc9e3ee0a66de944beac0fe604dc336d1e985db2c830f"}, {file = "realtime-2.28.0-py3-none-any.whl", hash = "sha256:db1bd59bab9b1fcc9f9d3b1a073bed35bf4994d720e6751f10031a58d57a3836"},
{file = "realtime-2.27.2.tar.gz", hash = "sha256:b960a90294d2cea1b3f1275ecb89204304728e08fff1c393cc1b3150739556b3"}, {file = "realtime-2.28.0.tar.gz", hash = "sha256:d18cedcebd6a8f22fcd509bc767f639761eb218b7b2b6f14fc4205b6259b50fc"},
] ]
[package.dependencies] [package.dependencies]
@@ -2436,14 +2437,14 @@ full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart
[[package]] [[package]]
name = "storage3" name = "storage3"
version = "2.27.2" version = "2.28.0"
description = "Supabase Storage client for Python." description = "Supabase Storage client for Python."
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "storage3-2.27.2-py3-none-any.whl", hash = "sha256:e6f16e7a260729e7b1f46e9bf61746805a02e30f5e419ee1291007c432e3ec63"}, {file = "storage3-2.28.0-py3-none-any.whl", hash = "sha256:ecb50efd2ac71dabbdf97e99ad346eafa630c4c627a8e5a138ceb5fbbadae716"},
{file = "storage3-2.27.2.tar.gz", hash = "sha256:cb4807b7f86b4bb1272ac6fdd2f3cfd8ba577297046fa5f88557425200275af5"}, {file = "storage3-2.28.0.tar.gz", hash = "sha256:bc1d008aff67de7a0f2bd867baee7aadbcdb6f78f5a310b4f7a38e8c13c19865"},
] ]
[package.dependencies] [package.dependencies]
@@ -2487,35 +2488,35 @@ python-dateutil = ">=2.6.0"
[[package]] [[package]]
name = "supabase" name = "supabase"
version = "2.27.2" version = "2.28.0"
description = "Supabase client for Python." description = "Supabase client for Python."
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "supabase-2.27.2-py3-none-any.whl", hash = "sha256:d4dce00b3a418ee578017ec577c0e5be47a9a636355009c76f20ed2faa15bc54"}, {file = "supabase-2.28.0-py3-none-any.whl", hash = "sha256:42776971c7d0ccca16034df1ab96a31c50228eb1eb19da4249ad2f756fc20272"},
{file = "supabase-2.27.2.tar.gz", hash = "sha256:2aed40e4f3454438822442a1e94a47be6694c2c70392e7ae99b51a226d4293f7"}, {file = "supabase-2.28.0.tar.gz", hash = "sha256:aea299aaab2a2eed3c57e0be7fc035c6807214194cce795a3575add20268ece1"},
] ]
[package.dependencies] [package.dependencies]
httpx = ">=0.26,<0.29" httpx = ">=0.26,<0.29"
postgrest = "2.27.2" postgrest = "2.28.0"
realtime = "2.27.2" realtime = "2.28.0"
storage3 = "2.27.2" storage3 = "2.28.0"
supabase-auth = "2.27.2" supabase-auth = "2.28.0"
supabase-functions = "2.27.2" supabase-functions = "2.28.0"
yarl = ">=1.22.0" yarl = ">=1.22.0"
[[package]] [[package]]
name = "supabase-auth" name = "supabase-auth"
version = "2.27.2" version = "2.28.0"
description = "Python Client Library for Supabase Auth" description = "Python Client Library for Supabase Auth"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "supabase_auth-2.27.2-py3-none-any.whl", hash = "sha256:78ec25b11314d0a9527a7205f3b1c72560dccdc11b38392f80297ef98664ee91"}, {file = "supabase_auth-2.28.0-py3-none-any.whl", hash = "sha256:2ac85026cc285054c7fa6d41924f3a333e9ec298c013e5b5e1754039ba7caec9"},
{file = "supabase_auth-2.27.2.tar.gz", hash = "sha256:0f5bcc79b3677cb42e9d321f3c559070cfa40d6a29a67672cc8382fb7dc2fe97"}, {file = "supabase_auth-2.28.0.tar.gz", hash = "sha256:2bb8f18ff39934e44b28f10918db965659f3735cd6fbfcc022fe0b82dbf8233e"},
] ]
[package.dependencies] [package.dependencies]
@@ -2525,14 +2526,14 @@ pyjwt = {version = ">=2.10.1", extras = ["crypto"]}
[[package]] [[package]]
name = "supabase-functions" name = "supabase-functions"
version = "2.27.2" version = "2.28.0"
description = "Library for Supabase Functions" description = "Library for Supabase Functions"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "supabase_functions-2.27.2-py3-none-any.whl", hash = "sha256:db480efc669d0bca07605b9b6f167312af43121adcc842a111f79bea416ef754"}, {file = "supabase_functions-2.28.0-py3-none-any.whl", hash = "sha256:30bf2d586f8df285faf0621bb5d5bb3ec3157234fc820553ca156f009475e4ae"},
{file = "supabase_functions-2.27.2.tar.gz", hash = "sha256:d0c8266207a94371cb3fd35ad3c7f025b78a97cf026861e04ccd35ac1775f80b"}, {file = "supabase_functions-2.28.0.tar.gz", hash = "sha256:db3dddfc37aca5858819eb461130968473bd8c75bd284581013958526dac718b"},
] ]
[package.dependencies] [package.dependencies]
@@ -2911,4 +2912,4 @@ type = ["pytest-mypy"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.10,<4.0" python-versions = ">=3.10,<4.0"
content-hash = "40eae94995dc0a388fa832ed4af9b6137f28d5b5ced3aaea70d5f91d4d9a179d" content-hash = "9619cae908ad38fa2c48016a58bcf4241f6f5793aa0e6cc140276e91c433cbbb"

View File

@@ -11,14 +11,14 @@ python = ">=3.10,<4.0"
colorama = "^0.4.6" colorama = "^0.4.6"
cryptography = "^46.0" cryptography = "^46.0"
expiringdict = "^1.2.2" expiringdict = "^1.2.2"
fastapi = "^0.128.0" fastapi = "^0.128.7"
google-cloud-logging = "^3.13.0" google-cloud-logging = "^3.13.0"
launchdarkly-server-sdk = "^9.14.1" launchdarkly-server-sdk = "^9.15.0"
pydantic = "^2.12.5" pydantic = "^2.12.5"
pydantic-settings = "^2.12.0" pydantic-settings = "^2.12.0"
pyjwt = { version = "^2.11.0", extras = ["crypto"] } pyjwt = { version = "^2.11.0", extras = ["crypto"] }
redis = "^6.2.0" redis = "^6.2.0"
supabase = "^2.27.2" supabase = "^2.28.0"
uvicorn = "^0.40.0" uvicorn = "^0.40.0"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]

View File

@@ -1,3 +1,5 @@
# ============================ DEPENDENCY BUILDER ============================ #
FROM debian:13-slim AS builder FROM debian:13-slim AS builder
# Set environment variables # Set environment variables
@@ -51,7 +53,9 @@ COPY autogpt_platform/backend/backend/data/partial_types.py ./backend/data/parti
COPY autogpt_platform/backend/gen_prisma_types_stub.py ./ COPY autogpt_platform/backend/gen_prisma_types_stub.py ./
RUN poetry run prisma generate && poetry run gen-prisma-stub RUN poetry run prisma generate && poetry run gen-prisma-stub
FROM debian:13-slim AS server_dependencies # ============================== BACKEND SERVER ============================== #
FROM debian:13-slim AS server
WORKDIR /app WORKDIR /app
@@ -62,16 +66,21 @@ ENV POETRY_HOME=/opt/poetry \
DEBIAN_FRONTEND=noninteractive DEBIAN_FRONTEND=noninteractive
ENV PATH=/opt/poetry/bin:$PATH ENV PATH=/opt/poetry/bin:$PATH
# Install Python, FFmpeg, and ImageMagick (required for video processing blocks) # Install Python, FFmpeg, ImageMagick, and CLI tools for agent use.
RUN apt-get update && apt-get install -y \ # bubblewrap provides OS-level sandbox (whitelist-only FS + no network)
# for the bash_exec MCP tool.
# Using --no-install-recommends saves ~650MB by skipping unnecessary deps like llvm, mesa, etc.
RUN apt-get update && apt-get install -y --no-install-recommends \
python3.13 \ python3.13 \
python3-pip \ python3-pip \
ffmpeg \ ffmpeg \
imagemagick \ imagemagick \
jq \
ripgrep \
tree \
bubblewrap \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Copy only necessary files from builder
COPY --from=builder /app /app
COPY --from=builder /usr/local/lib/python3* /usr/local/lib/python3* COPY --from=builder /usr/local/lib/python3* /usr/local/lib/python3*
COPY --from=builder /usr/local/bin/poetry /usr/local/bin/poetry COPY --from=builder /usr/local/bin/poetry /usr/local/bin/poetry
# Copy Node.js installation for Prisma # Copy Node.js installation for Prisma
@@ -81,30 +90,54 @@ COPY --from=builder /usr/bin/npm /usr/bin/npm
COPY --from=builder /usr/bin/npx /usr/bin/npx COPY --from=builder /usr/bin/npx /usr/bin/npx
COPY --from=builder /root/.cache/prisma-python/binaries /root/.cache/prisma-python/binaries COPY --from=builder /root/.cache/prisma-python/binaries /root/.cache/prisma-python/binaries
ENV PATH="/app/autogpt_platform/backend/.venv/bin:$PATH"
RUN mkdir -p /app/autogpt_platform/autogpt_libs
RUN mkdir -p /app/autogpt_platform/backend
COPY autogpt_platform/autogpt_libs /app/autogpt_platform/autogpt_libs
COPY autogpt_platform/backend/poetry.lock autogpt_platform/backend/pyproject.toml /app/autogpt_platform/backend/
WORKDIR /app/autogpt_platform/backend WORKDIR /app/autogpt_platform/backend
FROM server_dependencies AS migrate # Copy only the .venv from builder (not the entire /app directory)
# The .venv includes the generated Prisma client
COPY --from=builder /app/autogpt_platform/backend/.venv ./.venv
ENV PATH="/app/autogpt_platform/backend/.venv/bin:$PATH"
# Migration stage only needs schema and migrations - much lighter than full backend # Copy dependency files + autogpt_libs (path dependency)
COPY autogpt_platform/backend/schema.prisma /app/autogpt_platform/backend/ COPY autogpt_platform/autogpt_libs /app/autogpt_platform/autogpt_libs
COPY autogpt_platform/backend/backend/data/partial_types.py /app/autogpt_platform/backend/backend/data/partial_types.py COPY autogpt_platform/backend/poetry.lock autogpt_platform/backend/pyproject.toml ./
COPY autogpt_platform/backend/migrations /app/autogpt_platform/backend/migrations
FROM server_dependencies AS server # Copy backend code + docs (for Copilot docs search)
COPY autogpt_platform/backend ./
COPY autogpt_platform/backend /app/autogpt_platform/backend
COPY docs /app/docs COPY docs /app/docs
RUN poetry install --no-ansi --only-root RUN poetry install --no-ansi --only-root
ENV PORT=8000 ENV PORT=8000
CMD ["poetry", "run", "rest"] CMD ["poetry", "run", "rest"]
# =============================== DB MIGRATOR =============================== #
# Lightweight migrate stage - only needs Prisma CLI, not full Python environment
FROM debian:13-slim AS migrate
WORKDIR /app/autogpt_platform/backend
ENV DEBIAN_FRONTEND=noninteractive
# Install only what's needed for prisma migrate: Node.js and minimal Python for prisma-python
RUN apt-get update && apt-get install -y --no-install-recommends \
python3.13 \
python3-pip \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# Copy Node.js from builder (needed for Prisma CLI)
COPY --from=builder /usr/bin/node /usr/bin/node
COPY --from=builder /usr/lib/node_modules /usr/lib/node_modules
COPY --from=builder /usr/bin/npm /usr/bin/npm
# Copy Prisma binaries
COPY --from=builder /root/.cache/prisma-python/binaries /root/.cache/prisma-python/binaries
# Install prisma-client-py directly (much smaller than copying full venv)
RUN pip3 install "prisma>=0.15.0" --break-system-packages
COPY autogpt_platform/backend/schema.prisma ./
COPY autogpt_platform/backend/backend/data/partial_types.py ./backend/data/partial_types.py
COPY autogpt_platform/backend/gen_prisma_types_stub.py ./
COPY autogpt_platform/backend/migrations ./migrations
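Earlier in this Dockerfile, the server stage installs bubblewrap so the bash_exec MCP tool can run agent commands with a whitelist-only filesystem and no network access. Purely as a hedged sketch of what such an invocation could look like: the helper name run_sandboxed and the exact flag set are illustrative assumptions, not the actual bash_exec implementation.

import subprocess


def run_sandboxed(command: str, workdir: str, timeout: int = 60) -> subprocess.CompletedProcess:
    """Illustrative sketch: run a shell command under bubblewrap with no
    network namespace and only the workspace directory writable."""
    bwrap_cmd = [
        "bwrap",
        "--unshare-net",               # kernel-level network isolation
        "--ro-bind", "/usr", "/usr",   # whitelist read-only system dirs
        "--ro-bind", "/bin", "/bin",
        "--ro-bind", "/lib", "/lib",
        "--proc", "/proc",
        "--dev", "/dev",
        "--tmpfs", "/tmp",
        "--bind", workdir, workdir,    # only the agent workspace is writable
        "--chdir", workdir,
        "/bin/sh", "-c", command,
    ]
    return subprocess.run(bwrap_cmd, capture_output=True, text=True, timeout=timeout)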

View File

@@ -27,12 +27,11 @@ class ChatConfig(BaseSettings):
session_ttl: int = Field(default=43200, description="Session TTL in seconds") session_ttl: int = Field(default=43200, description="Session TTL in seconds")
# Streaming Configuration # Streaming Configuration
max_context_messages: int = Field(
default=50, ge=1, le=200, description="Maximum context messages"
)
stream_timeout: int = Field(default=300, description="Stream timeout in seconds") stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
max_retries: int = Field(default=3, description="Maximum number of retries") max_retries: int = Field(
default=3,
description="Max retries for fallback path (SDK handles retries internally)",
)
max_agent_runs: int = Field(default=30, description="Maximum number of agent runs") max_agent_runs: int = Field(default=30, description="Maximum number of agent runs")
max_agent_schedules: int = Field( max_agent_schedules: int = Field(
default=30, description="Maximum number of agent schedules" default=30, description="Maximum number of agent schedules"
@@ -93,6 +92,31 @@ class ChatConfig(BaseSettings):
description="Name of the prompt in Langfuse to fetch", description="Name of the prompt in Langfuse to fetch",
) )
# Claude Agent SDK Configuration
use_claude_agent_sdk: bool = Field(
default=True,
description="Use Claude Agent SDK for chat completions",
)
claude_agent_model: str | None = Field(
default=None,
description="Model for the Claude Agent SDK path. If None, derives from "
"the `model` field by stripping the OpenRouter provider prefix.",
)
claude_agent_max_buffer_size: int = Field(
default=10 * 1024 * 1024, # 10MB (default SDK is 1MB)
description="Max buffer size in bytes for Claude Agent SDK JSON message parsing. "
"Increase if tool outputs exceed the limit.",
)
claude_agent_max_subtasks: int = Field(
default=10,
description="Max number of sub-agent Tasks the SDK can spawn per session.",
)
claude_agent_use_resume: bool = Field(
default=True,
description="Use --resume for multi-turn conversations instead of "
"history compression. Falls back to compression when unavailable.",
)
# Extended thinking configuration for Claude models # Extended thinking configuration for Claude models
thinking_enabled: bool = Field( thinking_enabled: bool = Field(
default=True, default=True,
@@ -138,6 +162,17 @@ class ChatConfig(BaseSettings):
v = os.getenv("CHAT_INTERNAL_API_KEY") v = os.getenv("CHAT_INTERNAL_API_KEY")
return v return v
@field_validator("use_claude_agent_sdk", mode="before")
@classmethod
def get_use_claude_agent_sdk(cls, v):
"""Get use_claude_agent_sdk from environment if not provided."""
# Check environment variable - default to True if not set
env_val = os.getenv("CHAT_USE_CLAUDE_AGENT_SDK", "").lower()
if env_val:
return env_val in ("true", "1", "yes", "on")
# Default to True (SDK enabled by default)
return True if v is None else v
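A quick illustration of how this toggle behaves, assuming the settings class is importable at the path below and nothing else blocks construction; the asserts only restate the parsing logic in the validator above.

import os

from backend.api.features.chat.config import ChatConfig  # assumed module path

os.environ["CHAT_USE_CLAUDE_AGENT_SDK"] = "false"
assert ChatConfig().use_claude_agent_sdk is False   # non-truthy strings disable the SDK path

os.environ["CHAT_USE_CLAUDE_AGENT_SDK"] = "1"
assert ChatConfig().use_claude_agent_sdk is True    # "true", "1", "yes", "on" enable it

del os.environ["CHAT_USE_CLAUDE_AGENT_SDK"]
assert ChatConfig().use_claude_agent_sdk is True    # unset -> SDK enabled by default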
# Prompt paths for different contexts # Prompt paths for different contexts
PROMPT_PATHS: dict[str, str] = { PROMPT_PATHS: dict[str, str] = {
"default": "prompts/chat_system.md", "default": "prompts/chat_system.md",

View File

@@ -334,9 +334,8 @@ async def _get_session_from_cache(session_id: str) -> ChatSession | None:
try: try:
session = ChatSession.model_validate_json(raw_session) session = ChatSession.model_validate_json(raw_session)
logger.info( logger.info(
f"Loading session {session_id} from cache: " f"[CACHE] Loaded session {session_id}: {len(session.messages)} messages, "
f"message_count={len(session.messages)}, " f"last_roles={[m.role for m in session.messages[-3:]]}" # Last 3 roles
f"roles={[m.role for m in session.messages]}"
) )
return session return session
except Exception as e: except Exception as e:
@@ -378,11 +377,9 @@ async def _get_session_from_db(session_id: str) -> ChatSession | None:
return None return None
messages = prisma_session.Messages messages = prisma_session.Messages
logger.info( logger.debug(
f"Loading session {session_id} from DB: " f"[DB] Loaded session {session_id}: {len(messages) if messages else 0} messages, "
f"has_messages={messages is not None}, " f"roles={[m.role for m in messages[-3:]] if messages else []}" # Last 3 roles
f"message_count={len(messages) if messages else 0}, "
f"roles={[m.role for m in messages] if messages else []}"
) )
return ChatSession.from_db(prisma_session, messages) return ChatSession.from_db(prisma_session, messages)
@@ -433,10 +430,9 @@ async def _save_session_to_db(
"function_call": msg.function_call, "function_call": msg.function_call,
} }
) )
logger.info( logger.debug(
f"Saving {len(new_messages)} new messages to DB for session {session.session_id}: " f"[DB] Saving {len(new_messages)} messages to session {session.session_id}, "
f"roles={[m['role'] for m in messages_data]}, " f"roles={[m['role'] for m in messages_data]}"
f"start_sequence={existing_message_count}"
) )
await chat_db.add_chat_messages_batch( await chat_db.add_chat_messages_batch(
session_id=session.session_id, session_id=session.session_id,
@@ -476,7 +472,7 @@ async def get_chat_session(
logger.warning(f"Unexpected cache error for session {session_id}: {e}") logger.warning(f"Unexpected cache error for session {session_id}: {e}")
# Fall back to database # Fall back to database
logger.info(f"Session {session_id} not in cache, checking database") logger.debug(f"Session {session_id} not in cache, checking database")
session = await _get_session_from_db(session_id) session = await _get_session_from_db(session_id)
if session is None: if session is None:
@@ -493,7 +489,6 @@ async def get_chat_session(
# Cache the session from DB # Cache the session from DB
try: try:
await _cache_session(session) await _cache_session(session)
logger.info(f"Cached session {session_id} from database")
except Exception as e: except Exception as e:
logger.warning(f"Failed to cache session {session_id}: {e}") logger.warning(f"Failed to cache session {session_id}: {e}")
@@ -558,6 +553,40 @@ async def upsert_chat_session(
return session return session
async def append_and_save_message(session_id: str, message: ChatMessage) -> ChatSession:
"""Atomically append a message to a session and persist it.
Acquires the session lock, re-fetches the latest session state,
appends the message, and saves — preventing message loss when
concurrent requests modify the same session.
"""
lock = await _get_session_lock(session_id)
async with lock:
session = await get_chat_session(session_id)
if session is None:
raise ValueError(f"Session {session_id} not found")
session.messages.append(message)
existing_message_count = await chat_db.get_chat_session_message_count(
session_id
)
try:
await _save_session_to_db(session, existing_message_count)
except Exception as e:
raise DatabaseError(
f"Failed to persist message to session {session_id}"
) from e
try:
await _cache_session(session)
except Exception as e:
logger.warning(f"Cache write failed for session {session_id}: {e}")
return session
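For reference, a minimal sketch of calling this helper from an endpoint before starting generation, as the stream endpoint later in this diff does; the absolute import path and the record_user_turn name are assumptions.

from backend.api.features.chat.model import ChatMessage, append_and_save_message


async def record_user_turn(session_id: str, text: str) -> None:
    # Appends under the per-session lock, so a concurrent request cannot
    # overwrite or drop this message.
    message = ChatMessage(role="user", content=text)
    session = await append_and_save_message(session_id, message)
    print(f"Session {session_id} now has {len(session.messages)} messages")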
async def create_chat_session(user_id: str) -> ChatSession: async def create_chat_session(user_id: str) -> ChatSession:
"""Create a new chat session and persist it. """Create a new chat session and persist it.
@@ -664,13 +693,19 @@ async def update_session_title(session_id: str, title: str) -> bool:
logger.warning(f"Session {session_id} not found for title update") logger.warning(f"Session {session_id} not found for title update")
return False return False
# Invalidate cache so next fetch gets updated title # Update title in cache if it exists (instead of invalidating).
# This prevents race conditions where cache invalidation causes
# the frontend to see stale DB data while streaming is still in progress.
try: try:
redis_key = _get_session_cache_key(session_id) cached = await _get_session_from_cache(session_id)
async_redis = await get_redis_async() if cached:
await async_redis.delete(redis_key) cached.title = title
await _cache_session(cached)
except Exception as e: except Exception as e:
logger.warning(f"Failed to invalidate cache for session {session_id}: {e}") # Not critical - title will be correct on next full cache refresh
logger.warning(
f"Failed to update title in cache for session {session_id}: {e}"
)
return True return True
except Exception as e: except Exception as e:

View File

@@ -1,5 +1,6 @@
"""Chat API routes for chat session management and streaming via SSE.""" """Chat API routes for chat session management and streaming via SSE."""
import asyncio
import logging import logging
import uuid as uuid_module import uuid as uuid_module
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
@@ -11,19 +12,29 @@ from fastapi.responses import StreamingResponse
from pydantic import BaseModel from pydantic import BaseModel
from backend.util.exceptions import NotFoundError from backend.util.exceptions import NotFoundError
from backend.util.feature_flag import Flag, is_feature_enabled
from . import service as chat_service from . import service as chat_service
from . import stream_registry from . import stream_registry
from .completion_handler import process_operation_failure, process_operation_success from .completion_handler import process_operation_failure, process_operation_success
from .config import ChatConfig from .config import ChatConfig
from .model import ChatSession, create_chat_session, get_chat_session, get_user_sessions from .model import (
from .response_model import StreamFinish, StreamHeartbeat ChatMessage,
ChatSession,
append_and_save_message,
create_chat_session,
get_chat_session,
get_user_sessions,
)
from .response_model import StreamError, StreamFinish, StreamHeartbeat, StreamStart
from .sdk import service as sdk_service
from .tools.models import ( from .tools.models import (
AgentDetailsResponse, AgentDetailsResponse,
AgentOutputResponse, AgentOutputResponse,
AgentPreviewResponse, AgentPreviewResponse,
AgentSavedResponse, AgentSavedResponse,
AgentsFoundResponse, AgentsFoundResponse,
BlockDetailsResponse,
BlockListResponse, BlockListResponse,
BlockOutputResponse, BlockOutputResponse,
ClarificationNeededResponse, ClarificationNeededResponse,
@@ -40,6 +51,7 @@ from .tools.models import (
SetupRequirementsResponse, SetupRequirementsResponse,
UnderstandingUpdatedResponse, UnderstandingUpdatedResponse,
) )
from .tracking import track_user_message
config = ChatConfig() config = ChatConfig()
@@ -231,6 +243,10 @@ async def get_session(
active_task, last_message_id = await stream_registry.get_active_task_for_session( active_task, last_message_id = await stream_registry.get_active_task_for_session(
session_id, user_id session_id, user_id
) )
logger.info(
f"[GET_SESSION] session={session_id}, active_task={active_task is not None}, "
f"msg_count={len(messages)}, last_role={messages[-1].get('role') if messages else 'none'}"
)
if active_task: if active_task:
# Filter out the in-progress assistant message from the session response. # Filter out the in-progress assistant message from the session response.
# The client will receive the complete assistant response through the SSE # The client will receive the complete assistant response through the SSE
@@ -300,10 +316,9 @@ async def stream_chat_post(
f"user={user_id}, message_len={len(request.message)}", f"user={user_id}, message_len={len(request.message)}",
extra={"json_fields": log_meta}, extra={"json_fields": log_meta},
) )
session = await _validate_and_get_session(session_id, user_id) session = await _validate_and_get_session(session_id, user_id)
logger.info( logger.info(
f"[TIMING] session validated in {(time.perf_counter() - stream_start_time)*1000:.1f}ms", f"[TIMING] session validated in {(time.perf_counter() - stream_start_time) * 1000:.1f}ms",
extra={ extra={
"json_fields": { "json_fields": {
**log_meta, **log_meta,
@@ -312,6 +327,25 @@ async def stream_chat_post(
}, },
) )
# Atomically append user message to session BEFORE creating task to avoid
# race condition where GET_SESSION sees task as "running" but message isn't
# saved yet. append_and_save_message re-fetches inside a lock to prevent
# message loss from concurrent requests.
if request.message:
message = ChatMessage(
role="user" if request.is_user_message else "assistant",
content=request.message,
)
if request.is_user_message:
track_user_message(
user_id=user_id,
session_id=session_id,
message_length=len(request.message),
)
logger.info(f"[STREAM] Saving user message to session {session_id}")
session = await append_and_save_message(session_id, message)
logger.info(f"[STREAM] User message saved for session {session_id}")
# Create a task in the stream registry for reconnection support # Create a task in the stream registry for reconnection support
task_id = str(uuid_module.uuid4()) task_id = str(uuid_module.uuid4())
operation_id = str(uuid_module.uuid4()) operation_id = str(uuid_module.uuid4())
@@ -327,7 +361,7 @@ async def stream_chat_post(
operation_id=operation_id, operation_id=operation_id,
) )
logger.info( logger.info(
f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start)*1000:.1f}ms", f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start) * 1000:.1f}ms",
extra={ extra={
"json_fields": { "json_fields": {
**log_meta, **log_meta,
@@ -348,15 +382,47 @@ async def stream_chat_post(
first_chunk_time, ttfc = None, None first_chunk_time, ttfc = None, None
chunk_count = 0 chunk_count = 0
try: try:
async for chunk in chat_service.stream_chat_completion( # Emit a start event with task_id for reconnection
start_chunk = StreamStart(messageId=task_id, taskId=task_id)
await stream_registry.publish_chunk(task_id, start_chunk)
logger.info(
f"[TIMING] StreamStart published at {(time_module.perf_counter() - gen_start_time) * 1000:.1f}ms",
extra={
"json_fields": {
**log_meta,
"elapsed_ms": (time_module.perf_counter() - gen_start_time)
* 1000,
}
},
)
# Choose service based on LaunchDarkly flag (falls back to config default)
use_sdk = await is_feature_enabled(
Flag.COPILOT_SDK,
user_id or "anonymous",
default=config.use_claude_agent_sdk,
)
stream_fn = (
sdk_service.stream_chat_completion_sdk
if use_sdk
else chat_service.stream_chat_completion
)
logger.info(
f"[TIMING] Calling {'sdk' if use_sdk else 'standard'} stream_chat_completion",
extra={"json_fields": log_meta},
)
# Pass message=None since we already added it to the session above
async for chunk in stream_fn(
session_id, session_id,
request.message, None, # Message already in session
is_user_message=request.is_user_message, is_user_message=request.is_user_message,
user_id=user_id, user_id=user_id,
session=session, # Pass pre-fetched session to avoid double-fetch session=session, # Pass session with message already added
context=request.context, context=request.context,
_task_id=task_id, # Pass task_id so service emits start with taskId for reconnection
): ):
# Skip duplicate StreamStart — we already published one above
if isinstance(chunk, StreamStart):
continue
chunk_count += 1 chunk_count += 1
if first_chunk_time is None: if first_chunk_time is None:
first_chunk_time = time_module.perf_counter() first_chunk_time = time_module.perf_counter()
@@ -377,7 +443,7 @@ async def stream_chat_post(
gen_end_time = time_module.perf_counter() gen_end_time = time_module.perf_counter()
total_time = (gen_end_time - gen_start_time) * 1000 total_time = (gen_end_time - gen_start_time) * 1000
logger.info( logger.info(
f"[TIMING] run_ai_generation FINISHED in {total_time/1000:.1f}s; " f"[TIMING] run_ai_generation FINISHED in {total_time / 1000:.1f}s; "
f"task={task_id}, session={session_id}, " f"task={task_id}, session={session_id}, "
f"ttfc={ttfc or -1:.2f}s, n_chunks={chunk_count}", f"ttfc={ttfc or -1:.2f}s, n_chunks={chunk_count}",
extra={ extra={
@@ -404,6 +470,17 @@ async def stream_chat_post(
} }
}, },
) )
# Publish a StreamError so the frontend can display an error message
try:
await stream_registry.publish_chunk(
task_id,
StreamError(
errorText="An error occurred. Please try again.",
code="stream_error",
),
)
except Exception:
pass # Best-effort; mark_task_completed will publish StreamFinish
await stream_registry.mark_task_completed(task_id, "failed") await stream_registry.mark_task_completed(task_id, "failed")
# Start the AI generation in a background task # Start the AI generation in a background task
@@ -506,8 +583,14 @@ async def stream_chat_post(
"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)} "json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}
}, },
) )
# Surface error to frontend so it doesn't appear stuck
yield StreamError(
errorText="An error occurred. Please try again.",
code="stream_error",
).to_sse()
yield StreamFinish().to_sse()
finally: finally:
# Unsubscribe when client disconnects or stream ends to prevent resource leak # Unsubscribe when client disconnects or stream ends
if subscriber_queue is not None: if subscriber_queue is not None:
try: try:
await stream_registry.unsubscribe_from_task( await stream_registry.unsubscribe_from_task(
@@ -751,8 +834,6 @@ async def stream_task(
) )
async def event_generator() -> AsyncGenerator[str, None]: async def event_generator() -> AsyncGenerator[str, None]:
import asyncio
heartbeat_interval = 15.0 # Send heartbeat every 15 seconds heartbeat_interval = 15.0 # Send heartbeat every 15 seconds
try: try:
while True: while True:
@@ -971,6 +1052,7 @@ ToolResponseUnion = (
| AgentSavedResponse | AgentSavedResponse
| ClarificationNeededResponse | ClarificationNeededResponse
| BlockListResponse | BlockListResponse
| BlockDetailsResponse
| BlockOutputResponse | BlockOutputResponse
| DocSearchResultsResponse | DocSearchResultsResponse
| DocPageResponse | DocPageResponse

View File

@@ -0,0 +1,14 @@
"""Claude Agent SDK integration for CoPilot.
This module provides the integration layer between the Claude Agent SDK
and the existing CoPilot tool system, enabling drop-in replacement of
the current LLM orchestration with the battle-tested Claude Agent SDK.
"""
from .service import stream_chat_completion_sdk
from .tool_adapter import create_copilot_mcp_server
__all__ = [
"stream_chat_completion_sdk",
"create_copilot_mcp_server",
]
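A hedged sketch of how the exported entry point is consumed. The argument list mirrors the call site in routes.py shown above (the user message is already appended to the session, so the message argument is None); the module paths are inferred from the relative imports in routes.py, and the function name run_sdk_generation is illustrative.

from backend.api.features.chat import stream_registry
from backend.api.features.chat.sdk import stream_chat_completion_sdk


async def run_sdk_generation(session_id, user_id, session, context, task_id) -> None:
    async for chunk in stream_chat_completion_sdk(
        session_id,
        None,  # message already persisted to the session
        is_user_message=True,
        user_id=user_id,
        session=session,
        context=context,
    ):
        # Forward each converted event to subscribers (reconnection-safe).
        await stream_registry.publish_chunk(task_id, chunk)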

View File

@@ -0,0 +1,203 @@
"""Response adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
This module provides the adapter layer that converts streaming messages from
the Claude Agent SDK into the Vercel AI SDK UI Stream Protocol format that
the frontend expects.
"""
import json
import logging
import uuid
from claude_agent_sdk import (
AssistantMessage,
Message,
ResultMessage,
SystemMessage,
TextBlock,
ToolResultBlock,
ToolUseBlock,
UserMessage,
)
from backend.api.features.chat.response_model import (
StreamBaseResponse,
StreamError,
StreamFinish,
StreamFinishStep,
StreamStart,
StreamStartStep,
StreamTextDelta,
StreamTextEnd,
StreamTextStart,
StreamToolInputAvailable,
StreamToolInputStart,
StreamToolOutputAvailable,
)
from backend.api.features.chat.sdk.tool_adapter import (
MCP_TOOL_PREFIX,
pop_pending_tool_output,
)
logger = logging.getLogger(__name__)
class SDKResponseAdapter:
"""Adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
This class maintains state during a streaming session to properly track
text blocks, tool calls, and message lifecycle.
"""
def __init__(self, message_id: str | None = None):
self.message_id = message_id or str(uuid.uuid4())
self.text_block_id = str(uuid.uuid4())
self.has_started_text = False
self.has_ended_text = False
self.current_tool_calls: dict[str, dict[str, str]] = {}
self.task_id: str | None = None
self.step_open = False
def set_task_id(self, task_id: str) -> None:
"""Set the task ID for reconnection support."""
self.task_id = task_id
def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
"""Convert a single SDK message to Vercel AI SDK format."""
responses: list[StreamBaseResponse] = []
if isinstance(sdk_message, SystemMessage):
if sdk_message.subtype == "init":
responses.append(
StreamStart(messageId=self.message_id, taskId=self.task_id)
)
# Open the first step (matches non-SDK: StreamStart then StreamStartStep)
responses.append(StreamStartStep())
self.step_open = True
elif isinstance(sdk_message, AssistantMessage):
# After tool results, the SDK sends a new AssistantMessage for the
# next LLM turn. Open a new step if the previous one was closed.
if not self.step_open:
responses.append(StreamStartStep())
self.step_open = True
for block in sdk_message.content:
if isinstance(block, TextBlock):
if block.text:
self._ensure_text_started(responses)
responses.append(
StreamTextDelta(id=self.text_block_id, delta=block.text)
)
elif isinstance(block, ToolUseBlock):
self._end_text_if_open(responses)
# Strip MCP prefix so frontend sees "find_block"
# instead of "mcp__copilot__find_block".
tool_name = block.name.removeprefix(MCP_TOOL_PREFIX)
responses.append(
StreamToolInputStart(toolCallId=block.id, toolName=tool_name)
)
responses.append(
StreamToolInputAvailable(
toolCallId=block.id,
toolName=tool_name,
input=block.input,
)
)
self.current_tool_calls[block.id] = {"name": tool_name}
elif isinstance(sdk_message, UserMessage):
# UserMessage carries tool results back from tool execution.
content = sdk_message.content
blocks = content if isinstance(content, list) else []
for block in blocks:
if isinstance(block, ToolResultBlock) and block.tool_use_id:
tool_info = self.current_tool_calls.get(block.tool_use_id, {})
tool_name = tool_info.get("name", "unknown")
# Prefer the stashed full output over the SDK's
# (potentially truncated) ToolResultBlock content.
# The SDK truncates large results, writing them to disk,
# which breaks frontend widget parsing.
output = pop_pending_tool_output(tool_name) or (
_extract_tool_output(block.content)
)
responses.append(
StreamToolOutputAvailable(
toolCallId=block.tool_use_id,
toolName=tool_name,
output=output,
success=not (block.is_error or False),
)
)
# Close the current step after tool results — the next
# AssistantMessage will open a new step for the continuation.
if self.step_open:
responses.append(StreamFinishStep())
self.step_open = False
elif isinstance(sdk_message, ResultMessage):
self._end_text_if_open(responses)
# Close the step before finishing.
if self.step_open:
responses.append(StreamFinishStep())
self.step_open = False
if sdk_message.subtype == "success":
responses.append(StreamFinish())
elif sdk_message.subtype in ("error", "error_during_execution"):
error_msg = getattr(sdk_message, "result", None) or "Unknown error"
responses.append(
StreamError(errorText=str(error_msg), code="sdk_error")
)
responses.append(StreamFinish())
else:
logger.warning(
f"Unexpected ResultMessage subtype: {sdk_message.subtype}"
)
responses.append(StreamFinish())
else:
logger.debug(f"Unhandled SDK message type: {type(sdk_message).__name__}")
return responses
def _ensure_text_started(self, responses: list[StreamBaseResponse]) -> None:
"""Start (or restart) a text block if needed."""
if not self.has_started_text or self.has_ended_text:
if self.has_ended_text:
self.text_block_id = str(uuid.uuid4())
self.has_ended_text = False
responses.append(StreamTextStart(id=self.text_block_id))
self.has_started_text = True
def _end_text_if_open(self, responses: list[StreamBaseResponse]) -> None:
"""End the current text block if one is open."""
if self.has_started_text and not self.has_ended_text:
responses.append(StreamTextEnd(id=self.text_block_id))
self.has_ended_text = True
def _extract_tool_output(content: str | list[dict[str, str]] | None) -> str:
"""Extract a string output from a ToolResultBlock's content field."""
if isinstance(content, str):
return content
if isinstance(content, list):
parts = [item.get("text", "") for item in content if item.get("type") == "text"]
if parts:
return "".join(parts)
try:
return json.dumps(content)
except (TypeError, ValueError):
return str(content)
if content is None:
return ""
try:
return json.dumps(content)
except (TypeError, ValueError):
return str(content)
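A short sketch of how the adapter is driven: one adapter per streaming session, SDK messages pushed through convert_message, and the resulting Vercel AI SDK events forwarded to the client. The publish callable is an illustrative stand-in for stream_registry.publish_chunk bound to a task_id.

from collections.abc import AsyncIterator

from claude_agent_sdk import Message

from backend.api.features.chat.sdk.response_adapter import SDKResponseAdapter


async def forward_sdk_stream(sdk_messages: AsyncIterator[Message], publish) -> None:
    # One adapter per streaming session keeps text-block and step state consistent.
    adapter = SDKResponseAdapter(message_id="msg-1")
    adapter.set_task_id("task-1")
    async for sdk_message in sdk_messages:
        for event in adapter.convert_message(sdk_message):
            await publish(event)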

View File

@@ -0,0 +1,366 @@
"""Unit tests for the SDK response adapter."""
from claude_agent_sdk import (
AssistantMessage,
ResultMessage,
SystemMessage,
TextBlock,
ToolResultBlock,
ToolUseBlock,
UserMessage,
)
from backend.api.features.chat.response_model import (
StreamBaseResponse,
StreamError,
StreamFinish,
StreamFinishStep,
StreamStart,
StreamStartStep,
StreamTextDelta,
StreamTextEnd,
StreamTextStart,
StreamToolInputAvailable,
StreamToolInputStart,
StreamToolOutputAvailable,
)
from .response_adapter import SDKResponseAdapter
from .tool_adapter import MCP_TOOL_PREFIX
def _adapter() -> SDKResponseAdapter:
a = SDKResponseAdapter(message_id="msg-1")
a.set_task_id("task-1")
return a
# -- SystemMessage -----------------------------------------------------------
def test_system_init_emits_start_and_step():
adapter = _adapter()
results = adapter.convert_message(SystemMessage(subtype="init", data={}))
assert len(results) == 2
assert isinstance(results[0], StreamStart)
assert results[0].messageId == "msg-1"
assert results[0].taskId == "task-1"
assert isinstance(results[1], StreamStartStep)
def test_system_non_init_emits_nothing():
adapter = _adapter()
results = adapter.convert_message(SystemMessage(subtype="other", data={}))
assert results == []
# -- AssistantMessage with TextBlock -----------------------------------------
def test_text_block_emits_step_start_and_delta():
adapter = _adapter()
msg = AssistantMessage(content=[TextBlock(text="hello")], model="test")
results = adapter.convert_message(msg)
assert len(results) == 3
assert isinstance(results[0], StreamStartStep)
assert isinstance(results[1], StreamTextStart)
assert isinstance(results[2], StreamTextDelta)
assert results[2].delta == "hello"
def test_empty_text_block_emits_only_step():
adapter = _adapter()
msg = AssistantMessage(content=[TextBlock(text="")], model="test")
results = adapter.convert_message(msg)
# Empty text skipped, but step still opens
assert len(results) == 1
assert isinstance(results[0], StreamStartStep)
def test_multiple_text_deltas_reuse_block_id():
adapter = _adapter()
msg1 = AssistantMessage(content=[TextBlock(text="a")], model="test")
msg2 = AssistantMessage(content=[TextBlock(text="b")], model="test")
r1 = adapter.convert_message(msg1)
r2 = adapter.convert_message(msg2)
# First gets step+start+delta, second only delta (block & step already started)
assert len(r1) == 3
assert isinstance(r1[0], StreamStartStep)
assert isinstance(r1[1], StreamTextStart)
assert len(r2) == 1
assert isinstance(r2[0], StreamTextDelta)
assert r1[1].id == r2[0].id # same block ID
# -- AssistantMessage with ToolUseBlock --------------------------------------
def test_tool_use_emits_input_start_and_available():
"""Tool names arrive with MCP prefix and should be stripped for the frontend."""
adapter = _adapter()
msg = AssistantMessage(
content=[
ToolUseBlock(
id="tool-1",
name=f"{MCP_TOOL_PREFIX}find_agent",
input={"q": "x"},
)
],
model="test",
)
results = adapter.convert_message(msg)
assert len(results) == 3
assert isinstance(results[0], StreamStartStep)
assert isinstance(results[1], StreamToolInputStart)
assert results[1].toolCallId == "tool-1"
assert results[1].toolName == "find_agent" # prefix stripped
assert isinstance(results[2], StreamToolInputAvailable)
assert results[2].toolName == "find_agent" # prefix stripped
assert results[2].input == {"q": "x"}
def test_text_then_tool_ends_text_block():
adapter = _adapter()
text_msg = AssistantMessage(content=[TextBlock(text="thinking...")], model="test")
tool_msg = AssistantMessage(
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
model="test",
)
adapter.convert_message(text_msg) # opens step + text
results = adapter.convert_message(tool_msg)
# Step already open, so: TextEnd, ToolInputStart, ToolInputAvailable
assert len(results) == 3
assert isinstance(results[0], StreamTextEnd)
assert isinstance(results[1], StreamToolInputStart)
# -- UserMessage with ToolResultBlock ----------------------------------------
def test_tool_result_emits_output_and_finish_step():
adapter = _adapter()
# First register the tool call (opens step) — SDK sends prefixed name
tool_msg = AssistantMessage(
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}find_agent", input={})],
model="test",
)
adapter.convert_message(tool_msg)
# Now send tool result
result_msg = UserMessage(
content=[ToolResultBlock(tool_use_id="t1", content="found 3 agents")]
)
results = adapter.convert_message(result_msg)
assert len(results) == 2
assert isinstance(results[0], StreamToolOutputAvailable)
assert results[0].toolCallId == "t1"
assert results[0].toolName == "find_agent" # prefix stripped
assert results[0].output == "found 3 agents"
assert results[0].success is True
assert isinstance(results[1], StreamFinishStep)
def test_tool_result_error():
adapter = _adapter()
adapter.convert_message(
AssistantMessage(
content=[
ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}run_agent", input={})
],
model="test",
)
)
result_msg = UserMessage(
content=[ToolResultBlock(tool_use_id="t1", content="timeout", is_error=True)]
)
results = adapter.convert_message(result_msg)
assert isinstance(results[0], StreamToolOutputAvailable)
assert results[0].success is False
assert isinstance(results[1], StreamFinishStep)
def test_tool_result_list_content():
adapter = _adapter()
adapter.convert_message(
AssistantMessage(
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
model="test",
)
)
result_msg = UserMessage(
content=[
ToolResultBlock(
tool_use_id="t1",
content=[
{"type": "text", "text": "line1"},
{"type": "text", "text": "line2"},
],
)
]
)
results = adapter.convert_message(result_msg)
assert isinstance(results[0], StreamToolOutputAvailable)
assert results[0].output == "line1line2"
assert isinstance(results[1], StreamFinishStep)
def test_string_user_message_ignored():
"""A plain string UserMessage (not tool results) produces no output."""
adapter = _adapter()
results = adapter.convert_message(UserMessage(content="hello"))
assert results == []
# -- ResultMessage -----------------------------------------------------------
def test_result_success_emits_finish_step_and_finish():
adapter = _adapter()
# Start some text first (opens step)
adapter.convert_message(
AssistantMessage(content=[TextBlock(text="done")], model="test")
)
msg = ResultMessage(
subtype="success",
duration_ms=100,
duration_api_ms=50,
is_error=False,
num_turns=1,
session_id="s1",
)
results = adapter.convert_message(msg)
# TextEnd + FinishStep + StreamFinish
assert len(results) == 3
assert isinstance(results[0], StreamTextEnd)
assert isinstance(results[1], StreamFinishStep)
assert isinstance(results[2], StreamFinish)
def test_result_error_emits_error_and_finish():
adapter = _adapter()
msg = ResultMessage(
subtype="error",
duration_ms=100,
duration_api_ms=50,
is_error=True,
num_turns=0,
session_id="s1",
result="API rate limited",
)
results = adapter.convert_message(msg)
# No step was open, so no FinishStep — just Error + Finish
assert len(results) == 2
assert isinstance(results[0], StreamError)
assert "API rate limited" in results[0].errorText
assert isinstance(results[1], StreamFinish)
# -- Text after tools (new block ID) ----------------------------------------
def test_text_after_tool_gets_new_block_id():
adapter = _adapter()
# Text -> Tool -> ToolResult -> Text should get a new text block ID and step
adapter.convert_message(
AssistantMessage(content=[TextBlock(text="before")], model="test")
)
adapter.convert_message(
AssistantMessage(
content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
model="test",
)
)
# Send tool result (closes step)
adapter.convert_message(
UserMessage(content=[ToolResultBlock(tool_use_id="t1", content="ok")])
)
results = adapter.convert_message(
AssistantMessage(content=[TextBlock(text="after")], model="test")
)
# Should get StreamStartStep (new step) + StreamTextStart (new block) + StreamTextDelta
assert len(results) == 3
assert isinstance(results[0], StreamStartStep)
assert isinstance(results[1], StreamTextStart)
assert isinstance(results[2], StreamTextDelta)
assert results[2].delta == "after"
# -- Full conversation flow --------------------------------------------------
def test_full_conversation_flow():
"""Simulate a complete conversation: init -> text -> tool -> result -> text -> finish."""
adapter = _adapter()
all_responses: list[StreamBaseResponse] = []
# 1. Init
all_responses.extend(
adapter.convert_message(SystemMessage(subtype="init", data={}))
)
# 2. Assistant text
all_responses.extend(
adapter.convert_message(
AssistantMessage(content=[TextBlock(text="Let me search")], model="test")
)
)
# 3. Tool use
all_responses.extend(
adapter.convert_message(
AssistantMessage(
content=[
ToolUseBlock(
id="t1",
name=f"{MCP_TOOL_PREFIX}find_agent",
input={"query": "email"},
)
],
model="test",
)
)
)
# 4. Tool result
all_responses.extend(
adapter.convert_message(
UserMessage(
content=[ToolResultBlock(tool_use_id="t1", content="Found 2 agents")]
)
)
)
# 5. More text
all_responses.extend(
adapter.convert_message(
AssistantMessage(content=[TextBlock(text="I found 2")], model="test")
)
)
# 6. Result
all_responses.extend(
adapter.convert_message(
ResultMessage(
subtype="success",
duration_ms=500,
duration_api_ms=400,
is_error=False,
num_turns=2,
session_id="s1",
)
)
)
types = [type(r).__name__ for r in all_responses]
assert types == [
"StreamStart",
"StreamStartStep", # step 1: text + tool call
"StreamTextStart",
"StreamTextDelta", # "Let me search"
"StreamTextEnd", # closed before tool
"StreamToolInputStart",
"StreamToolInputAvailable",
"StreamToolOutputAvailable", # tool result
"StreamFinishStep", # step 1 closed after tool result
"StreamStartStep", # step 2: continuation text
"StreamTextStart", # new block after tool
"StreamTextDelta", # "I found 2"
"StreamTextEnd", # closed by result
"StreamFinishStep", # step 2 closed
"StreamFinish",
]

View File

@@ -0,0 +1,335 @@
"""Security hooks for Claude Agent SDK integration.
This module provides security hooks that validate tool calls before execution,
ensuring multi-user isolation and preventing unauthorized operations.
"""
import json
import logging
import os
import re
from collections.abc import Callable
from typing import Any, cast
from backend.api.features.chat.sdk.tool_adapter import MCP_TOOL_PREFIX
logger = logging.getLogger(__name__)
# Tools that are blocked entirely (CLI/system access).
# "Bash" (capital) is the SDK built-in — it's NOT in allowed_tools but blocked
# here as defence-in-depth. The agent uses mcp__copilot__bash_exec instead,
# which has kernel-level network isolation (unshare --net).
BLOCKED_TOOLS = {
"Bash",
"bash",
"shell",
"exec",
"terminal",
"command",
}
# Tools allowed only when their path argument stays within the SDK workspace.
# The SDK uses these to handle oversized tool results (writes to tool-results/
# files, then reads them back) and for workspace file operations.
WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"}
# Dangerous patterns in tool inputs
DANGEROUS_PATTERNS = [
r"sudo",
r"rm\s+-rf",
r"dd\s+if=",
r"/etc/passwd",
r"/etc/shadow",
r"chmod\s+777",
r"curl\s+.*\|.*sh",
r"wget\s+.*\|.*sh",
r"eval\s*\(",
r"exec\s*\(",
r"__import__",
r"os\.system",
r"subprocess",
]
def _deny(reason: str) -> dict[str, Any]:
"""Return a hook denial response."""
return {
"hookSpecificOutput": {
"hookEventName": "PreToolUse",
"permissionDecision": "deny",
"permissionDecisionReason": reason,
}
}
def _validate_workspace_path(
tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None
) -> dict[str, Any]:
"""Validate that a workspace-scoped tool only accesses allowed paths.
Allowed directories:
- The SDK working directory (``/tmp/copilot-<session>/``)
- The SDK tool-results directory (``~/.claude/projects/…/tool-results/``)
"""
path = tool_input.get("file_path") or tool_input.get("path") or ""
if not path:
# Glob/Grep without a path default to cwd which is already sandboxed
return {}
# Resolve relative paths against sdk_cwd (the SDK sets cwd so the LLM
# naturally uses relative paths like "test.txt" instead of absolute ones).
# Tilde paths (~/) are home-dir references, not relative — expand first.
if path.startswith("~"):
resolved = os.path.realpath(os.path.expanduser(path))
elif not os.path.isabs(path) and sdk_cwd:
resolved = os.path.realpath(os.path.join(sdk_cwd, path))
else:
resolved = os.path.realpath(path)
# Allow access within the SDK working directory
if sdk_cwd:
norm_cwd = os.path.realpath(sdk_cwd)
if resolved.startswith(norm_cwd + os.sep) or resolved == norm_cwd:
return {}
# Allow access to ~/.claude/projects/*/tool-results/ (big tool results)
claude_dir = os.path.realpath(os.path.expanduser("~/.claude/projects"))
tool_results_seg = os.sep + "tool-results" + os.sep
if resolved.startswith(claude_dir + os.sep) and tool_results_seg in resolved:
return {}
logger.warning(
f"Blocked {tool_name} outside workspace: {path} (resolved={resolved})"
)
workspace_hint = f" Allowed workspace: {sdk_cwd}" if sdk_cwd else ""
return _deny(
f"[SECURITY] Tool '{tool_name}' can only access files within the workspace "
f"directory.{workspace_hint} "
"This is enforced by the platform and cannot be bypassed."
)
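# Illustrative sketch of the resolution rules above (not part of the module),
# assuming sdk_cwd="/tmp/copilot-abc" on a host where /tmp is not a symlink:
#   "notes.txt"                -> resolves inside the workspace        (allowed)
#   "/tmp/copilot-abc/../etc"  -> realpath escapes the workspace       (denied)
#   "~/.claude/projects/x/tool-results/1.txt"                          (allowed)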
def _validate_tool_access(
tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None = None
) -> dict[str, Any]:
"""Validate that a tool call is allowed.
Returns:
Empty dict to allow, or dict with hookSpecificOutput to deny
"""
# Block forbidden tools
if tool_name in BLOCKED_TOOLS:
logger.warning(f"Blocked tool access attempt: {tool_name}")
return _deny(
f"[SECURITY] Tool '{tool_name}' is blocked for security. "
"This is enforced by the platform and cannot be bypassed. "
"Use the CoPilot-specific MCP tools instead."
)
# Workspace-scoped tools: allowed only within the SDK workspace directory
if tool_name in WORKSPACE_SCOPED_TOOLS:
return _validate_workspace_path(tool_name, tool_input, sdk_cwd)
# Check for dangerous patterns in tool input
# Use json.dumps for predictable format (str() produces Python repr)
input_str = json.dumps(tool_input) if tool_input else ""
for pattern in DANGEROUS_PATTERNS:
if re.search(pattern, input_str, re.IGNORECASE):
logger.warning(
f"Blocked dangerous pattern in tool input: {pattern} in {tool_name}"
)
return _deny(
"[SECURITY] Input contains a blocked pattern. "
"This is enforced by the platform and cannot be bypassed."
)
return {}
def _validate_user_isolation(
tool_name: str, tool_input: dict[str, Any], user_id: str | None
) -> dict[str, Any]:
"""Validate that tool calls respect user isolation."""
# For workspace file tools, ensure path doesn't escape
if "workspace" in tool_name.lower():
path = tool_input.get("path", "") or tool_input.get("file_path", "")
if path:
# Check for path traversal
if ".." in path or path.startswith("/"):
logger.warning(
f"Blocked path traversal attempt: {path} by user {user_id}"
)
return {
"hookSpecificOutput": {
"hookEventName": "PreToolUse",
"permissionDecision": "deny",
"permissionDecisionReason": "Path traversal not allowed",
}
}
return {}
def create_security_hooks(
user_id: str | None,
sdk_cwd: str | None = None,
max_subtasks: int = 3,
on_stop: Callable[[str, str], None] | None = None,
) -> dict[str, Any]:
"""Create the security hooks configuration for Claude Agent SDK.
Includes security validation and observability hooks:
- PreToolUse: Security validation before tool execution
- PostToolUse: Log successful tool executions
- PostToolUseFailure: Log and handle failed tool executions
- PreCompact: Log context compaction events (SDK handles compaction automatically)
- Stop: Capture transcript path for stateless resume (when *on_stop* is provided)
Args:
user_id: Current user ID for isolation validation
sdk_cwd: SDK working directory for workspace-scoped tool validation
max_subtasks: Maximum Task (sub-agent) spawns allowed per session
on_stop: Callback ``(transcript_path, sdk_session_id)`` invoked when
the SDK finishes processing — used to read the JSONL transcript
before the CLI process exits.
Returns:
Hooks configuration dict for ClaudeAgentOptions
"""
try:
from claude_agent_sdk import HookMatcher
from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
# Per-session counter for Task sub-agent spawns
task_spawn_count = 0
async def pre_tool_use_hook(
input_data: HookInput,
tool_use_id: str | None,
context: HookContext,
) -> SyncHookJSONOutput:
"""Combined pre-tool-use validation hook."""
nonlocal task_spawn_count
_ = context # unused but required by signature
tool_name = cast(str, input_data.get("tool_name", ""))
tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
# Rate-limit Task (sub-agent) spawns per session
if tool_name == "Task":
task_spawn_count += 1
if task_spawn_count > max_subtasks:
logger.warning(
f"[SDK] Task limit reached ({max_subtasks}), user={user_id}"
)
return cast(
SyncHookJSONOutput,
_deny(
f"Maximum {max_subtasks} sub-tasks per session. "
"Please continue in the main conversation."
),
)
# Strip MCP prefix for consistent validation
is_copilot_tool = tool_name.startswith(MCP_TOOL_PREFIX)
clean_name = tool_name.removeprefix(MCP_TOOL_PREFIX)
# Only block non-CoPilot tools; our MCP-registered tools
# (including Read for oversized results) are already sandboxed.
if not is_copilot_tool:
result = _validate_tool_access(clean_name, tool_input, sdk_cwd)
if result:
return cast(SyncHookJSONOutput, result)
# Validate user isolation
result = _validate_user_isolation(clean_name, tool_input, user_id)
if result:
return cast(SyncHookJSONOutput, result)
logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
return cast(SyncHookJSONOutput, {})
async def post_tool_use_hook(
input_data: HookInput,
tool_use_id: str | None,
context: HookContext,
) -> SyncHookJSONOutput:
"""Log successful tool executions for observability."""
_ = context
tool_name = cast(str, input_data.get("tool_name", ""))
logger.debug(f"[SDK] Tool success: {tool_name}, tool_use_id={tool_use_id}")
return cast(SyncHookJSONOutput, {})
async def post_tool_failure_hook(
input_data: HookInput,
tool_use_id: str | None,
context: HookContext,
) -> SyncHookJSONOutput:
"""Log failed tool executions for debugging."""
_ = context
tool_name = cast(str, input_data.get("tool_name", ""))
error = input_data.get("error", "Unknown error")
logger.warning(
f"[SDK] Tool failed: {tool_name}, error={error}, "
f"user={user_id}, tool_use_id={tool_use_id}"
)
return cast(SyncHookJSONOutput, {})
async def pre_compact_hook(
input_data: HookInput,
tool_use_id: str | None,
context: HookContext,
) -> SyncHookJSONOutput:
"""Log when SDK triggers context compaction.
The SDK automatically compacts conversation history when it grows too large.
This hook provides visibility into when compaction happens.
"""
_ = context, tool_use_id
trigger = input_data.get("trigger", "auto")
logger.info(
f"[SDK] Context compaction triggered: {trigger}, user={user_id}"
)
return cast(SyncHookJSONOutput, {})
# --- Stop hook: capture transcript path for stateless resume ---
async def stop_hook(
input_data: HookInput,
tool_use_id: str | None,
context: HookContext,
) -> SyncHookJSONOutput:
"""Capture transcript path when SDK finishes processing.
The Stop hook fires while the CLI process is still alive, giving us
a reliable window to read the JSONL transcript before SIGTERM.
"""
_ = context, tool_use_id
transcript_path = cast(str, input_data.get("transcript_path", ""))
sdk_session_id = cast(str, input_data.get("session_id", ""))
if transcript_path and on_stop:
logger.info(
f"[SDK] Stop hook: transcript_path={transcript_path}, "
f"sdk_session_id={sdk_session_id[:12]}..."
)
on_stop(transcript_path, sdk_session_id)
return cast(SyncHookJSONOutput, {})
hooks: dict[str, Any] = {
"PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
"PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
"PostToolUseFailure": [
HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
],
"PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
}
if on_stop is not None:
hooks["Stop"] = [HookMatcher(matcher=None, hooks=[stop_hook])]
return hooks
except ImportError:
# Fallback for when SDK isn't available - return empty hooks
logger.warning("claude-agent-sdk not available, security hooks disabled")
return {}

View File

@@ -0,0 +1,165 @@
"""Unit tests for SDK security hooks."""
import os
from .security_hooks import _validate_tool_access, _validate_user_isolation
SDK_CWD = "/tmp/copilot-abc123"
def _is_denied(result: dict) -> bool:
hook = result.get("hookSpecificOutput", {})
return hook.get("permissionDecision") == "deny"
# -- Blocked tools -----------------------------------------------------------
def test_blocked_tools_denied():
for tool in ("bash", "shell", "exec", "terminal", "command"):
result = _validate_tool_access(tool, {})
assert _is_denied(result), f"{tool} should be blocked"
def test_unknown_tool_allowed():
result = _validate_tool_access("SomeCustomTool", {})
assert result == {}
# -- Workspace-scoped tools --------------------------------------------------
def test_read_within_workspace_allowed():
result = _validate_tool_access(
"Read", {"file_path": f"{SDK_CWD}/file.txt"}, sdk_cwd=SDK_CWD
)
assert result == {}
def test_write_within_workspace_allowed():
result = _validate_tool_access(
"Write", {"file_path": f"{SDK_CWD}/output.json"}, sdk_cwd=SDK_CWD
)
assert result == {}
def test_edit_within_workspace_allowed():
result = _validate_tool_access(
"Edit", {"file_path": f"{SDK_CWD}/src/main.py"}, sdk_cwd=SDK_CWD
)
assert result == {}
def test_glob_within_workspace_allowed():
result = _validate_tool_access("Glob", {"path": f"{SDK_CWD}/src"}, sdk_cwd=SDK_CWD)
assert result == {}
def test_grep_within_workspace_allowed():
result = _validate_tool_access("Grep", {"path": f"{SDK_CWD}/src"}, sdk_cwd=SDK_CWD)
assert result == {}
def test_read_outside_workspace_denied():
result = _validate_tool_access(
"Read", {"file_path": "/etc/passwd"}, sdk_cwd=SDK_CWD
)
assert _is_denied(result)
def test_write_outside_workspace_denied():
result = _validate_tool_access(
"Write", {"file_path": "/home/user/secrets.txt"}, sdk_cwd=SDK_CWD
)
assert _is_denied(result)
def test_traversal_attack_denied():
result = _validate_tool_access(
"Read",
{"file_path": f"{SDK_CWD}/../../etc/passwd"},
sdk_cwd=SDK_CWD,
)
assert _is_denied(result)
def test_no_path_allowed():
"""Glob/Grep without a path argument defaults to cwd — should pass."""
result = _validate_tool_access("Glob", {}, sdk_cwd=SDK_CWD)
assert result == {}
def test_read_no_cwd_denies_absolute():
"""If no sdk_cwd is set, absolute paths are denied."""
result = _validate_tool_access("Read", {"file_path": "/tmp/anything"})
assert _is_denied(result)
# -- Tool-results directory --------------------------------------------------
def test_read_tool_results_allowed():
home = os.path.expanduser("~")
path = f"{home}/.claude/projects/-tmp-copilot-abc123/tool-results/12345.txt"
result = _validate_tool_access("Read", {"file_path": path}, sdk_cwd=SDK_CWD)
assert result == {}
def test_read_claude_projects_without_tool_results_denied():
home = os.path.expanduser("~")
path = f"{home}/.claude/projects/-tmp-copilot-abc123/settings.json"
result = _validate_tool_access("Read", {"file_path": path}, sdk_cwd=SDK_CWD)
assert _is_denied(result)
# -- Built-in Bash is blocked (use bash_exec MCP tool instead) ---------------
def test_bash_builtin_always_blocked():
"""SDK built-in Bash is blocked — bash_exec MCP tool with bubblewrap is used instead."""
result = _validate_tool_access("Bash", {"command": "echo hello"}, sdk_cwd=SDK_CWD)
assert _is_denied(result)
# -- Dangerous patterns ------------------------------------------------------
def test_dangerous_pattern_blocked():
result = _validate_tool_access("SomeTool", {"cmd": "sudo rm -rf /"})
assert _is_denied(result)
def test_subprocess_pattern_blocked():
result = _validate_tool_access("SomeTool", {"code": "subprocess.run(...)"})
assert _is_denied(result)
# -- User isolation ----------------------------------------------------------
def test_workspace_path_traversal_blocked():
result = _validate_user_isolation(
"workspace_read", {"path": "../../../etc/shadow"}, user_id="user-1"
)
assert _is_denied(result)
def test_workspace_absolute_path_blocked():
result = _validate_user_isolation(
"workspace_read", {"path": "/etc/passwd"}, user_id="user-1"
)
assert _is_denied(result)
def test_workspace_normal_path_allowed():
result = _validate_user_isolation(
"workspace_read", {"path": "src/main.py"}, user_id="user-1"
)
assert result == {}
def test_non_workspace_tool_passes_isolation():
result = _validate_user_isolation(
"find_agent", {"query": "email"}, user_id="user-1"
)
assert result == {}

View File

@@ -0,0 +1,751 @@
"""Claude Agent SDK service layer for CoPilot chat completions."""
import asyncio
import json
import logging
import os
import uuid
from collections.abc import AsyncGenerator
from dataclasses import dataclass
from typing import Any
from backend.util.exceptions import NotFoundError
from .. import stream_registry
from ..config import ChatConfig
from ..model import (
ChatMessage,
ChatSession,
get_chat_session,
update_session_title,
upsert_chat_session,
)
from ..response_model import (
StreamBaseResponse,
StreamError,
StreamFinish,
StreamStart,
StreamTextDelta,
StreamToolInputAvailable,
StreamToolOutputAvailable,
)
from ..service import (
_build_system_prompt,
_execute_long_running_tool_with_streaming,
_generate_session_title,
)
from ..tools.models import OperationPendingResponse, OperationStartedResponse
from ..tools.sandbox import WORKSPACE_PREFIX, make_session_path
from ..tracking import track_user_message
from .response_adapter import SDKResponseAdapter
from .security_hooks import create_security_hooks
from .tool_adapter import (
COPILOT_TOOL_NAMES,
LongRunningCallback,
create_copilot_mcp_server,
set_execution_context,
)
from .transcript import (
download_transcript,
read_transcript_file,
upload_transcript,
validate_transcript,
write_transcript_to_tempfile,
)
logger = logging.getLogger(__name__)
config = ChatConfig()
# Set to hold background tasks to prevent garbage collection
_background_tasks: set[asyncio.Task[Any]] = set()
@dataclass
class CapturedTranscript:
"""Info captured by the SDK Stop hook for stateless --resume."""
path: str = ""
sdk_session_id: str = ""
@property
def available(self) -> bool:
return bool(self.path)
_SDK_CWD_PREFIX = WORKSPACE_PREFIX
# Appended to the system prompt to inform the agent about available tools.
# The SDK built-in Bash is NOT available — use mcp__copilot__bash_exec instead,
# which has kernel-level network isolation (unshare --net).
_SDK_TOOL_SUPPLEMENT = """
## Tool notes
- The SDK built-in Bash tool is NOT available. Use the `bash_exec` MCP tool
for shell commands — it runs in a network-isolated sandbox.
- **Shared workspace**: The SDK Read/Write tools and `bash_exec` share the
same working directory. Files created by one are readable by the other.
These files are **ephemeral** — they exist only for the current session.
- **Persistent storage**: Use `write_workspace_file` / `read_workspace_file`
for files that should persist across sessions (stored in cloud storage).
- Long-running tools (create_agent, edit_agent, etc.) are handled
asynchronously. You will receive an immediate response; the actual result
is delivered to the user via a background stream.
"""
def _build_long_running_callback(user_id: str | None) -> LongRunningCallback:
"""Build a callback that delegates long-running tools to the non-SDK infrastructure.
Long-running tools (create_agent, edit_agent, etc.) are delegated to the
existing background infrastructure: stream_registry (Redis Streams),
database persistence, and SSE reconnection. This means results survive
page refreshes / pod restarts, and the frontend shows the proper loading
widget with progress updates.
The returned callback matches the ``LongRunningCallback`` signature:
``(tool_name, args, session) -> MCP response dict``.
"""
async def _callback(
tool_name: str, args: dict[str, Any], session: ChatSession
) -> dict[str, Any]:
operation_id = str(uuid.uuid4())
task_id = str(uuid.uuid4())
tool_call_id = f"sdk-{uuid.uuid4().hex[:12]}"
session_id = session.session_id
# --- Build user-friendly messages (matches non-SDK service) ---
if tool_name == "create_agent":
desc = args.get("description", "")
desc_preview = (desc[:100] + "...") if len(desc) > 100 else desc
pending_msg = (
f"Creating your agent: {desc_preview}"
if desc_preview
else "Creating agent... This may take a few minutes."
)
started_msg = (
"Agent creation started. You can close this tab - "
"check your library in a few minutes."
)
elif tool_name == "edit_agent":
changes = args.get("changes", "")
changes_preview = (changes[:100] + "...") if len(changes) > 100 else changes
pending_msg = (
f"Editing agent: {changes_preview}"
if changes_preview
else "Editing agent... This may take a few minutes."
)
started_msg = (
"Agent edit started. You can close this tab - "
"check your library in a few minutes."
)
else:
pending_msg = f"Running {tool_name}... This may take a few minutes."
started_msg = (
f"{tool_name} started. You can close this tab - "
"check back in a few minutes."
)
# --- Register task in Redis for SSE reconnection ---
await stream_registry.create_task(
task_id=task_id,
session_id=session_id,
user_id=user_id,
tool_call_id=tool_call_id,
tool_name=tool_name,
operation_id=operation_id,
)
# --- Save OperationPendingResponse to chat history ---
pending_message = ChatMessage(
role="tool",
content=OperationPendingResponse(
message=pending_msg,
operation_id=operation_id,
tool_name=tool_name,
).model_dump_json(),
tool_call_id=tool_call_id,
)
session.messages.append(pending_message)
await upsert_chat_session(session)
# --- Spawn background task (reuses non-SDK infrastructure) ---
bg_task = asyncio.create_task(
_execute_long_running_tool_with_streaming(
tool_name=tool_name,
parameters=args,
tool_call_id=tool_call_id,
operation_id=operation_id,
task_id=task_id,
session_id=session_id,
user_id=user_id,
)
)
_background_tasks.add(bg_task)
bg_task.add_done_callback(_background_tasks.discard)
await stream_registry.set_task_asyncio_task(task_id, bg_task)
logger.info(
f"[SDK] Long-running tool {tool_name} delegated to background "
f"(operation_id={operation_id}, task_id={task_id})"
)
# --- Return OperationStartedResponse as MCP tool result ---
# This flows through SDK → response adapter → frontend, triggering
# the loading widget with SSE reconnection support.
started_json = OperationStartedResponse(
message=started_msg,
operation_id=operation_id,
tool_name=tool_name,
task_id=task_id,
).model_dump_json()
return {
"content": [{"type": "text", "text": started_json}],
"isError": False,
}
return _callback
def _resolve_sdk_model() -> str | None:
"""Resolve the model name for the Claude Agent SDK CLI.
Uses ``config.claude_agent_model`` if set, otherwise derives from
``config.model`` by stripping the OpenRouter provider prefix (e.g.,
``"anthropic/claude-opus-4.6"`` → ``"claude-opus-4.6"``).
"""
if config.claude_agent_model:
return config.claude_agent_model
model = config.model
if "/" in model:
return model.split("/", 1)[1]
return model
def _build_sdk_env() -> dict[str, str]:
"""Build env vars for the SDK CLI process.
Routes API calls through OpenRouter (or a custom base_url) using
the same ``config.api_key`` / ``config.base_url`` as the non-SDK path.
This gives per-call token and cost tracking on the OpenRouter dashboard.
Only overrides ``ANTHROPIC_API_KEY`` when a valid proxy URL and auth
token are both present — otherwise returns an empty dict so the SDK
falls back to its default credentials.
"""
env: dict[str, str] = {}
if config.api_key and config.base_url:
# Strip /v1 suffix — SDK expects the base URL without a version path
base = config.base_url.rstrip("/")
if base.endswith("/v1"):
base = base[:-3]
if not base or not base.startswith("http"):
# Invalid base_url — don't override SDK defaults
return env
env["ANTHROPIC_BASE_URL"] = base
env["ANTHROPIC_AUTH_TOKEN"] = config.api_key
# Must be explicitly empty so the CLI uses AUTH_TOKEN instead
env["ANTHROPIC_API_KEY"] = ""
return env
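# Sketch of what this returns when OpenRouter-style config is present
# (placeholder values for illustration, not real credentials or a guaranteed URL):
#   config.base_url = "https://openrouter.ai/api/v1"
#   config.api_key  = "sk-or-..."
#   -> {"ANTHROPIC_BASE_URL": "https://openrouter.ai/api",
#       "ANTHROPIC_AUTH_TOKEN": "sk-or-...",
#       "ANTHROPIC_API_KEY": ""}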
def _make_sdk_cwd(session_id: str) -> str:
"""Create a safe, session-specific working directory path.
Delegates to :func:`~backend.api.features.chat.tools.sandbox.make_session_path`
(single source of truth for path sanitization) and adds a defence-in-depth
assertion.
"""
cwd = make_session_path(session_id)
# Defence-in-depth: normpath + startswith is a CodeQL-recognised sanitizer
cwd = os.path.normpath(cwd)
if not cwd.startswith(_SDK_CWD_PREFIX):
raise ValueError(f"SDK cwd escaped prefix: {cwd}")
return cwd
def _cleanup_sdk_tool_results(cwd: str) -> None:
"""Remove SDK tool-result files for a specific session working directory.
The SDK creates tool-result files under ~/.claude/projects/<encoded-cwd>/tool-results/.
We clean only the specific cwd's results to avoid race conditions between
concurrent sessions.
Security: cwd MUST be created by _make_sdk_cwd() which sanitizes session_id.
"""
import shutil
# Validate cwd is under the expected prefix
normalized = os.path.normpath(cwd)
if not normalized.startswith(_SDK_CWD_PREFIX):
logger.warning(f"[SDK] Rejecting cleanup for path outside workspace: {cwd}")
return
# SDK encodes the cwd path by replacing '/' with '-'
encoded_cwd = normalized.replace("/", "-")
# Construct the project directory path (known-safe home expansion)
claude_projects = os.path.expanduser("~/.claude/projects")
project_dir = os.path.join(claude_projects, encoded_cwd)
# Validate project_dir stays under ~/.claude/projects
project_dir = os.path.normpath(project_dir)
if not project_dir.startswith(claude_projects):
logger.warning(
f"[SDK] Rejecting cleanup for escaped project path: {project_dir}"
)
return
results_dir = os.path.join(project_dir, "tool-results")
if os.path.isdir(results_dir):
for filename in os.listdir(results_dir):
file_path = os.path.join(results_dir, filename)
try:
if os.path.isfile(file_path):
os.remove(file_path)
except OSError:
pass
# Also clean up the temp cwd directory itself
try:
shutil.rmtree(normalized, ignore_errors=True)
except OSError:
pass
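# For illustration: a workspace cwd of "/tmp/copilot-abc123" encodes to
# "-tmp-copilot-abc123" ('/' replaced with '-'), so cleanup targets
# ~/.claude/projects/-tmp-copilot-abc123/tool-results/ before removing the
# cwd itself.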
async def _compress_conversation_history(
session: ChatSession,
) -> list[ChatMessage]:
"""Compress prior conversation messages if they exceed the token threshold.
Uses the shared compress_context() from prompt.py which supports:
- LLM summarization of old messages (keeps recent ones intact)
- Progressive content truncation as fallback
- Middle-out deletion as last resort
Returns the compressed prior messages (everything except the current message).
"""
prior = session.messages[:-1]
if len(prior) < 2:
return prior
from backend.util.prompt import compress_context
# Convert ChatMessages to dicts for compress_context
messages_dict = []
for msg in prior:
msg_dict: dict[str, Any] = {"role": msg.role}
if msg.content:
msg_dict["content"] = msg.content
if msg.tool_calls:
msg_dict["tool_calls"] = msg.tool_calls
if msg.tool_call_id:
msg_dict["tool_call_id"] = msg.tool_call_id
messages_dict.append(msg_dict)
try:
import openai
async with openai.AsyncOpenAI(
api_key=config.api_key, base_url=config.base_url, timeout=30.0
) as client:
result = await compress_context(
messages=messages_dict,
model=config.model,
client=client,
)
except Exception as e:
logger.warning(f"[SDK] Context compression with LLM failed: {e}")
# Fall back to truncation-only (no LLM summarization)
result = await compress_context(
messages=messages_dict,
model=config.model,
client=None,
)
if result.was_compacted:
logger.info(
f"[SDK] Context compacted: {result.original_token_count} -> "
f"{result.token_count} tokens "
f"({result.messages_summarized} summarized, "
f"{result.messages_dropped} dropped)"
)
# Convert compressed dicts back to ChatMessages
return [
ChatMessage(
role=m["role"],
content=m.get("content"),
tool_calls=m.get("tool_calls"),
tool_call_id=m.get("tool_call_id"),
)
for m in result.messages
]
return prior
def _format_conversation_context(messages: list[ChatMessage]) -> str | None:
"""Format conversation messages into a context prefix for the user message.
Returns a string like:
<conversation_history>
User: hello
You responded: Hi! How can I help?
</conversation_history>
Returns None if there are no messages to format.
"""
if not messages:
return None
lines: list[str] = []
for msg in messages:
if not msg.content:
continue
if msg.role == "user":
lines.append(f"User: {msg.content}")
elif msg.role == "assistant":
lines.append(f"You responded: {msg.content}")
# Skip tool messages — they're internal details
if not lines:
return None
return "<conversation_history>\n" + "\n".join(lines) + "\n</conversation_history>"
async def stream_chat_completion_sdk(
session_id: str,
message: str | None = None,
tool_call_response: str | None = None, # noqa: ARG001
is_user_message: bool = True,
user_id: str | None = None,
retry_count: int = 0, # noqa: ARG001
session: ChatSession | None = None,
context: dict[str, str] | None = None, # noqa: ARG001
) -> AsyncGenerator[StreamBaseResponse, None]:
"""Stream chat completion using Claude Agent SDK.
Drop-in replacement for stream_chat_completion with improved reliability.
"""
if session is None:
session = await get_chat_session(session_id, user_id)
if not session:
raise NotFoundError(
f"Session {session_id} not found. Please create a new session first."
)
if message:
session.messages.append(
ChatMessage(
role="user" if is_user_message else "assistant", content=message
)
)
if is_user_message:
track_user_message(
user_id=user_id, session_id=session_id, message_length=len(message)
)
session = await upsert_chat_session(session)
# Generate title for new sessions (first user message)
if is_user_message and not session.title:
user_messages = [m for m in session.messages if m.role == "user"]
if len(user_messages) == 1:
first_message = user_messages[0].content or message or ""
if first_message:
task = asyncio.create_task(
_update_title_async(session_id, first_message, user_id)
)
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
# Build system prompt (reuses non-SDK path with Langfuse support)
has_history = len(session.messages) > 1
system_prompt, _ = await _build_system_prompt(
user_id, has_conversation_history=has_history
)
system_prompt += _SDK_TOOL_SUPPLEMENT
message_id = str(uuid.uuid4())
task_id = str(uuid.uuid4())
yield StreamStart(messageId=message_id, taskId=task_id)
stream_completed = False
# Initialise sdk_cwd before the try so the finally can reference it
# even if _make_sdk_cwd raises (in that case it stays as "").
sdk_cwd = ""
use_resume = False
try:
# Use a session-specific temp dir to avoid cleanup race conditions
# between concurrent sessions.
sdk_cwd = _make_sdk_cwd(session_id)
os.makedirs(sdk_cwd, exist_ok=True)
set_execution_context(
user_id,
session,
long_running_callback=_build_long_running_callback(user_id),
)
try:
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
# Fail fast when no API credentials are available at all
sdk_env = _build_sdk_env()
if not sdk_env and not os.environ.get("ANTHROPIC_API_KEY"):
raise RuntimeError(
"No API key configured. Set OPEN_ROUTER_API_KEY "
"(or CHAT_API_KEY) for OpenRouter routing, "
"or ANTHROPIC_API_KEY for direct Anthropic access."
)
mcp_server = create_copilot_mcp_server()
sdk_model = _resolve_sdk_model()
# --- Transcript capture via Stop hook ---
captured_transcript = CapturedTranscript()
def _on_stop(transcript_path: str, sdk_session_id: str) -> None:
captured_transcript.path = transcript_path
captured_transcript.sdk_session_id = sdk_session_id
security_hooks = create_security_hooks(
user_id,
sdk_cwd=sdk_cwd,
max_subtasks=config.claude_agent_max_subtasks,
on_stop=_on_stop if config.claude_agent_use_resume else None,
)
# --- Resume strategy: download transcript from bucket ---
resume_file: str | None = None
use_resume = False
if config.claude_agent_use_resume and user_id and len(session.messages) > 1:
transcript_content = await download_transcript(user_id, session_id)
if transcript_content and validate_transcript(transcript_content):
resume_file = write_transcript_to_tempfile(
transcript_content, session_id, sdk_cwd
)
if resume_file:
use_resume = True
logger.info(
f"[SDK] Using --resume with transcript "
f"({len(transcript_content)} bytes)"
)
sdk_options_kwargs: dict[str, Any] = {
"system_prompt": system_prompt,
"mcp_servers": {"copilot": mcp_server},
"allowed_tools": COPILOT_TOOL_NAMES,
"disallowed_tools": ["Bash"],
"hooks": security_hooks,
"cwd": sdk_cwd,
"max_buffer_size": config.claude_agent_max_buffer_size,
}
if sdk_env:
sdk_options_kwargs["model"] = sdk_model
sdk_options_kwargs["env"] = sdk_env
if use_resume and resume_file:
sdk_options_kwargs["resume"] = resume_file
options = ClaudeAgentOptions(**sdk_options_kwargs) # type: ignore[arg-type]
adapter = SDKResponseAdapter(message_id=message_id)
adapter.set_task_id(task_id)
async with ClaudeSDKClient(options=options) as client:
current_message = message or ""
if not current_message and session.messages:
last_user = [m for m in session.messages if m.role == "user"]
if last_user:
current_message = last_user[-1].content or ""
if not current_message.strip():
yield StreamError(
errorText="Message cannot be empty.",
code="empty_prompt",
)
yield StreamFinish()
return
# Build query: with --resume the CLI already has full
# context, so we only send the new message. Without
# resume, compress history into a context prefix.
query_message = current_message
if not use_resume and len(session.messages) > 1:
logger.warning(
f"[SDK] Using compression fallback for session "
f"{session_id} ({len(session.messages)} messages) — "
f"no transcript available for --resume"
)
compressed = await _compress_conversation_history(session)
history_context = _format_conversation_context(compressed)
if history_context:
query_message = (
f"{history_context}\n\n"
f"Now, the user says:\n{current_message}"
)
logger.info(
f"[SDK] Sending query ({len(session.messages)} msgs in session)"
)
logger.debug(f"[SDK] Query preview: {current_message[:80]!r}")
await client.query(query_message, session_id=session_id)
assistant_response = ChatMessage(role="assistant", content="")
accumulated_tool_calls: list[dict[str, Any]] = []
has_appended_assistant = False
has_tool_results = False
async for sdk_msg in client.receive_messages():
logger.debug(
f"[SDK] Received: {type(sdk_msg).__name__} "
f"{getattr(sdk_msg, 'subtype', '')}"
)
for response in adapter.convert_message(sdk_msg):
if isinstance(response, StreamStart):
continue
yield response
if isinstance(response, StreamTextDelta):
delta = response.delta or ""
# After tool results, start a new assistant
# message for the post-tool text.
if has_tool_results and has_appended_assistant:
assistant_response = ChatMessage(
role="assistant", content=delta
)
accumulated_tool_calls = []
has_appended_assistant = False
has_tool_results = False
session.messages.append(assistant_response)
has_appended_assistant = True
else:
assistant_response.content = (
assistant_response.content or ""
) + delta
if not has_appended_assistant:
session.messages.append(assistant_response)
has_appended_assistant = True
elif isinstance(response, StreamToolInputAvailable):
accumulated_tool_calls.append(
{
"id": response.toolCallId,
"type": "function",
"function": {
"name": response.toolName,
"arguments": json.dumps(response.input or {}),
},
}
)
assistant_response.tool_calls = accumulated_tool_calls
if not has_appended_assistant:
session.messages.append(assistant_response)
has_appended_assistant = True
elif isinstance(response, StreamToolOutputAvailable):
session.messages.append(
ChatMessage(
role="tool",
content=(
response.output
if isinstance(response.output, str)
else str(response.output)
),
tool_call_id=response.toolCallId,
)
)
has_tool_results = True
elif isinstance(response, StreamFinish):
stream_completed = True
if stream_completed:
break
if (
assistant_response.content or assistant_response.tool_calls
) and not has_appended_assistant:
session.messages.append(assistant_response)
# --- Capture transcript while CLI is still alive ---
# Must happen INSIDE async with: close() sends SIGTERM
# which kills the CLI before it can flush the JSONL.
if (
config.claude_agent_use_resume
and user_id
and captured_transcript.available
):
# Give CLI time to flush JSONL writes before we read
await asyncio.sleep(0.5)
raw_transcript = read_transcript_file(captured_transcript.path)
if raw_transcript:
task = asyncio.create_task(
_upload_transcript_bg(user_id, session_id, raw_transcript)
)
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
else:
logger.debug("[SDK] Stop hook fired but transcript not usable")
except ImportError:
raise RuntimeError(
"claude-agent-sdk is not installed. "
"Disable SDK mode (CHAT_USE_CLAUDE_AGENT_SDK=false) "
"to use the OpenAI-compatible fallback."
)
await upsert_chat_session(session)
logger.debug(
f"[SDK] Session {session_id} saved with {len(session.messages)} messages"
)
if not stream_completed:
yield StreamFinish()
except Exception as e:
logger.error(f"[SDK] Error: {e}", exc_info=True)
try:
await upsert_chat_session(session)
except Exception as save_err:
logger.error(f"[SDK] Failed to save session on error: {save_err}")
yield StreamError(
errorText="An error occurred. Please try again.",
code="sdk_error",
)
yield StreamFinish()
finally:
if sdk_cwd:
_cleanup_sdk_tool_results(sdk_cwd)
async def _upload_transcript_bg(
user_id: str, session_id: str, raw_content: str
) -> None:
"""Background task to strip progress entries and upload transcript."""
try:
await upload_transcript(user_id, session_id, raw_content)
except Exception as e:
logger.error(f"[SDK] Failed to upload transcript for {session_id}: {e}")
async def _update_title_async(
session_id: str, message: str, user_id: str | None = None
) -> None:
"""Background task to update session title."""
try:
title = await _generate_session_title(
message, user_id=user_id, session_id=session_id
)
if title:
await update_session_title(session_id, title)
logger.debug(f"[SDK] Generated title for {session_id}: {title}")
except Exception as e:
logger.warning(f"[SDK] Failed to update session title: {e}")

View File

@@ -0,0 +1,325 @@
"""Tool adapter for wrapping existing CoPilot tools as Claude Agent SDK MCP tools.
This module provides the adapter layer that converts existing BaseTool implementations
into in-process MCP tools that can be used with the Claude Agent SDK.
Long-running tools (``is_long_running=True``) are delegated to the non-SDK
background infrastructure (stream_registry, Redis persistence, SSE reconnection)
via a callback provided by the service layer. This avoids wasteful SDK polling
and makes results survive page refreshes.
"""
import json
import logging
import os
import uuid
from collections.abc import Awaitable, Callable
from contextvars import ContextVar
from typing import Any
from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.tools import TOOL_REGISTRY
from backend.api.features.chat.tools.base import BaseTool
logger = logging.getLogger(__name__)
# Allowed base directory for the Read tool (SDK saves oversized tool results here).
# Restricted to ~/.claude/projects/ and further validated to require "tool-results"
# in the path — prevents reading settings, credentials, or other sensitive files.
_SDK_PROJECTS_DIR = os.path.expanduser("~/.claude/projects/")
# MCP server naming - the SDK prefixes tool names as "mcp__{server_name}__{tool}"
MCP_SERVER_NAME = "copilot"
MCP_TOOL_PREFIX = f"mcp__{MCP_SERVER_NAME}__"
# Context variables to pass user/session info to tool execution
_current_user_id: ContextVar[str | None] = ContextVar("current_user_id", default=None)
_current_session: ContextVar[ChatSession | None] = ContextVar(
"current_session", default=None
)
# Stash for MCP tool outputs before the SDK potentially truncates them.
# Keyed by tool_name → full output string. Consumed (popped) by the
# response adapter when it builds StreamToolOutputAvailable.
_pending_tool_outputs: ContextVar[dict[str, str]] = ContextVar(
"pending_tool_outputs", default=None # type: ignore[arg-type]
)
# Callback type for delegating long-running tools to the non-SDK infrastructure.
# Args: (tool_name, arguments, session) → MCP-formatted response dict.
LongRunningCallback = Callable[
[str, dict[str, Any], ChatSession], Awaitable[dict[str, Any]]
]
# ContextVar so the service layer can inject the callback per-request.
_long_running_callback: ContextVar[LongRunningCallback | None] = ContextVar(
"long_running_callback", default=None
)
def set_execution_context(
user_id: str | None,
session: ChatSession,
long_running_callback: LongRunningCallback | None = None,
) -> None:
"""Set the execution context for tool calls.
This must be called before streaming begins to ensure tools have access
to user_id and session information.
Args:
user_id: Current user's ID.
session: Current chat session.
long_running_callback: Optional callback to delegate long-running tools
to the non-SDK background infrastructure (stream_registry + Redis).
"""
_current_user_id.set(user_id)
_current_session.set(session)
_pending_tool_outputs.set({})
_long_running_callback.set(long_running_callback)
def get_execution_context() -> tuple[str | None, ChatSession | None]:
"""Get the current execution context."""
return (
_current_user_id.get(),
_current_session.get(),
)
def pop_pending_tool_output(tool_name: str) -> str | None:
"""Pop and return the stashed full output for *tool_name*.
The SDK CLI may truncate large tool results (writing them to disk and
replacing the content with a file reference). This stash keeps the
original MCP output so the response adapter can forward it to the
frontend for proper widget rendering.
Returns ``None`` if nothing was stashed for *tool_name*.
"""
pending = _pending_tool_outputs.get(None)
if pending is None:
return None
return pending.pop(tool_name, None)
async def _execute_tool_sync(
base_tool: BaseTool,
user_id: str | None,
session: ChatSession,
args: dict[str, Any],
) -> dict[str, Any]:
"""Execute a tool synchronously and return MCP-formatted response."""
effective_id = f"sdk-{uuid.uuid4().hex[:12]}"
result = await base_tool.execute(
user_id=user_id,
session=session,
tool_call_id=effective_id,
**args,
)
text = (
result.output if isinstance(result.output, str) else json.dumps(result.output)
)
# Stash the full output before the SDK potentially truncates it.
pending = _pending_tool_outputs.get(None)
if pending is not None:
pending[base_tool.name] = text
return {
"content": [{"type": "text", "text": text}],
"isError": not result.success,
}
def _mcp_error(message: str) -> dict[str, Any]:
return {
"content": [
{"type": "text", "text": json.dumps({"error": message, "type": "error"})}
],
"isError": True,
}
def create_tool_handler(base_tool: BaseTool):
"""Create an async handler function for a BaseTool.
This wraps the existing BaseTool._execute method to be compatible
with the Claude Agent SDK MCP tool format.
Long-running tools (``is_long_running=True``) are delegated to the
non-SDK background infrastructure via a callback set in the execution
context. The callback persists the operation in Redis (stream_registry)
so results survive page refreshes and pod restarts.
"""
async def tool_handler(args: dict[str, Any]) -> dict[str, Any]:
"""Execute the wrapped tool and return MCP-formatted response."""
user_id, session = get_execution_context()
if session is None:
return _mcp_error("No session context available")
# --- Long-running: delegate to non-SDK background infrastructure ---
if base_tool.is_long_running:
callback = _long_running_callback.get(None)
if callback:
try:
return await callback(base_tool.name, args, session)
except Exception as e:
logger.error(
f"Long-running callback failed for {base_tool.name}: {e}",
exc_info=True,
)
return _mcp_error(f"Failed to start {base_tool.name}: {e}")
# No callback — fall through to synchronous execution
logger.warning(
f"[SDK] No long-running callback for {base_tool.name}, "
f"executing synchronously (may block)"
)
# --- Normal (fast) tool: execute synchronously ---
try:
return await _execute_tool_sync(base_tool, user_id, session, args)
except Exception as e:
logger.error(f"Error executing tool {base_tool.name}: {e}", exc_info=True)
return _mcp_error(f"Failed to execute {base_tool.name}: {e}")
return tool_handler
def _build_input_schema(base_tool: BaseTool) -> dict[str, Any]:
"""Build a JSON Schema input schema for a tool."""
return {
"type": "object",
"properties": base_tool.parameters.get("properties", {}),
"required": base_tool.parameters.get("required", []),
}
async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]:
"""Read a file with optional offset/limit. Restricted to SDK working directory.
After reading, the file is deleted to prevent accumulation in long-running pods.
"""
file_path = args.get("file_path", "")
offset = args.get("offset", 0)
limit = args.get("limit", 2000)
# Security: only allow reads under ~/.claude/projects/**/tool-results/
real_path = os.path.realpath(file_path)
if not real_path.startswith(_SDK_PROJECTS_DIR) or "tool-results" not in real_path:
return {
"content": [{"type": "text", "text": f"Access denied: {file_path}"}],
"isError": True,
}
try:
with open(real_path) as f:
lines = f.readlines()
selected = lines[offset : offset + limit]
content = "".join(selected)
# Clean up to prevent accumulation in long-running pods
try:
os.remove(real_path)
except OSError:
pass
return {"content": [{"type": "text", "text": content}], "isError": False}
except FileNotFoundError:
return {
"content": [{"type": "text", "text": f"File not found: {file_path}"}],
"isError": True,
}
except Exception as e:
return {
"content": [{"type": "text", "text": f"Error reading file: {e}"}],
"isError": True,
}
_READ_TOOL_NAME = "Read"
_READ_TOOL_DESCRIPTION = (
"Read a file from the local filesystem. "
"Use offset and limit to read specific line ranges for large files."
)
_READ_TOOL_SCHEMA = {
"type": "object",
"properties": {
"file_path": {
"type": "string",
"description": "The absolute path to the file to read",
},
"offset": {
"type": "integer",
"description": "Line number to start reading from (0-indexed). Default: 0",
},
"limit": {
"type": "integer",
"description": "Number of lines to read. Default: 2000",
},
},
"required": ["file_path"],
}
# Create the MCP server configuration
def create_copilot_mcp_server():
"""Create an in-process MCP server configuration for CoPilot tools.
This can be passed to ClaudeAgentOptions.mcp_servers.
Note: The actual SDK MCP server creation depends on the claude-agent-sdk
package being available. This function returns the configuration that
can be used with the SDK.
"""
try:
from claude_agent_sdk import create_sdk_mcp_server, tool
# Create decorated tool functions
sdk_tools = []
for tool_name, base_tool in TOOL_REGISTRY.items():
handler = create_tool_handler(base_tool)
decorated = tool(
tool_name,
base_tool.description,
_build_input_schema(base_tool),
)(handler)
sdk_tools.append(decorated)
# Add the Read tool so the SDK can read back oversized tool results
read_tool = tool(
_READ_TOOL_NAME,
_READ_TOOL_DESCRIPTION,
_READ_TOOL_SCHEMA,
)(_read_file_handler)
sdk_tools.append(read_tool)
server = create_sdk_mcp_server(
name=MCP_SERVER_NAME,
version="1.0.0",
tools=sdk_tools,
)
return server
except ImportError:
# Let ImportError propagate so service.py handles the fallback
raise
# SDK built-in tools allowed within the workspace directory.
# Security hooks validate that file paths stay within sdk_cwd.
# Bash is NOT included — use the sandboxed MCP bash_exec tool instead,
# which provides kernel-level network isolation via unshare --net.
# Task allows spawning sub-agents (rate-limited by security hooks).
_SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Task"]
# List of tool names for allowed_tools configuration
# Include MCP tools, the MCP Read tool for oversized results,
# and SDK built-in file tools for workspace operations.
COPILOT_TOOL_NAMES = [
*[f"{MCP_TOOL_PREFIX}{name}" for name in TOOL_REGISTRY.keys()],
f"{MCP_TOOL_PREFIX}{_READ_TOOL_NAME}",
*_SDK_BUILTIN_TOOLS,
]
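# Rough shape of the resulting allow-list (actual entries come from
# TOOL_REGISTRY; "find_agent" is just an example tool name):
#   ["mcp__copilot__find_agent", ..., "mcp__copilot__Read",
#    "Read", "Write", "Edit", "Glob", "Grep", "Task"]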

View File

@@ -0,0 +1,355 @@
"""JSONL transcript management for stateless multi-turn resume.
The Claude Code CLI persists conversations as JSONL files (one JSON object per
line). When the SDK's ``Stop`` hook fires we read this file, strip bloat
(progress entries, metadata), and upload the result to bucket storage. On the
next turn we download the transcript, write it to a temp file, and pass
``--resume`` so the CLI can reconstruct the full conversation.
Storage is handled via ``WorkspaceStorageBackend`` (GCS in prod, local
filesystem for self-hosted) — no DB column needed.
"""
import json
import logging
import os
import re
logger = logging.getLogger(__name__)
# UUIDs are hex + hyphens; strip everything else to prevent path injection.
_SAFE_ID_RE = re.compile(r"[^0-9a-fA-F-]")
# Entry types that can be safely removed from the transcript without breaking
# the parentUuid conversation tree that ``--resume`` relies on.
# - progress: UI progress ticks, no message content (avg 97KB for agent_progress)
# - file-history-snapshot: undo tracking metadata
# - queue-operation: internal queue bookkeeping
# - summary: session summaries
# - pr-link: PR link metadata
STRIPPABLE_TYPES = frozenset(
{"progress", "file-history-snapshot", "queue-operation", "summary", "pr-link"}
)
# Workspace storage constants — deterministic path from session_id.
TRANSCRIPT_STORAGE_PREFIX = "chat-transcripts"
# ---------------------------------------------------------------------------
# Progress stripping
# ---------------------------------------------------------------------------
def strip_progress_entries(content: str) -> str:
"""Remove progress/metadata entries from a JSONL transcript.
Removes entries whose ``type`` is in ``STRIPPABLE_TYPES`` and reparents
any remaining child entries so the ``parentUuid`` chain stays intact.
Typically reduces transcript size by ~30%.
"""
lines = content.strip().split("\n")
entries: list[dict] = []
for line in lines:
try:
entries.append(json.loads(line))
except json.JSONDecodeError:
# Keep unparseable lines as-is (safety)
entries.append({"_raw": line})
stripped_uuids: set[str] = set()
uuid_to_parent: dict[str, str] = {}
kept: list[dict] = []
for entry in entries:
if "_raw" in entry:
kept.append(entry)
continue
uid = entry.get("uuid", "")
parent = entry.get("parentUuid", "")
entry_type = entry.get("type", "")
if uid:
uuid_to_parent[uid] = parent
if entry_type in STRIPPABLE_TYPES:
if uid:
stripped_uuids.add(uid)
else:
kept.append(entry)
# Reparent: walk up chain through stripped entries to find surviving ancestor
for entry in kept:
if "_raw" in entry:
continue
parent = entry.get("parentUuid", "")
original_parent = parent
while parent in stripped_uuids:
parent = uuid_to_parent.get(parent, "")
if parent != original_parent:
entry["parentUuid"] = parent
result_lines: list[str] = []
for entry in kept:
if "_raw" in entry:
result_lines.append(entry["_raw"])
else:
result_lines.append(json.dumps(entry, separators=(",", ":")))
return "\n".join(result_lines) + "\n"
# ---------------------------------------------------------------------------
# Local file I/O (read from CLI's JSONL, write temp file for --resume)
# ---------------------------------------------------------------------------
def read_transcript_file(transcript_path: str) -> str | None:
"""Read a JSONL transcript file from disk.
Returns the raw JSONL content, or ``None`` if the file is missing, empty,
or too short to hold a conversation (fewer than 2 lines; a valid 1-turn
transcript is user + assistant).
"""
if not transcript_path or not os.path.isfile(transcript_path):
logger.debug(f"[Transcript] File not found: {transcript_path}")
return None
try:
with open(transcript_path) as f:
content = f.read()
if not content.strip():
logger.debug(f"[Transcript] Empty file: {transcript_path}")
return None
lines = content.strip().split("\n")
if len(lines) < 2:
# Metadata-only files have 1 line (single queue-operation or snapshot).
logger.debug(
f"[Transcript] Too few lines ({len(lines)}): {transcript_path}"
)
return None
# Quick structural validation — parse first and last lines.
json.loads(lines[0])
json.loads(lines[-1])
logger.info(
f"[Transcript] Read {len(lines)} lines, "
f"{len(content)} bytes from {transcript_path}"
)
return content
except (json.JSONDecodeError, OSError) as e:
logger.warning(f"[Transcript] Failed to read {transcript_path}: {e}")
return None
def _sanitize_id(raw_id: str, max_len: int = 36) -> str:
"""Sanitize an ID for safe use in file paths.
Session/user IDs are expected to be UUIDs (hex + hyphens). Strip
everything else and truncate to *max_len* so the result cannot introduce
path separators or other special characters.
"""
cleaned = _SAFE_ID_RE.sub("", raw_id or "")[:max_len]
return cleaned or "unknown"
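# e.g. _sanitize_id("abc-123!@#") -> "abc-123" (non-hex characters dropped),
# while an ID with nothing salvageable falls back to "unknown".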
_SAFE_CWD_PREFIX = os.path.realpath("/tmp/copilot-")
def write_transcript_to_tempfile(
transcript_content: str,
session_id: str,
cwd: str,
) -> str | None:
"""Write JSONL transcript to a temp file inside *cwd* for ``--resume``.
The file lives in the session working directory so it is cleaned up
automatically when the session ends.
Returns the absolute path to the file, or ``None`` on failure.
"""
# Validate cwd is under the expected sandbox prefix (CodeQL sanitizer).
real_cwd = os.path.realpath(cwd)
if not real_cwd.startswith(_SAFE_CWD_PREFIX):
logger.warning(f"[Transcript] cwd outside sandbox: {cwd}")
return None
try:
os.makedirs(real_cwd, exist_ok=True)
safe_id = _sanitize_id(session_id, max_len=8)
jsonl_path = os.path.realpath(
os.path.join(real_cwd, f"transcript-{safe_id}.jsonl")
)
if not jsonl_path.startswith(real_cwd):
logger.warning(f"[Transcript] Path escaped cwd: {jsonl_path}")
return None
with open(jsonl_path, "w") as f:
f.write(transcript_content)
logger.info(f"[Transcript] Wrote resume file: {jsonl_path}")
return jsonl_path
except OSError as e:
logger.warning(f"[Transcript] Failed to write resume file: {e}")
return None
def validate_transcript(content: str | None) -> bool:
"""Check that a transcript has actual conversation messages.
A valid transcript for resume needs at least one user message and one
assistant message (not just queue-operation / file-history-snapshot
metadata).
"""
if not content or not content.strip():
return False
lines = content.strip().split("\n")
if len(lines) < 2:
return False
has_user = False
has_assistant = False
for line in lines:
try:
entry = json.loads(line)
msg_type = entry.get("type")
if msg_type == "user":
has_user = True
elif msg_type == "assistant":
has_assistant = True
except json.JSONDecodeError:
return False
return has_user and has_assistant
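# Minimal sketch of what passes: a 1-turn conversation is two JSONL lines,
# one user entry and one assistant entry (extra fields omitted here):
#   validate_transcript('{"type":"user"}\n{"type":"assistant"}')  # True
#   validate_transcript('{"type":"queue-operation"}')             # False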
# ---------------------------------------------------------------------------
# Bucket storage (GCS / local via WorkspaceStorageBackend)
# ---------------------------------------------------------------------------
def _storage_path_parts(user_id: str, session_id: str) -> tuple[str, str, str]:
"""Return (workspace_id, file_id, filename) for a session's transcript.
Path structure: ``chat-transcripts/{user_id}/{session_id}.jsonl``
IDs are sanitized to hex+hyphen to prevent path traversal.
"""
return (
TRANSCRIPT_STORAGE_PREFIX,
_sanitize_id(user_id),
f"{_sanitize_id(session_id)}.jsonl",
)
def _build_storage_path(user_id: str, session_id: str, backend: object) -> str:
"""Build the full storage path string that ``retrieve()`` expects.
``store()`` returns a path like ``gcs://bucket/workspaces/...`` or
``local://workspace_id/file_id/filename``. Since we use deterministic
arguments we can reconstruct the same path for download/delete without
having stored the return value.
"""
from backend.util.workspace_storage import GCSWorkspaceStorage
wid, fid, fname = _storage_path_parts(user_id, session_id)
if isinstance(backend, GCSWorkspaceStorage):
blob = f"workspaces/{wid}/{fid}/{fname}"
return f"gcs://{backend.bucket_name}/{blob}"
else:
# LocalWorkspaceStorage returns local://{relative_path}
return f"local://{wid}/{fid}/{fname}"
async def upload_transcript(user_id: str, session_id: str, content: str) -> None:
"""Strip progress entries and upload transcript to bucket storage.
Safety: only overwrites when the new (stripped) transcript is larger than
what is already stored. Since JSONL is append-only, the latest transcript
is always the longest. This prevents a slow/stale background task from
clobbering a newer upload from a concurrent turn.
"""
from backend.util.workspace_storage import get_workspace_storage
stripped = strip_progress_entries(content)
if not validate_transcript(stripped):
logger.warning(
f"[Transcript] Skipping upload — stripped content is not a valid "
f"transcript for session {session_id}"
)
return
storage = await get_workspace_storage()
wid, fid, fname = _storage_path_parts(user_id, session_id)
encoded = stripped.encode("utf-8")
new_size = len(encoded)
# Check existing transcript size to avoid overwriting newer with older
path = _build_storage_path(user_id, session_id, storage)
try:
existing = await storage.retrieve(path)
if len(existing) >= new_size:
logger.info(
f"[Transcript] Skipping upload — existing transcript "
f"({len(existing)}B) >= new ({new_size}B) for session "
f"{session_id}"
)
return
except Exception:
pass # No existing transcript or retrieval error — proceed with upload
await storage.store(
workspace_id=wid,
file_id=fid,
filename=fname,
content=encoded,
)
logger.info(
f"[Transcript] Uploaded {new_size} bytes "
f"(stripped from {len(content)}) for session {session_id}"
)
async def download_transcript(user_id: str, session_id: str) -> str | None:
"""Download transcript from bucket storage.
Returns the JSONL content string, or ``None`` if not found.
"""
from backend.util.workspace_storage import get_workspace_storage
storage = await get_workspace_storage()
path = _build_storage_path(user_id, session_id, storage)
try:
data = await storage.retrieve(path)
content = data.decode("utf-8")
logger.info(
f"[Transcript] Downloaded {len(content)} bytes for session {session_id}"
)
return content
except FileNotFoundError:
logger.debug(f"[Transcript] No transcript in storage for {session_id}")
return None
except Exception as e:
logger.warning(f"[Transcript] Failed to download transcript: {e}")
return None
async def delete_transcript(user_id: str, session_id: str) -> None:
"""Delete transcript from bucket storage (e.g. after resume failure)."""
from backend.util.workspace_storage import get_workspace_storage
storage = await get_workspace_storage()
path = _build_storage_path(user_id, session_id, storage)
try:
await storage.delete(path)
logger.info(f"[Transcript] Deleted transcript for session {session_id}")
except Exception as e:
logger.warning(f"[Transcript] Failed to delete transcript: {e}")

View File

@@ -245,12 +245,16 @@ async def _get_system_prompt_template(context: str) -> str:
return DEFAULT_SYSTEM_PROMPT.format(users_information=context)
-async def _build_system_prompt(user_id: str | None) -> tuple[str, Any]:
+async def _build_system_prompt(
user_id: str | None, has_conversation_history: bool = False
) -> tuple[str, Any]:
"""Build the full system prompt including business understanding if available.
Args:
-user_id: The user ID for fetching business understanding
-If "default" and this is the user's first session, will use "onboarding" instead.
+user_id: The user ID for fetching business understanding.
+has_conversation_history: Whether there's existing conversation history.
If True, we don't tell the model to greet/introduce (since they're
already in a conversation).
Returns:
Tuple of (compiled prompt string, business understanding object)
@@ -266,6 +270,8 @@ async def _build_system_prompt(user_id: str | None) -> tuple[str, Any]:
if understanding:
context = format_understanding_for_prompt(understanding)
elif has_conversation_history:
context = "No prior understanding saved yet. Continue the existing conversation naturally."
else:
context = "This is the first time you are meeting the user. Greet them and introduce them to the platform"
@@ -374,7 +380,6 @@ async def stream_chat_completion(
Raises:
NotFoundError: If session_id is invalid
-ValueError: If max_context_messages is exceeded
"""
completion_start = time.monotonic()
@@ -459,8 +464,9 @@ async def stream_chat_completion(
# Generate title for new sessions on first user message (non-blocking)
# Check: is_user_message, no title yet, and this is the first user message
-if is_user_message and message and not session.title:
-user_messages = [m for m in session.messages if m.role == "user"]
+user_messages = [m for m in session.messages if m.role == "user"]
+first_user_msg = message or (user_messages[0].content if user_messages else None)
if is_user_message and first_user_msg and not session.title:
if len(user_messages) == 1:
# First user message - generate title in background
import asyncio
@@ -468,7 +474,7 @@ async def stream_chat_completion(
# Capture only the values we need (not the session object) to avoid
# stale data issues when the main flow modifies the session
captured_session_id = session_id
-captured_message = message
+captured_message = first_user_msg
captured_user_id = user_id
async def _update_title():
@@ -1237,7 +1243,7 @@ async def _stream_chat_chunks(
total_time = (time_module.perf_counter() - stream_chunks_start) * 1000
logger.info(
-f"[TIMING] _stream_chat_chunks COMPLETED in {total_time/1000:.1f}s; "
+f"[TIMING] _stream_chat_chunks COMPLETED in {total_time / 1000:.1f}s; "
f"session={session.session_id}, user={session.user_id}",
extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
)
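The effect of the first_user_msg fallback above, in isolation (a standalone sketch; Msg and pick_title_source are illustrative names, not part of the codebase):

from dataclasses import dataclass

@dataclass
class Msg:
    role: str
    content: str

def pick_title_source(message: str | None, messages: list[Msg]) -> str | None:
    # Prefer the incoming message; otherwise fall back to the first stored user
    # message so resumed sessions without a new message can still get a title.
    user_messages = [m for m in messages if m.role == "user"]
    return message or (user_messages[0].content if user_messages else None)

assert pick_title_source(None, [Msg("user", "hi"), Msg("assistant", "hey")]) == "hi"
assert pick_title_source("new request", []) == "new request"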

View File

@@ -1,3 +1,4 @@
import asyncio
import logging
from os import getenv
@@ -11,6 +12,8 @@ from .response_model import (
StreamTextDelta,
StreamToolOutputAvailable,
)
from .sdk import service as sdk_service
from .sdk.transcript import download_transcript
logger = logging.getLogger(__name__)
@@ -80,3 +83,96 @@ async def test_stream_chat_completion_with_tool_calls(setup_test_user, test_user
session = await get_chat_session(session.session_id)
assert session, "Session not found"
assert session.usage, "Usage is empty"
@pytest.mark.asyncio(loop_scope="session")
async def test_sdk_resume_multi_turn(setup_test_user, test_user_id):
"""Test that the SDK --resume path captures and uses transcripts across turns.
Turn 1: Send a message containing a unique keyword.
Turn 2: Ask the model to recall that keyword — proving the transcript was
persisted and restored via --resume.
"""
api_key: str | None = getenv("OPEN_ROUTER_API_KEY")
if not api_key:
return pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")
from .config import ChatConfig
cfg = ChatConfig()
if not cfg.claude_agent_use_resume:
return pytest.skip("CLAUDE_AGENT_USE_RESUME is not enabled, skipping test")
session = await create_chat_session(test_user_id)
session = await upsert_chat_session(session)
# --- Turn 1: send a message with a unique keyword ---
keyword = "ZEPHYR42"
turn1_msg = (
f"Please remember this special keyword: {keyword}. "
"Just confirm you've noted it, keep your response brief."
)
turn1_text = ""
turn1_errors: list[str] = []
turn1_ended = False
async for chunk in sdk_service.stream_chat_completion_sdk(
session.session_id,
turn1_msg,
user_id=test_user_id,
):
if isinstance(chunk, StreamTextDelta):
turn1_text += chunk.delta
elif isinstance(chunk, StreamError):
turn1_errors.append(chunk.errorText)
elif isinstance(chunk, StreamFinish):
turn1_ended = True
assert turn1_ended, "Turn 1 did not finish"
assert not turn1_errors, f"Turn 1 errors: {turn1_errors}"
assert turn1_text, "Turn 1 produced no text"
# Wait for background upload task to complete (retry up to 5s)
transcript = None
for _ in range(10):
await asyncio.sleep(0.5)
transcript = await download_transcript(test_user_id, session.session_id)
if transcript:
break
assert transcript, (
"Transcript was not uploaded to bucket after turn 1 — "
"Stop hook may not have fired or transcript was too small"
)
logger.info(f"Turn 1 transcript uploaded: {len(transcript)} bytes")
# Reload session for turn 2
session = await get_chat_session(session.session_id, test_user_id)
assert session, "Session not found after turn 1"
# --- Turn 2: ask model to recall the keyword ---
turn2_msg = "What was the special keyword I asked you to remember?"
turn2_text = ""
turn2_errors: list[str] = []
turn2_ended = False
async for chunk in sdk_service.stream_chat_completion_sdk(
session.session_id,
turn2_msg,
user_id=test_user_id,
session=session,
):
if isinstance(chunk, StreamTextDelta):
turn2_text += chunk.delta
elif isinstance(chunk, StreamError):
turn2_errors.append(chunk.errorText)
elif isinstance(chunk, StreamFinish):
turn2_ended = True
assert turn2_ended, "Turn 2 did not finish"
assert not turn2_errors, f"Turn 2 errors: {turn2_errors}"
assert turn2_text, "Turn 2 produced no text"
assert keyword in turn2_text, (
f"Model did not recall keyword '{keyword}' in turn 2. "
f"Response: {turn2_text[:200]}"
)
logger.info(f"Turn 2 recalled keyword successfully: {turn2_text[:100]}")

View File

@@ -814,6 +814,28 @@ async def get_active_task_for_session(
if task_user_id and user_id != task_user_id:
continue
# Auto-expire stale tasks that exceeded stream_timeout
created_at_str = meta.get("created_at", "")
if created_at_str:
try:
created_at = datetime.fromisoformat(created_at_str)
age_seconds = (
datetime.now(timezone.utc) - created_at
).total_seconds()
if age_seconds > config.stream_timeout:
logger.warning(
f"[TASK_LOOKUP] Auto-expiring stale task {task_id[:8]}... "
f"(age={age_seconds:.0f}s > timeout={config.stream_timeout}s)"
)
await mark_task_completed(task_id, "failed")
continue
except (ValueError, TypeError):
pass
logger.info(
f"[TASK_LOOKUP] Found running task {task_id[:8]}... for session {session_id[:8]}..."
)
# Get the last message ID from Redis Stream
stream_key = _get_task_stream_key(task_id)
last_id = "0-0"
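The staleness check reduces to a small age comparison; a standalone sketch (the function name is illustrative):

from datetime import datetime, timezone

def is_stale(created_at_iso: str, timeout_s: float) -> bool:
    try:
        created_at = datetime.fromisoformat(created_at_iso)
        age = (datetime.now(timezone.utc) - created_at).total_seconds()
    except (ValueError, TypeError):
        return False  # unparseable or naive timestamps are left alone, as above
    return age > timeout_s

print(is_stale(datetime.now(timezone.utc).isoformat(), 300))  # False: just created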

View File

@@ -9,6 +9,8 @@ from backend.api.features.chat.tracking import track_tool_called
from .add_understanding import AddUnderstandingTool
from .agent_output import AgentOutputTool
from .base import BaseTool
from .bash_exec import BashExecTool
from .check_operation_status import CheckOperationStatusTool
from .create_agent import CreateAgentTool
from .customize_agent import CustomizeAgentTool
from .edit_agent import EditAgentTool
@@ -19,6 +21,7 @@ from .get_doc_page import GetDocPageTool
from .run_agent import RunAgentTool
from .run_block import RunBlockTool
from .search_docs import SearchDocsTool
from .web_fetch import WebFetchTool
from .workspace_files import (
DeleteWorkspaceFileTool,
ListWorkspaceFilesTool,
@@ -43,9 +46,14 @@ TOOL_REGISTRY: dict[str, BaseTool] = {
"run_agent": RunAgentTool(),
"run_block": RunBlockTool(),
"view_agent_output": AgentOutputTool(),
"check_operation_status": CheckOperationStatusTool(),
"search_docs": SearchDocsTool(),
"get_doc_page": GetDocPageTool(),
-# Workspace tools for CoPilot file operations
+# Web fetch for safe URL retrieval
"web_fetch": WebFetchTool(),
# Sandboxed code execution (bubblewrap)
"bash_exec": BashExecTool(),
# Persistent workspace tools (cloud storage, survives across sessions)
"list_workspace_files": ListWorkspaceFilesTool(),
"read_workspace_file": ReadWorkspaceFileTool(),
"write_workspace_file": WriteWorkspaceFileTool(),

View File

@@ -0,0 +1,154 @@
"""Dummy Agent Generator for testing.
Returns mock responses matching the format expected from the external service.
Enable via AGENTGENERATOR_USE_DUMMY=true in settings.
WARNING: This is for testing only. Do not use in production.
"""
import asyncio
import logging
import uuid
from typing import Any
logger = logging.getLogger(__name__)
# Dummy decomposition result (instructions type)
DUMMY_DECOMPOSITION_RESULT: dict[str, Any] = {
"type": "instructions",
"steps": [
{
"description": "Get input from user",
"action": "input",
"block_name": "AgentInputBlock",
},
{
"description": "Process the input",
"action": "process",
"block_name": "TextFormatterBlock",
},
{
"description": "Return output to user",
"action": "output",
"block_name": "AgentOutputBlock",
},
],
}
# Block IDs from backend/blocks/io.py
AGENT_INPUT_BLOCK_ID = "c0a8e994-ebf1-4a9c-a4d8-89d09c86741b"
AGENT_OUTPUT_BLOCK_ID = "363ae599-353e-4804-937e-b2ee3cef3da4"
def _generate_dummy_agent_json() -> dict[str, Any]:
"""Generate a minimal valid agent JSON for testing."""
input_node_id = str(uuid.uuid4())
output_node_id = str(uuid.uuid4())
return {
"id": str(uuid.uuid4()),
"version": 1,
"is_active": True,
"name": "Dummy Test Agent",
"description": "A dummy agent generated for testing purposes",
"nodes": [
{
"id": input_node_id,
"block_id": AGENT_INPUT_BLOCK_ID,
"input_default": {
"name": "input",
"title": "Input",
"description": "Enter your input",
"placeholder_values": [],
},
"metadata": {"position": {"x": 0, "y": 0}},
},
{
"id": output_node_id,
"block_id": AGENT_OUTPUT_BLOCK_ID,
"input_default": {
"name": "output",
"title": "Output",
"description": "Agent output",
"format": "{output}",
},
"metadata": {"position": {"x": 400, "y": 0}},
},
],
"links": [
{
"id": str(uuid.uuid4()),
"source_id": input_node_id,
"sink_id": output_node_id,
"source_name": "result",
"sink_name": "value",
"is_static": False,
},
],
}
async def decompose_goal_dummy(
description: str,
context: str = "",
library_agents: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
"""Return dummy decomposition result."""
logger.info("Using dummy agent generator for decompose_goal")
return DUMMY_DECOMPOSITION_RESULT.copy()
async def generate_agent_dummy(
instructions: dict[str, Any],
library_agents: list[dict[str, Any]] | None = None,
operation_id: str | None = None,
task_id: str | None = None,
) -> dict[str, Any]:
"""Return dummy agent JSON after a simulated delay."""
logger.info("Using dummy agent generator for generate_agent (30s delay)")
await asyncio.sleep(30)
return _generate_dummy_agent_json()
async def generate_agent_patch_dummy(
update_request: str,
current_agent: dict[str, Any],
library_agents: list[dict[str, Any]] | None = None,
operation_id: str | None = None,
task_id: str | None = None,
) -> dict[str, Any]:
"""Return dummy patched agent (returns the current agent with updated description)."""
logger.info("Using dummy agent generator for generate_agent_patch")
patched = current_agent.copy()
patched["description"] = (
f"{current_agent.get('description', '')} (updated: {update_request})"
)
return patched
async def customize_template_dummy(
template_agent: dict[str, Any],
modification_request: str,
context: str = "",
) -> dict[str, Any]:
"""Return dummy customized template (returns template with updated description)."""
logger.info("Using dummy agent generator for customize_template")
customized = template_agent.copy()
customized["description"] = (
f"{template_agent.get('description', '')} (customized: {modification_request})"
)
return customized
async def get_blocks_dummy() -> list[dict[str, Any]]:
"""Return dummy blocks list."""
logger.info("Using dummy agent generator for get_blocks")
return [
{"id": AGENT_INPUT_BLOCK_ID, "name": "AgentInputBlock"},
{"id": AGENT_OUTPUT_BLOCK_ID, "name": "AgentOutputBlock"},
]
async def health_check_dummy() -> bool:
"""Always returns healthy for dummy service."""
return True

View File

@@ -12,8 +12,19 @@ import httpx
from backend.util.settings import Settings
from .dummy import (
customize_template_dummy,
decompose_goal_dummy,
generate_agent_dummy,
generate_agent_patch_dummy,
get_blocks_dummy,
health_check_dummy,
)
logger = logging.getLogger(__name__)
_dummy_mode_warned = False
def _create_error_response(
error_message: str,
@@ -90,10 +101,26 @@ def _get_settings() -> Settings:
return _settings
-def is_external_service_configured() -> bool:
-"""Check if external Agent Generator service is configured."""
+def _is_dummy_mode() -> bool:
+"""Check if dummy mode is enabled for testing."""
global _dummy_mode_warned
settings = _get_settings()
-return bool(settings.config.agentgenerator_host)
+is_dummy = bool(settings.config.agentgenerator_use_dummy)
if is_dummy and not _dummy_mode_warned:
logger.warning(
"Agent Generator running in DUMMY MODE - returning mock responses. "
"Do not use in production!"
)
_dummy_mode_warned = True
return is_dummy
def is_external_service_configured() -> bool:
"""Check if external Agent Generator service is configured (or dummy mode)."""
settings = _get_settings()
return bool(settings.config.agentgenerator_host) or bool(
settings.config.agentgenerator_use_dummy
)
def _get_base_url() -> str:
@@ -137,6 +164,9 @@ async def decompose_goal_external(
- {"type": "error", "error": "...", "error_type": "..."} on error
Or None on unexpected error
"""
if _is_dummy_mode():
return await decompose_goal_dummy(description, context, library_agents)
client = _get_client()
if context:
@@ -226,6 +256,11 @@ async def generate_agent_external(
Returns:
Agent JSON dict, {"status": "accepted"} for async, or error dict {"type": "error", ...} on error
"""
if _is_dummy_mode():
return await generate_agent_dummy(
instructions, library_agents, operation_id, task_id
)
client = _get_client()
# Build request payload
@@ -297,6 +332,11 @@ async def generate_agent_patch_external(
Returns:
Updated agent JSON, clarifying questions dict, {"status": "accepted"} for async, or error dict on error
"""
if _is_dummy_mode():
return await generate_agent_patch_dummy(
update_request, current_agent, library_agents, operation_id, task_id
)
client = _get_client()
# Build request payload
@@ -383,6 +423,11 @@ async def customize_template_external(
Returns:
Customized agent JSON, clarifying questions dict, or error dict on error
"""
if _is_dummy_mode():
return await customize_template_dummy(
template_agent, modification_request, context
)
client = _get_client()
request = modification_request
@@ -445,6 +490,9 @@ async def get_blocks_external() -> list[dict[str, Any]] | None:
Returns:
List of block info dicts or None on error
"""
if _is_dummy_mode():
return await get_blocks_dummy()
client = _get_client()
try:
@@ -478,6 +526,9 @@ async def health_check() -> bool:
if not is_external_service_configured():
return False
if _is_dummy_mode():
return await health_check_dummy()
client = _get_client()
try:
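How a test environment might opt into the dummy path (a sketch; the env flag name comes from the dummy module's docstring, and the exact Settings plumbing may differ):

import os

# Must be set before Settings is first loaded so agentgenerator_use_dummy is true.
os.environ["AGENTGENERATOR_USE_DUMMY"] = "true"

# With the flag on, each *_external call above short-circuits via _is_dummy_mode()
# and returns the canned dummy payloads instead of contacting agentgenerator_host.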

View File

@@ -0,0 +1,131 @@
"""Bash execution tool — run shell commands in a bubblewrap sandbox.
Full Bash scripting is allowed (loops, conditionals, pipes, functions, etc.).
Safety comes from OS-level isolation (bubblewrap): only system dirs visible
read-only, writable workspace only, clean env, no network.
Requires bubblewrap (``bwrap``) — the tool is disabled when bwrap is not
available (e.g. macOS development).
"""
import logging
from typing import Any
from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.tools.base import BaseTool
from backend.api.features.chat.tools.models import (
BashExecResponse,
ErrorResponse,
ToolResponseBase,
)
from backend.api.features.chat.tools.sandbox import (
get_workspace_dir,
has_full_sandbox,
run_sandboxed,
)
logger = logging.getLogger(__name__)
class BashExecTool(BaseTool):
"""Execute Bash commands in a bubblewrap sandbox."""
@property
def name(self) -> str:
return "bash_exec"
@property
def description(self) -> str:
if not has_full_sandbox():
return (
"Bash execution is DISABLED — bubblewrap sandbox is not "
"available on this platform. Do not call this tool."
)
return (
"Execute a Bash command or script in a bubblewrap sandbox. "
"Full Bash scripting is supported (loops, conditionals, pipes, "
"functions, etc.). "
"The sandbox shares the same working directory as the SDK Read/Write "
"tools — files created by either are accessible to both. "
"SECURITY: Only system directories (/usr, /bin, /lib, /etc) are "
"visible read-only, the per-session workspace is the only writable "
"path, environment variables are wiped (no secrets), all network "
"access is blocked at the kernel level, and resource limits are "
"enforced (max 64 processes, 512MB memory, 50MB file size). "
"Application code, configs, and other directories are NOT accessible. "
"To fetch web content, use the web_fetch tool instead. "
"Execution is killed after the timeout (default 30s, max 120s). "
"Returns stdout and stderr. "
"Useful for file manipulation, data processing with Unix tools "
"(grep, awk, sed, jq, etc.), and running shell scripts."
)
@property
def parameters(self) -> dict[str, Any]:
return {
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "Bash command or script to execute.",
},
"timeout": {
"type": "integer",
"description": (
"Max execution time in seconds (default 30, max 120)."
),
"default": 30,
},
},
"required": ["command"],
}
@property
def requires_auth(self) -> bool:
return False
async def _execute(
self,
user_id: str | None,
session: ChatSession,
**kwargs: Any,
) -> ToolResponseBase:
session_id = session.session_id if session else None
if not has_full_sandbox():
return ErrorResponse(
message="bash_exec requires bubblewrap sandbox (Linux only).",
error="sandbox_unavailable",
session_id=session_id,
)
command: str = (kwargs.get("command") or "").strip()
timeout: int = kwargs.get("timeout", 30)
if not command:
return ErrorResponse(
message="No command provided.",
error="empty_command",
session_id=session_id,
)
workspace = get_workspace_dir(session_id or "default")
stdout, stderr, exit_code, timed_out = await run_sandboxed(
command=["bash", "-c", command],
cwd=workspace,
timeout=timeout,
)
return BashExecResponse(
message=(
"Execution timed out"
if timed_out
else f"Command executed (exit {exit_code})"
),
stdout=stdout,
stderr=stderr,
exit_code=exit_code,
timed_out=timed_out,
session_id=session_id,
)
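A sketch of invoking the tool directly, as the chat tool tests elsewhere in this PR do (passing session=None relies on the tool's own fallback to the "default" workspace):

import asyncio
from backend.api.features.chat.tools.bash_exec import BashExecTool

async def demo() -> None:
    tool = BashExecTool()
    response = await tool._execute(
        user_id=None,
        session=None,
        command="printf 'one\\ntwo\\n' | wc -l",
        timeout=10,
    )
    # Linux with bwrap: BashExecResponse with stdout containing "2" and exit_code 0.
    # Without bwrap (e.g. macOS): ErrorResponse with error="sandbox_unavailable".
    print(response)

asyncio.run(demo())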

View File

@@ -0,0 +1,127 @@
"""CheckOperationStatusTool — query the status of a long-running operation."""
import logging
from typing import Any
from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.tools.base import BaseTool
from backend.api.features.chat.tools.models import (
ErrorResponse,
ResponseType,
ToolResponseBase,
)
logger = logging.getLogger(__name__)
class OperationStatusResponse(ToolResponseBase):
"""Response for check_operation_status tool."""
type: ResponseType = ResponseType.OPERATION_STATUS
task_id: str
operation_id: str
status: str # "running", "completed", "failed"
tool_name: str | None = None
message: str = ""
class CheckOperationStatusTool(BaseTool):
"""Check the status of a long-running operation (create_agent, edit_agent, etc.).
The CoPilot uses this tool to report back to the user whether an
operation that was started earlier has completed, failed, or is still
running.
"""
@property
def name(self) -> str:
return "check_operation_status"
@property
def description(self) -> str:
return (
"Check the current status of a long-running operation such as "
"create_agent or edit_agent. Accepts either an operation_id or "
"task_id from a previous operation_started response. "
"Returns the current status: running, completed, or failed."
)
@property
def parameters(self) -> dict[str, Any]:
return {
"type": "object",
"properties": {
"operation_id": {
"type": "string",
"description": (
"The operation_id from an operation_started response."
),
},
"task_id": {
"type": "string",
"description": (
"The task_id from an operation_started response. "
"Used as fallback if operation_id is not provided."
),
},
},
"required": [],
}
@property
def requires_auth(self) -> bool:
return False
async def _execute(
self,
user_id: str | None,
session: ChatSession,
**kwargs,
) -> ToolResponseBase:
from backend.api.features.chat import stream_registry
operation_id = (kwargs.get("operation_id") or "").strip()
task_id = (kwargs.get("task_id") or "").strip()
if not operation_id and not task_id:
return ErrorResponse(
message="Please provide an operation_id or task_id.",
error="missing_parameter",
)
task = None
if operation_id:
task = await stream_registry.find_task_by_operation_id(operation_id)
if task is None and task_id:
task = await stream_registry.get_task(task_id)
if task is None:
# Task not in Redis — it may have already expired (TTL).
# Check conversation history for the result instead.
return ErrorResponse(
message=(
"Operation not found — it may have already completed and "
"expired from the status tracker. Check the conversation "
"history for the result."
),
error="not_found",
)
status_messages = {
"running": (
f"The {task.tool_name or 'operation'} is still running. "
"Please wait for it to complete."
),
"completed": (
f"The {task.tool_name or 'operation'} has completed successfully."
),
"failed": f"The {task.tool_name or 'operation'} has failed.",
}
return OperationStatusResponse(
task_id=task.task_id,
operation_id=task.operation_id,
status=task.status,
tool_name=task.tool_name,
message=status_messages.get(task.status, f"Status: {task.status}"),
)
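An illustrative follow-up call after an operation_started response (the IDs are made up; the tool does not use the session argument):

import asyncio
from backend.api.features.chat.tools.check_operation_status import (
    CheckOperationStatusTool,
)

async def demo() -> None:
    tool = CheckOperationStatusTool()
    response = await tool._execute(
        user_id="user-123",
        session=None,
        operation_id="op-abc123",  # from the earlier operation_started payload
    )
    # Either an OperationStatusResponse (running / completed / failed) or, once the
    # Redis entry has expired, the not_found ErrorResponse defined above.
    print(response)

asyncio.run(demo())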

View File

@@ -7,7 +7,6 @@ from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.tools.base import BaseTool, ToolResponseBase
from backend.api.features.chat.tools.models import (
BlockInfoSummary,
-BlockInputFieldInfo,
BlockListResponse,
ErrorResponse,
NoResultsResponse,
@@ -55,7 +54,8 @@ class FindBlockTool(BaseTool):
"Blocks are reusable components that perform specific tasks like "
"sending emails, making API calls, processing text, etc. "
"IMPORTANT: Use this tool FIRST to get the block's 'id' before calling run_block. "
-"The response includes each block's id, required_inputs, and input_schema."
+"The response includes each block's id, name, and description. "
+"Call run_block with the block's id **with no inputs** to see detailed inputs/outputs and execute it."
)
@property
@@ -124,7 +124,7 @@ class FindBlockTool(BaseTool):
session_id=session_id,
)
-# Enrich results with full block information
+# Enrich results with block information
blocks: list[BlockInfoSummary] = []
for result in results:
block_id = result["content_id"]
@@ -141,65 +141,12 @@ class FindBlockTool(BaseTool):
):
continue
# Get input/output schemas
input_schema = {}
output_schema = {}
try:
input_schema = block.input_schema.jsonschema()
except Exception as e:
logger.debug(
"Failed to generate input schema for block %s: %s",
block_id,
e,
)
try:
output_schema = block.output_schema.jsonschema()
except Exception as e:
logger.debug(
"Failed to generate output schema for block %s: %s",
block_id,
e,
)
# Get categories from block instance
categories = []
if hasattr(block, "categories") and block.categories:
categories = [cat.value for cat in block.categories]
# Extract required inputs for easier use
required_inputs: list[BlockInputFieldInfo] = []
if input_schema:
properties = input_schema.get("properties", {})
required_fields = set(input_schema.get("required", []))
# Get credential field names to exclude from required inputs
credentials_fields = set(
block.input_schema.get_credentials_fields().keys()
)
for field_name, field_schema in properties.items():
# Skip credential fields - they're handled separately
if field_name in credentials_fields:
continue
required_inputs.append(
BlockInputFieldInfo(
name=field_name,
type=field_schema.get("type", "string"),
description=field_schema.get("description", ""),
required=field_name in required_fields,
default=field_schema.get("default"),
)
)
blocks.append(
BlockInfoSummary(
id=block_id,
name=block.name,
description=block.description or "",
-categories=categories,
-input_schema=input_schema,
-output_schema=output_schema,
-required_inputs=required_inputs,
+categories=[c.value for c in block.categories],
)
)
@@ -228,8 +175,7 @@ class FindBlockTool(BaseTool):
return BlockListResponse(
message=(
f"Found {len(blocks)} block(s) matching '{query}'. "
-"To execute a block, use run_block with the block's 'id' field "
-"and provide 'input_data' matching the block's input_schema."
+"To see a block's inputs/outputs and execute it, use run_block with the block's 'id' - providing no inputs."
),
blocks=blocks,
count=len(blocks),

View File

@@ -18,7 +18,13 @@ _TEST_USER_ID = "test-user-find-block"
def make_mock_block(
-block_id: str, name: str, block_type: BlockType, disabled: bool = False
+block_id: str,
name: str,
block_type: BlockType,
disabled: bool = False,
input_schema: dict | None = None,
output_schema: dict | None = None,
credentials_fields: dict | None = None,
):
"""Create a mock block for testing."""
mock = MagicMock()
@@ -28,10 +34,13 @@ def make_mock_block(
mock.block_type = block_type
mock.disabled = disabled
mock.input_schema = MagicMock()
-mock.input_schema.jsonschema.return_value = {"properties": {}, "required": []}
-mock.input_schema.get_credentials_fields.return_value = {}
+mock.input_schema.jsonschema.return_value = input_schema or {
+"properties": {},
"required": [],
}
mock.input_schema.get_credentials_fields.return_value = credentials_fields or {}
mock.output_schema = MagicMock()
-mock.output_schema.jsonschema.return_value = {}
+mock.output_schema.jsonschema.return_value = output_schema or {}
mock.categories = []
return mock
@@ -137,3 +146,241 @@ class TestFindBlockFiltering:
assert isinstance(response, BlockListResponse)
assert len(response.blocks) == 1
assert response.blocks[0].id == "normal-block-id"
@pytest.mark.asyncio(loop_scope="session")
async def test_response_size_average_chars_per_block(self):
"""Measure average chars per block in the serialized response."""
session = make_session(user_id=_TEST_USER_ID)
# Realistic block definitions modeled after real blocks
block_defs = [
{
"id": "http-block-id",
"name": "Send Web Request",
"input_schema": {
"properties": {
"url": {
"type": "string",
"description": "The URL to send the request to",
},
"method": {
"type": "string",
"description": "The HTTP method to use",
},
"headers": {
"type": "object",
"description": "Headers to include in the request",
},
"json_format": {
"type": "boolean",
"description": "If true, send the body as JSON",
},
"body": {
"type": "object",
"description": "Form/JSON body payload",
},
"credentials": {
"type": "object",
"description": "HTTP credentials",
},
},
"required": ["url", "method"],
},
"output_schema": {
"properties": {
"response": {
"type": "object",
"description": "The response from the server",
},
"client_error": {
"type": "object",
"description": "Errors on 4xx status codes",
},
"server_error": {
"type": "object",
"description": "Errors on 5xx status codes",
},
"error": {
"type": "string",
"description": "Errors for all other exceptions",
},
},
},
"credentials_fields": {"credentials": True},
},
{
"id": "email-block-id",
"name": "Send Email",
"input_schema": {
"properties": {
"to_email": {
"type": "string",
"description": "Recipient email address",
},
"subject": {
"type": "string",
"description": "Subject of the email",
},
"body": {
"type": "string",
"description": "Body of the email",
},
"config": {
"type": "object",
"description": "SMTP Config",
},
"credentials": {
"type": "object",
"description": "SMTP credentials",
},
},
"required": ["to_email", "subject", "body", "credentials"],
},
"output_schema": {
"properties": {
"status": {
"type": "string",
"description": "Status of the email sending operation",
},
"error": {
"type": "string",
"description": "Error message if sending failed",
},
},
},
"credentials_fields": {"credentials": True},
},
{
"id": "claude-code-block-id",
"name": "Claude Code",
"input_schema": {
"properties": {
"e2b_credentials": {
"type": "object",
"description": "API key for E2B platform",
},
"anthropic_credentials": {
"type": "object",
"description": "API key for Anthropic",
},
"prompt": {
"type": "string",
"description": "Task or instruction for Claude Code",
},
"timeout": {
"type": "integer",
"description": "Sandbox timeout in seconds",
},
"setup_commands": {
"type": "array",
"description": "Shell commands to run before execution",
},
"working_directory": {
"type": "string",
"description": "Working directory for Claude Code",
},
"session_id": {
"type": "string",
"description": "Session ID to resume a conversation",
},
"sandbox_id": {
"type": "string",
"description": "Sandbox ID to reconnect to",
},
"conversation_history": {
"type": "string",
"description": "Previous conversation history",
},
"dispose_sandbox": {
"type": "boolean",
"description": "Whether to dispose sandbox after execution",
},
},
"required": [
"e2b_credentials",
"anthropic_credentials",
"prompt",
],
},
"output_schema": {
"properties": {
"response": {
"type": "string",
"description": "Output from Claude Code execution",
},
"files": {
"type": "array",
"description": "Files created/modified by Claude Code",
},
"conversation_history": {
"type": "string",
"description": "Full conversation history",
},
"session_id": {
"type": "string",
"description": "Session ID for this conversation",
},
"sandbox_id": {
"type": "string",
"description": "ID of the sandbox instance",
},
"error": {
"type": "string",
"description": "Error message if execution failed",
},
},
},
"credentials_fields": {
"e2b_credentials": True,
"anthropic_credentials": True,
},
},
]
search_results = [
{"content_id": d["id"], "score": 0.9 - i * 0.1}
for i, d in enumerate(block_defs)
]
mock_blocks = {
d["id"]: make_mock_block(
block_id=d["id"],
name=d["name"],
block_type=BlockType.STANDARD,
input_schema=d["input_schema"],
output_schema=d["output_schema"],
credentials_fields=d["credentials_fields"],
)
for d in block_defs
}
with patch(
"backend.api.features.chat.tools.find_block.unified_hybrid_search",
new_callable=AsyncMock,
return_value=(search_results, len(search_results)),
), patch(
"backend.api.features.chat.tools.find_block.get_block",
side_effect=lambda bid: mock_blocks.get(bid),
):
tool = FindBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID, session=session, query="test"
)
assert isinstance(response, BlockListResponse)
assert response.count == len(block_defs)
total_chars = len(response.model_dump_json())
avg_chars = total_chars // response.count
# Print for visibility in test output
print(f"\nTotal response size: {total_chars} chars")
print(f"Number of blocks: {response.count}")
print(f"Average chars per block: {avg_chars}")
# The old response was ~90K for 10 blocks (~9K per block).
# Previous optimization reduced it to ~1.5K per block (no raw JSON schemas).
# Now with only id/name/description, we expect ~300 chars per block.
assert avg_chars < 500, (
f"Average chars per block ({avg_chars}) exceeds 500. "
f"Total response: {total_chars} chars for {response.count} blocks."
)

View File

@@ -25,6 +25,7 @@ class ResponseType(str, Enum):
AGENT_SAVED = "agent_saved"
CLARIFICATION_NEEDED = "clarification_needed"
BLOCK_LIST = "block_list"
BLOCK_DETAILS = "block_details"
BLOCK_OUTPUT = "block_output"
DOC_SEARCH_RESULTS = "doc_search_results"
DOC_PAGE = "doc_page"
@@ -40,6 +41,12 @@ class ResponseType(str, Enum):
OPERATION_IN_PROGRESS = "operation_in_progress"
# Input validation
INPUT_VALIDATION_ERROR = "input_validation_error"
# Web fetch
WEB_FETCH = "web_fetch"
# Code execution
BASH_EXEC = "bash_exec"
# Operation status check
OPERATION_STATUS = "operation_status"
# Base response model
@@ -335,11 +342,17 @@ class BlockInfoSummary(BaseModel):
name: str
description: str
categories: list[str]
-input_schema: dict[str, Any]
-output_schema: dict[str, Any]
+input_schema: dict[str, Any] = Field(
+default_factory=dict,
description="Full JSON schema for block inputs",
)
output_schema: dict[str, Any] = Field(
default_factory=dict,
description="Full JSON schema for block outputs",
)
required_inputs: list[BlockInputFieldInfo] = Field(
default_factory=list,
-description="List of required input fields for this block",
+description="List of input fields for this block",
)
@@ -352,10 +365,29 @@ class BlockListResponse(ToolResponseBase):
query: str
usage_hint: str = Field(
default="To execute a block, call run_block with block_id set to the block's "
-"'id' field and input_data containing the required fields from input_schema."
+"'id' field and input_data containing the fields listed in required_inputs."
)
class BlockDetails(BaseModel):
"""Detailed block information."""
id: str
name: str
description: str
inputs: dict[str, Any] = {}
outputs: dict[str, Any] = {}
credentials: list[CredentialsMetaInput] = []
class BlockDetailsResponse(ToolResponseBase):
"""Response for block details (first run_block attempt)."""
type: ResponseType = ResponseType.BLOCK_DETAILS
block: BlockDetails
user_authenticated: bool = False
class BlockOutputResponse(ToolResponseBase):
"""Response for run_block tool."""
@@ -421,3 +453,24 @@ class AsyncProcessingResponse(ToolResponseBase):
status: str = "accepted"  # Must be "accepted" for detection
operation_id: str | None = None
task_id: str | None = None
class WebFetchResponse(ToolResponseBase):
"""Response for web_fetch tool."""
type: ResponseType = ResponseType.WEB_FETCH
url: str
status_code: int
content_type: str
content: str
truncated: bool = False
class BashExecResponse(ToolResponseBase):
"""Response for bash_exec tool."""
type: ResponseType = ResponseType.BASH_EXEC
stdout: str
stderr: str
exit_code: int
timed_out: bool = False

View File

@@ -23,8 +23,11 @@ from backend.util.exceptions import BlockError
from .base import BaseTool
from .helpers import get_inputs_from_schema
from .models import (
BlockDetails,
BlockDetailsResponse,
BlockOutputResponse,
ErrorResponse,
InputValidationErrorResponse,
SetupInfo,
SetupRequirementsResponse,
ToolResponseBase,
@@ -51,8 +54,8 @@ class RunBlockTool(BaseTool):
"Execute a specific block with the provided input data. " "Execute a specific block with the provided input data. "
"IMPORTANT: You MUST call find_block first to get the block's 'id' - " "IMPORTANT: You MUST call find_block first to get the block's 'id' - "
"do NOT guess or make up block IDs. " "do NOT guess or make up block IDs. "
"Use the 'id' from find_block results and provide input_data " "On first attempt (without input_data), returns detailed schema showing "
"matching the block's required_inputs." "required inputs and outputs. Then call again with proper input_data to execute."
) )
@property @property
@@ -67,11 +70,19 @@ class RunBlockTool(BaseTool):
"NEVER guess this - always get it from find_block first." "NEVER guess this - always get it from find_block first."
), ),
}, },
"block_name": {
"type": "string",
"description": (
"The block's human-readable name from find_block results. "
"Used for display purposes in the UI."
),
},
"input_data": { "input_data": {
"type": "object", "type": "object",
"description": ( "description": (
"Input values for the block. Use the 'required_inputs' field " "Input values for the block. "
"from find_block to see what fields are needed." "First call with empty {} to see the block's schema, "
"then call again with proper values to execute."
), ),
}, },
}, },
@@ -156,6 +167,34 @@ class RunBlockTool(BaseTool):
await self._resolve_block_credentials(user_id, block, input_data)
)
# Get block schemas for details/validation
try:
input_schema: dict[str, Any] = block.input_schema.jsonschema()
except Exception as e:
logger.warning(
"Failed to generate input schema for block %s: %s",
block_id,
e,
)
return ErrorResponse(
message=f"Block '{block.name}' has an invalid input schema",
error=str(e),
session_id=session_id,
)
try:
output_schema: dict[str, Any] = block.output_schema.jsonschema()
except Exception as e:
logger.warning(
"Failed to generate output schema for block %s: %s",
block_id,
e,
)
return ErrorResponse(
message=f"Block '{block.name}' has an invalid output schema",
error=str(e),
session_id=session_id,
)
if missing_credentials:
# Return setup requirements response with missing credentials
credentials_fields_info = block.input_schema.get_credentials_fields_info()
@@ -188,6 +227,53 @@ class RunBlockTool(BaseTool):
graph_version=None,
)
# Check if this is a first attempt (required inputs missing)
# Return block details so user can see what inputs are needed
credentials_fields = set(block.input_schema.get_credentials_fields().keys())
required_keys = set(input_schema.get("required", []))
required_non_credential_keys = required_keys - credentials_fields
provided_input_keys = set(input_data.keys()) - credentials_fields
# Check for unknown input fields
valid_fields = (
set(input_schema.get("properties", {}).keys()) - credentials_fields
)
unrecognized_fields = provided_input_keys - valid_fields
if unrecognized_fields:
return InputValidationErrorResponse(
message=(
f"Unknown input field(s) provided: {', '.join(sorted(unrecognized_fields))}. "
f"Block was not executed. Please use the correct field names from the schema."
),
session_id=session_id,
unrecognized_fields=sorted(unrecognized_fields),
inputs=input_schema,
)
# Show details when not all required non-credential inputs are provided
if not (required_non_credential_keys <= provided_input_keys):
# Get credentials info for the response
credentials_meta = []
for field_name, cred_meta in matched_credentials.items():
credentials_meta.append(cred_meta)
return BlockDetailsResponse(
message=(
f"Block '{block.name}' details. "
"Provide input_data matching the inputs schema to execute the block."
),
session_id=session_id,
block=BlockDetails(
id=block_id,
name=block.name,
description=block.description or "",
inputs=input_schema,
outputs=output_schema,
credentials=credentials_meta,
),
user_authenticated=True,
)
try:
# Get or create user's workspace for CoPilot file operations
workspace = await get_or_create_workspace(user_id)
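The resulting two-step run_block protocol, sketched as the tool-call payloads an LLM would send (the block id and fields are illustrative, modeled on the AI Text Generator example in the new tests):

first_call = {"name": "run_block", "arguments": {"block_id": "ai-text-gen-id"}}
# -> BlockDetailsResponse: full inputs/outputs schemas plus credential metadata.

second_call = {
    "name": "run_block",
    "arguments": {
        "block_id": "ai-text-gen-id",
        "block_name": "AI Text Generator",
        "input_data": {"prompt": "Write a haiku about coding"},
    },
}
# -> BlockOutputResponse on success; an unknown key such as "LLM_Model" instead
#    yields InputValidationErrorResponse and the block is not executed.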

View File

@@ -1,10 +1,15 @@
"""Tests for block execution guards in RunBlockTool.""" """Tests for block execution guards and input validation in RunBlockTool."""
from unittest.mock import MagicMock, patch from unittest.mock import AsyncMock, MagicMock, patch
import pytest import pytest
from backend.api.features.chat.tools.models import ErrorResponse from backend.api.features.chat.tools.models import (
BlockDetailsResponse,
BlockOutputResponse,
ErrorResponse,
InputValidationErrorResponse,
)
from backend.api.features.chat.tools.run_block import RunBlockTool from backend.api.features.chat.tools.run_block import RunBlockTool
from backend.blocks._base import BlockType from backend.blocks._base import BlockType
@@ -28,6 +33,39 @@ def make_mock_block(
return mock return mock
def make_mock_block_with_schema(
block_id: str,
name: str,
input_properties: dict,
required_fields: list[str],
output_properties: dict | None = None,
):
"""Create a mock block with a defined input/output schema for validation tests."""
mock = MagicMock()
mock.id = block_id
mock.name = name
mock.block_type = BlockType.STANDARD
mock.disabled = False
mock.description = f"Test block: {name}"
input_schema = {
"properties": input_properties,
"required": required_fields,
}
mock.input_schema = MagicMock()
mock.input_schema.jsonschema.return_value = input_schema
mock.input_schema.get_credentials_fields_info.return_value = {}
mock.input_schema.get_credentials_fields.return_value = {}
output_schema = {
"properties": output_properties or {"result": {"type": "string"}},
}
mock.output_schema = MagicMock()
mock.output_schema.jsonschema.return_value = output_schema
return mock
class TestRunBlockFiltering:
"""Tests for block execution guards in RunBlockTool."""
@@ -104,3 +142,221 @@ class TestRunBlockFiltering:
# (may be other errors like missing credentials, but not the exclusion guard)
if isinstance(response, ErrorResponse):
assert "cannot be run directly in CoPilot" not in response.message
class TestRunBlockInputValidation:
"""Tests for input field validation in RunBlockTool.
run_block rejects unknown input field names with InputValidationErrorResponse,
preventing silent failures where incorrect keys would be ignored and the block
would execute with default values instead of the caller's intended values.
"""
@pytest.mark.asyncio(loop_scope="session")
async def test_unknown_input_fields_are_rejected(self):
"""run_block rejects unknown input fields instead of silently ignoring them.
Scenario: The AI Text Generator block has a field called 'model' (for LLM model
selection), but the LLM calling the tool guesses wrong and sends 'LLM_Model'
instead. The block should reject the request and return the valid schema.
"""
session = make_session(user_id=_TEST_USER_ID)
mock_block = make_mock_block_with_schema(
block_id="ai-text-gen-id",
name="AI Text Generator",
input_properties={
"prompt": {"type": "string", "description": "The prompt to send"},
"model": {
"type": "string",
"description": "The LLM model to use",
"default": "gpt-4o-mini",
},
"sys_prompt": {
"type": "string",
"description": "System prompt",
"default": "",
},
},
required_fields=["prompt"],
output_properties={"response": {"type": "string"}},
)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=mock_block,
):
tool = RunBlockTool()
# Provide 'prompt' (correct) but 'LLM_Model' instead of 'model' (wrong key)
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="ai-text-gen-id",
input_data={
"prompt": "Write a haiku about coding",
"LLM_Model": "claude-opus-4-6", # WRONG KEY - should be 'model'
},
)
assert isinstance(response, InputValidationErrorResponse)
assert "LLM_Model" in response.unrecognized_fields
assert "Block was not executed" in response.message
assert "inputs" in response.model_dump() # valid schema included
@pytest.mark.asyncio(loop_scope="session")
async def test_multiple_wrong_keys_are_all_reported(self):
"""All unrecognized field names are reported in a single error response."""
session = make_session(user_id=_TEST_USER_ID)
mock_block = make_mock_block_with_schema(
block_id="ai-text-gen-id",
name="AI Text Generator",
input_properties={
"prompt": {"type": "string"},
"model": {"type": "string", "default": "gpt-4o-mini"},
"sys_prompt": {"type": "string", "default": ""},
"retry": {"type": "integer", "default": 3},
},
required_fields=["prompt"],
)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=mock_block,
):
tool = RunBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="ai-text-gen-id",
input_data={
"prompt": "Hello", # correct
"llm_model": "claude-opus-4-6", # WRONG - should be 'model'
"system_prompt": "Be helpful", # WRONG - should be 'sys_prompt'
"retries": 5, # WRONG - should be 'retry'
},
)
assert isinstance(response, InputValidationErrorResponse)
assert set(response.unrecognized_fields) == {
"llm_model",
"system_prompt",
"retries",
}
assert "Block was not executed" in response.message
@pytest.mark.asyncio(loop_scope="session")
async def test_unknown_fields_rejected_even_with_missing_required(self):
"""Unknown fields are caught before the missing-required-fields check."""
session = make_session(user_id=_TEST_USER_ID)
mock_block = make_mock_block_with_schema(
block_id="ai-text-gen-id",
name="AI Text Generator",
input_properties={
"prompt": {"type": "string"},
"model": {"type": "string", "default": "gpt-4o-mini"},
},
required_fields=["prompt"],
)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=mock_block,
):
tool = RunBlockTool()
# 'prompt' is missing AND 'LLM_Model' is an unknown field
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="ai-text-gen-id",
input_data={
"LLM_Model": "claude-opus-4-6", # wrong key, and 'prompt' is missing
},
)
# Unknown fields are caught first
assert isinstance(response, InputValidationErrorResponse)
assert "LLM_Model" in response.unrecognized_fields
@pytest.mark.asyncio(loop_scope="session")
async def test_correct_inputs_still_execute(self):
"""Correct input field names pass validation and the block executes."""
session = make_session(user_id=_TEST_USER_ID)
mock_block = make_mock_block_with_schema(
block_id="ai-text-gen-id",
name="AI Text Generator",
input_properties={
"prompt": {"type": "string"},
"model": {"type": "string", "default": "gpt-4o-mini"},
},
required_fields=["prompt"],
)
async def mock_execute(input_data, **kwargs):
yield "response", "Generated text"
mock_block.execute = mock_execute
with (
patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=mock_block,
),
patch(
"backend.api.features.chat.tools.run_block.get_or_create_workspace",
new_callable=AsyncMock,
return_value=MagicMock(id="test-workspace-id"),
),
):
tool = RunBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="ai-text-gen-id",
input_data={
"prompt": "Write a haiku",
"model": "gpt-4o-mini", # correct field name
},
)
assert isinstance(response, BlockOutputResponse)
assert response.success is True
@pytest.mark.asyncio(loop_scope="session")
async def test_missing_required_fields_returns_details(self):
"""Missing required fields returns BlockDetailsResponse with schema."""
session = make_session(user_id=_TEST_USER_ID)
mock_block = make_mock_block_with_schema(
block_id="ai-text-gen-id",
name="AI Text Generator",
input_properties={
"prompt": {"type": "string"},
"model": {"type": "string", "default": "gpt-4o-mini"},
},
required_fields=["prompt"],
)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=mock_block,
):
tool = RunBlockTool()
# Only provide valid optional field, missing required 'prompt'
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="ai-text-gen-id",
input_data={
"model": "gpt-4o-mini", # valid but optional
},
)
assert isinstance(response, BlockDetailsResponse)

View File

@@ -0,0 +1,265 @@
"""Sandbox execution utilities for code execution tools.
Provides filesystem + network isolated command execution using **bubblewrap**
(``bwrap``): whitelist-only filesystem (only system dirs visible read-only),
writable workspace only, clean environment, network blocked.
Tools that call :func:`run_sandboxed` must first check :func:`has_full_sandbox`
and refuse to run if bubblewrap is not available.
"""
import asyncio
import logging
import os
import platform
import shutil
logger = logging.getLogger(__name__)
_DEFAULT_TIMEOUT = 30
_MAX_TIMEOUT = 120
# ---------------------------------------------------------------------------
# Sandbox capability detection (cached at first call)
# ---------------------------------------------------------------------------
_BWRAP_AVAILABLE: bool | None = None
def has_full_sandbox() -> bool:
"""Return True if bubblewrap is available (filesystem + network isolation).
On non-Linux platforms (macOS), always returns False.
"""
global _BWRAP_AVAILABLE
if _BWRAP_AVAILABLE is None:
_BWRAP_AVAILABLE = (
platform.system() == "Linux" and shutil.which("bwrap") is not None
)
return _BWRAP_AVAILABLE
WORKSPACE_PREFIX = "/tmp/copilot-"
def make_session_path(session_id: str) -> str:
"""Build a sanitized, session-specific path under :data:`WORKSPACE_PREFIX`.
Shared by both the SDK working-directory setup and the sandbox tools so
they always resolve to the same directory for a given session.
Steps:
1. Strip all characters except ``[A-Za-z0-9-]``.
2. Construct ``/tmp/copilot-<safe_id>``.
3. Validate via ``os.path.normpath`` + ``startswith`` (CodeQL-recognised
sanitizer) to prevent path traversal.
Raises:
ValueError: If the resulting path escapes the prefix.
"""
import re
safe_id = re.sub(r"[^A-Za-z0-9-]", "", session_id)
if not safe_id:
safe_id = "default"
path = os.path.normpath(f"{WORKSPACE_PREFIX}{safe_id}")
if not path.startswith(WORKSPACE_PREFIX):
raise ValueError(f"Session path escaped prefix: {path}")
return path
def get_workspace_dir(session_id: str) -> str:
"""Get or create the workspace directory for a session.
Uses :func:`make_session_path` — the same path the SDK uses — so that
bash_exec shares the workspace with the SDK file tools.
"""
workspace = make_session_path(session_id)
os.makedirs(workspace, exist_ok=True)
return workspace
# ---------------------------------------------------------------------------
# Bubblewrap command builder
# ---------------------------------------------------------------------------
# System directories mounted read-only inside the sandbox.
# ONLY these are visible — /app, /root, /home, /opt, /var etc. are NOT accessible.
_SYSTEM_RO_BINDS = [
"/usr", # binaries, libraries, Python interpreter
"/etc", # system config: ld.so, locale, passwd, alternatives
]
# Compat paths: symlinks to /usr/* on modern Debian, real dirs on older systems.
# On Debian 13 these are symlinks (e.g. /bin -> usr/bin). bwrap --ro-bind
# can't create a symlink target, so we detect and use --symlink instead.
# /lib64 is critical: the ELF dynamic linker lives at /lib64/ld-linux-x86-64.so.2.
_COMPAT_PATHS = [
("/bin", "usr/bin"), # -> /usr/bin on Debian 13
("/sbin", "usr/sbin"), # -> /usr/sbin on Debian 13
("/lib", "usr/lib"), # -> /usr/lib on Debian 13
("/lib64", "usr/lib64"), # 64-bit libraries / ELF interpreter
]
# Resource limits to prevent fork bombs, memory exhaustion, and disk abuse.
# Applied via ulimit inside the sandbox before exec'ing the user command.
_RESOURCE_LIMITS = (
"ulimit -u 64" # max 64 processes (prevents fork bombs)
" -v 524288" # 512 MB virtual memory
" -f 51200" # 50 MB max file size (1024-byte blocks)
" -n 256" # 256 open file descriptors
" 2>/dev/null"
)
def _build_bwrap_command(
command: list[str], cwd: str, env: dict[str, str]
) -> list[str]:
"""Build a bubblewrap command with strict filesystem + network isolation.
Security model:
- **Whitelist-only filesystem**: only system directories (``/usr``, ``/etc``,
``/bin``, ``/lib``) are mounted read-only. Application code (``/app``),
home directories, ``/var``, ``/opt``, etc. are NOT accessible at all.
- **Writable workspace only**: the per-session workspace is the sole
writable path.
- **Clean environment**: ``--clearenv`` wipes all inherited env vars.
Only the explicitly-passed safe env vars are set inside the sandbox.
- **Network isolation**: ``--unshare-net`` blocks all network access.
- **Resource limits**: ulimit caps on processes (64), memory (512MB),
file size (50MB), and open FDs (256) to prevent fork bombs and abuse.
- **New session**: prevents terminal control escape.
- **Die with parent**: prevents orphaned sandbox processes.
"""
cmd = [
"bwrap",
# Create a new user namespace so bwrap can set up sandboxing
# inside unprivileged Docker containers (no CAP_SYS_ADMIN needed).
"--unshare-user",
# Wipe all inherited environment variables (API keys, secrets, etc.)
"--clearenv",
]
# Set only the safe env vars inside the sandbox
for key, value in env.items():
cmd.extend(["--setenv", key, value])
# System directories: read-only
for path in _SYSTEM_RO_BINDS:
cmd.extend(["--ro-bind", path, path])
# Compat paths: use --symlink when host path is a symlink (Debian 13),
# --ro-bind when it's a real directory (older distros).
for path, symlink_target in _COMPAT_PATHS:
if os.path.islink(path):
cmd.extend(["--symlink", symlink_target, path])
elif os.path.exists(path):
cmd.extend(["--ro-bind", path, path])
# Wrap the user command with resource limits:
# sh -c 'ulimit ...; exec "$@"' -- <original command>
# `exec "$@"` replaces the shell so there's no extra process overhead,
# and properly handles arguments with spaces.
limited_command = [
"sh",
"-c",
f'{_RESOURCE_LIMITS}; exec "$@"',
"--",
*command,
]
cmd.extend(
[
# Fresh virtual filesystems
"--dev",
"/dev",
"--proc",
"/proc",
"--tmpfs",
"/tmp",
# Workspace bind AFTER --tmpfs /tmp so it's visible through the tmpfs.
# (workspace lives under /tmp/copilot-<session>)
"--bind",
cwd,
cwd,
# Isolation
"--unshare-net",
"--die-with-parent",
"--new-session",
"--chdir",
cwd,
"--",
*limited_command,
]
)
return cmd
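# Rough shape of the argv this builder produces for ["python3", "script.py"]
# (illustrative only — the exact --setenv and compat-path entries depend on the host):
#
#   bwrap --unshare-user --clearenv \
#     --setenv PATH /usr/local/bin:/usr/bin:/bin ... \
#     --ro-bind /usr /usr --ro-bind /etc /etc \
#     --symlink usr/bin /bin --symlink usr/lib /lib ... \
#     --dev /dev --proc /proc --tmpfs /tmp \
#     --bind /tmp/copilot-<session> /tmp/copilot-<session> \
#     --unshare-net --die-with-parent --new-session \
#     --chdir /tmp/copilot-<session> -- \
#     sh -c 'ulimit -u 64 -v 524288 -f 51200 -n 256 2>/dev/null; exec "$@"' -- python3 script.py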
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def run_sandboxed(
command: list[str],
cwd: str,
timeout: int = _DEFAULT_TIMEOUT,
env: dict[str, str] | None = None,
) -> tuple[str, str, int, bool]:
"""Run a command inside a bubblewrap sandbox.
Callers **must** check :func:`has_full_sandbox` before calling this
function. If bubblewrap is not available, this function raises
:class:`RuntimeError` rather than running unsandboxed.
Returns:
(stdout, stderr, exit_code, timed_out)
"""
if not has_full_sandbox():
raise RuntimeError(
"run_sandboxed() requires bubblewrap but bwrap is not available. "
"Callers must check has_full_sandbox() before calling this function."
)
timeout = min(max(timeout, 1), _MAX_TIMEOUT)
safe_env = {
"PATH": "/usr/local/bin:/usr/bin:/bin",
"HOME": cwd,
"TMPDIR": cwd,
"LANG": "en_US.UTF-8",
"PYTHONDONTWRITEBYTECODE": "1",
"PYTHONIOENCODING": "utf-8",
}
if env:
safe_env.update(env)
full_command = _build_bwrap_command(command, cwd, safe_env)
try:
proc = await asyncio.create_subprocess_exec(
*full_command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=cwd,
env=safe_env,
)
try:
stdout_bytes, stderr_bytes = await asyncio.wait_for(
proc.communicate(), timeout=timeout
)
stdout = stdout_bytes.decode("utf-8", errors="replace")
stderr = stderr_bytes.decode("utf-8", errors="replace")
return stdout, stderr, proc.returncode or 0, False
except asyncio.TimeoutError:
proc.kill()
await proc.communicate()
return "", f"Execution timed out after {timeout}s", -1, True
except RuntimeError:
raise
except Exception as e:
return "", f"Sandbox error: {e}", -1, False

View File

@@ -0,0 +1,153 @@
"""Tests for BlockDetailsResponse in RunBlockTool."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from backend.api.features.chat.tools.models import BlockDetailsResponse
from backend.api.features.chat.tools.run_block import RunBlockTool
from backend.blocks._base import BlockType
from backend.data.model import CredentialsMetaInput
from backend.integrations.providers import ProviderName
from ._test_data import make_session
_TEST_USER_ID = "test-user-run-block-details"
def make_mock_block_with_inputs(
block_id: str, name: str, description: str = "Test description"
):
"""Create a mock block with input/output schemas for testing."""
mock = MagicMock()
mock.id = block_id
mock.name = name
mock.description = description
mock.block_type = BlockType.STANDARD
mock.disabled = False
# Input schema with non-credential fields
mock.input_schema = MagicMock()
mock.input_schema.jsonschema.return_value = {
"properties": {
"url": {"type": "string", "description": "URL to fetch"},
"method": {"type": "string", "description": "HTTP method"},
},
"required": ["url"],
}
mock.input_schema.get_credentials_fields.return_value = {}
mock.input_schema.get_credentials_fields_info.return_value = {}
# Output schema
mock.output_schema = MagicMock()
mock.output_schema.jsonschema.return_value = {
"properties": {
"response": {"type": "object", "description": "HTTP response"},
"error": {"type": "string", "description": "Error message"},
}
}
return mock
@pytest.mark.asyncio(loop_scope="session")
async def test_run_block_returns_details_when_no_input_provided():
"""When run_block is called without input_data, it should return BlockDetailsResponse."""
session = make_session(user_id=_TEST_USER_ID)
# Create a block with inputs
http_block = make_mock_block_with_inputs(
"http-block-id", "HTTP Request", "Send HTTP requests"
)
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=http_block,
):
# Mock credentials check to return no missing credentials
with patch.object(
RunBlockTool,
"_resolve_block_credentials",
new_callable=AsyncMock,
return_value=({}, []), # (matched_credentials, missing_credentials)
):
tool = RunBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="http-block-id",
input_data={}, # Empty input data
)
# Should return BlockDetailsResponse showing the schema
assert isinstance(response, BlockDetailsResponse)
assert response.block.id == "http-block-id"
assert response.block.name == "HTTP Request"
assert response.block.description == "Send HTTP requests"
assert "url" in response.block.inputs["properties"]
assert "method" in response.block.inputs["properties"]
assert "response" in response.block.outputs["properties"]
assert response.user_authenticated is True
@pytest.mark.asyncio(loop_scope="session")
async def test_run_block_returns_details_when_only_credentials_provided():
"""When only credentials are provided (no actual input), should return details."""
session = make_session(user_id=_TEST_USER_ID)
# Create a block with both credential and non-credential inputs
mock = MagicMock()
mock.id = "api-block-id"
mock.name = "API Call"
mock.description = "Make API calls"
mock.block_type = BlockType.STANDARD
mock.disabled = False
mock.input_schema = MagicMock()
mock.input_schema.jsonschema.return_value = {
"properties": {
"credentials": {"type": "object", "description": "API credentials"},
"endpoint": {"type": "string", "description": "API endpoint"},
},
"required": ["credentials", "endpoint"],
}
mock.input_schema.get_credentials_fields.return_value = {"credentials": True}
mock.input_schema.get_credentials_fields_info.return_value = {}
mock.output_schema = MagicMock()
mock.output_schema.jsonschema.return_value = {
"properties": {"result": {"type": "object"}}
}
with patch(
"backend.api.features.chat.tools.run_block.get_block",
return_value=mock,
):
with patch.object(
RunBlockTool,
"_resolve_block_credentials",
new_callable=AsyncMock,
return_value=(
{
"credentials": CredentialsMetaInput(
id="cred-id",
provider=ProviderName("test_provider"),
type="api_key",
title="Test Credential",
)
},
[],
),
):
tool = RunBlockTool()
response = await tool._execute(
user_id=_TEST_USER_ID,
session=session,
block_id="api-block-id",
input_data={"credentials": {"some": "cred"}}, # Only credential
)
# Should return details because no non-credential inputs provided
assert isinstance(response, BlockDetailsResponse)
assert response.block.id == "api-block-id"
assert response.block.name == "API Call"

View File

@@ -0,0 +1,151 @@
"""Web fetch tool — safely retrieve public web page content."""
import logging
from typing import Any
import aiohttp
import html2text
from backend.api.features.chat.model import ChatSession
from backend.api.features.chat.tools.base import BaseTool
from backend.api.features.chat.tools.models import (
ErrorResponse,
ToolResponseBase,
WebFetchResponse,
)
from backend.util.request import Requests
logger = logging.getLogger(__name__)
# Limits
_MAX_CONTENT_BYTES = 102_400 # 100 KB download cap
_REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=15)
# Content types we'll read as text
_TEXT_CONTENT_TYPES = {
"text/html",
"text/plain",
"text/xml",
"text/csv",
"text/markdown",
"application/json",
"application/xml",
"application/xhtml+xml",
"application/rss+xml",
"application/atom+xml",
}
def _is_text_content(content_type: str) -> bool:
base = content_type.split(";")[0].strip().lower()
return base in _TEXT_CONTENT_TYPES or base.startswith("text/")
def _html_to_text(html: str) -> str:
h = html2text.HTML2Text()
h.ignore_links = False
h.ignore_images = True
h.body_width = 0
return h.handle(html)
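# Quick illustration (not part of the diff) of the two helpers above:
#
#   _is_text_content("text/html; charset=utf-8")  -> True   (base type is text/html)
#   _is_text_content("application/json")          -> True
#   _is_text_content("image/png")                 -> False
#
#   _html_to_text("<h1>Docs</h1><p>See <a href='/x'>here</a>.</p>")
#   # -> roughly "# Docs\n\nSee [here](/x).\n\n"  (approximate html2text output;
#   #    links kept, images dropped, no wrapping because body_width=0)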
class WebFetchTool(BaseTool):
"""Safely fetch content from a public URL using SSRF-protected HTTP."""
@property
def name(self) -> str:
return "web_fetch"
@property
def description(self) -> str:
return (
"Fetch the content of a public web page by URL. "
"Returns readable text extracted from HTML by default. "
"Useful for reading documentation, articles, and API responses. "
"Only supports HTTP/HTTPS GET requests to public URLs "
"(private/internal network addresses are blocked)."
)
@property
def parameters(self) -> dict[str, Any]:
return {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The public HTTP/HTTPS URL to fetch.",
},
"extract_text": {
"type": "boolean",
"description": (
"If true (default), extract readable text from HTML. "
"If false, return raw content."
),
"default": True,
},
},
"required": ["url"],
}
@property
def requires_auth(self) -> bool:
return False
async def _execute(
self,
user_id: str | None,
session: ChatSession,
**kwargs: Any,
) -> ToolResponseBase:
url: str = (kwargs.get("url") or "").strip()
extract_text: bool = kwargs.get("extract_text", True)
session_id = session.session_id if session else None
if not url:
return ErrorResponse(
message="Please provide a URL to fetch.",
error="missing_url",
session_id=session_id,
)
try:
client = Requests(raise_for_status=False, retry_max_attempts=1)
response = await client.get(url, timeout=_REQUEST_TIMEOUT)
except ValueError as e:
# validate_url raises ValueError for SSRF / blocked IPs
return ErrorResponse(
message=f"URL blocked: {e}",
error="url_blocked",
session_id=session_id,
)
except Exception as e:
logger.warning(f"[web_fetch] Request failed for {url}: {e}")
return ErrorResponse(
message=f"Failed to fetch URL: {e}",
error="fetch_failed",
session_id=session_id,
)
content_type = response.headers.get("content-type", "")
if not _is_text_content(content_type):
return ErrorResponse(
message=f"Non-text content type: {content_type.split(';')[0]}",
error="unsupported_content_type",
session_id=session_id,
)
raw = response.content[:_MAX_CONTENT_BYTES]
was_truncated = len(response.content) > _MAX_CONTENT_BYTES
text = raw.decode("utf-8", errors="replace")
if extract_text and "html" in content_type.lower():
text = _html_to_text(text)
return WebFetchResponse(
message=f"Fetched {url}",
url=response.url,
status_code=response.status,
content_type=content_type.split(";")[0].strip(),
content=text,
truncated=was_truncated,
session_id=session_id,
)
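# Illustrative call shape (not part of the diff): how this model-facing tool is invoked
# and what comes back. ChatSession construction is elided; field names mirror
# WebFetchResponse as used above, and the blocked-URL example is only an assumption of
# what validate_url rejects.
#
#   result = await WebFetchTool()._execute(
#       user_id=None,
#       session=session,                 # an existing ChatSession
#       url="https://example.com/docs",
#       extract_text=True,
#   )
#   # Success: WebFetchResponse(status_code=200, content_type="text/html",
#   #                           content="<readable text>", truncated=False, ...)
#   # Blocked internal address (e.g. a link-local IP): ErrorResponse(error="url_blocked")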

View File

@@ -88,7 +88,9 @@ class ListWorkspaceFilesTool(BaseTool):
@property
def description(self) -> str:
return (
-"List files in the user's workspace. "
+"List files in the user's persistent workspace (cloud storage). "
+"These files survive across sessions. "
+"For ephemeral session files, use the SDK Read/Glob tools instead. "
"Returns file names, paths, sizes, and metadata. "
"Optionally filter by path prefix."
)
@@ -204,7 +206,9 @@ class ReadWorkspaceFileTool(BaseTool):
@property
def description(self) -> str:
return (
-"Read a file from the user's workspace. "
+"Read a file from the user's persistent workspace (cloud storage). "
+"These files survive across sessions. "
+"For ephemeral session files, use the SDK Read tool instead. "
"Specify either file_id or path to identify the file. "
"For small text files, returns content directly. "
"For large or binary files, returns metadata and a download URL. "
@@ -378,7 +382,9 @@ class WriteWorkspaceFileTool(BaseTool):
@property
def description(self) -> str:
return (
-"Write or create a file in the user's workspace. "
+"Write or create a file in the user's persistent workspace (cloud storage). "
+"These files survive across sessions. "
+"For ephemeral session files, use the SDK Write tool instead. "
"Provide the content as a base64-encoded string. "
f"Maximum file size is {Config().max_file_size_mb}MB. "
"Files are saved to the current session's folder by default. "
@@ -523,7 +529,7 @@ class DeleteWorkspaceFileTool(BaseTool):
@property
def description(self) -> str:
return (
-"Delete a file from the user's workspace. "
+"Delete a file from the user's persistent workspace (cloud storage). "
"Specify either file_id or path to identify the file. "
"Paths are scoped to the current session by default. "
"Use /sessions/<session_id>/... for cross-session access."

View File

@@ -1,10 +1,10 @@
import json import json
import shlex import shlex
import uuid import uuid
from typing import Literal, Optional from typing import TYPE_CHECKING, Literal, Optional
from e2b import AsyncSandbox as BaseAsyncSandbox from e2b import AsyncSandbox as BaseAsyncSandbox
from pydantic import BaseModel, SecretStr from pydantic import SecretStr
from backend.blocks._base import ( from backend.blocks._base import (
Block, Block,
@@ -20,6 +20,13 @@ from backend.data.model import (
SchemaField, SchemaField,
) )
from backend.integrations.providers import ProviderName from backend.integrations.providers import ProviderName
from backend.util.sandbox_files import (
SandboxFileOutput,
extract_and_store_sandbox_files,
)
if TYPE_CHECKING:
from backend.executor.utils import ExecutionContext
class ClaudeCodeExecutionError(Exception): class ClaudeCodeExecutionError(Exception):
@@ -174,22 +181,15 @@ class ClaudeCodeBlock(Block):
advanced=True, advanced=True,
) )
class FileOutput(BaseModel):
"""A file extracted from the sandbox."""
path: str
relative_path: str # Path relative to working directory (for GitHub, etc.)
name: str
content: str
class Output(BlockSchemaOutput): class Output(BlockSchemaOutput):
response: str = SchemaField( response: str = SchemaField(
description="The output/response from Claude Code execution" description="The output/response from Claude Code execution"
) )
files: list["ClaudeCodeBlock.FileOutput"] = SchemaField( files: list[SandboxFileOutput] = SchemaField(
description=( description=(
"List of text files created/modified by Claude Code during this execution. " "List of text files created/modified by Claude Code during this execution. "
"Each file has 'path', 'relative_path', 'name', and 'content' fields." "Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. "
"workspace_ref contains a workspace:// URI if the file was stored to workspace."
) )
) )
conversation_history: str = SchemaField( conversation_history: str = SchemaField(
@@ -252,6 +252,7 @@ class ClaudeCodeBlock(Block):
"relative_path": "index.html", "relative_path": "index.html",
"name": "index.html", "name": "index.html",
"content": "<html>Hello World</html>", "content": "<html>Hello World</html>",
"workspace_ref": None,
} }
], ],
), ),
@@ -267,11 +268,12 @@ class ClaudeCodeBlock(Block):
"execute_claude_code": lambda *args, **kwargs: ( "execute_claude_code": lambda *args, **kwargs: (
"Created index.html with hello world content", # response "Created index.html with hello world content", # response
[ [
ClaudeCodeBlock.FileOutput( SandboxFileOutput(
path="/home/user/index.html", path="/home/user/index.html",
relative_path="index.html", relative_path="index.html",
name="index.html", name="index.html",
content="<html>Hello World</html>", content="<html>Hello World</html>",
workspace_ref=None,
) )
], # files ], # files
"User: Create a hello world HTML file\n" "User: Create a hello world HTML file\n"
@@ -294,7 +296,8 @@ class ClaudeCodeBlock(Block):
existing_sandbox_id: str, existing_sandbox_id: str,
conversation_history: str, conversation_history: str,
dispose_sandbox: bool, dispose_sandbox: bool,
) -> tuple[str, list["ClaudeCodeBlock.FileOutput"], str, str, str]: execution_context: "ExecutionContext",
) -> tuple[str, list[SandboxFileOutput], str, str, str]:
""" """
Execute Claude Code in an E2B sandbox. Execute Claude Code in an E2B sandbox.
@@ -449,14 +452,18 @@ class ClaudeCodeBlock(Block):
else: else:
new_conversation_history = turn_entry new_conversation_history = turn_entry
# Extract files created/modified during this run # Extract files created/modified during this run and store to workspace
files = await self._extract_files( sandbox_files = await extract_and_store_sandbox_files(
sandbox, working_directory, start_timestamp sandbox=sandbox,
working_directory=working_directory,
execution_context=execution_context,
since_timestamp=start_timestamp,
text_only=True,
) )
return ( return (
response, response,
files, sandbox_files, # Already SandboxFileOutput objects
new_conversation_history, new_conversation_history,
current_session_id, current_session_id,
sandbox_id, sandbox_id,
@@ -471,140 +478,6 @@ class ClaudeCodeBlock(Block):
if dispose_sandbox and sandbox: if dispose_sandbox and sandbox:
await sandbox.kill() await sandbox.kill()
async def _extract_files(
self,
sandbox: BaseAsyncSandbox,
working_directory: str,
since_timestamp: str | None = None,
) -> list["ClaudeCodeBlock.FileOutput"]:
"""
Extract text files created/modified during this Claude Code execution.
Args:
sandbox: The E2B sandbox instance
working_directory: Directory to search for files
since_timestamp: ISO timestamp - only return files modified after this time
Returns:
List of FileOutput objects with path, relative_path, name, and content
"""
files: list[ClaudeCodeBlock.FileOutput] = []
# Text file extensions we can safely read as text
text_extensions = {
".txt",
".md",
".html",
".htm",
".css",
".js",
".ts",
".jsx",
".tsx",
".json",
".xml",
".yaml",
".yml",
".toml",
".ini",
".cfg",
".conf",
".py",
".rb",
".php",
".java",
".c",
".cpp",
".h",
".hpp",
".cs",
".go",
".rs",
".swift",
".kt",
".scala",
".sh",
".bash",
".zsh",
".sql",
".graphql",
".env",
".gitignore",
".dockerfile",
"Dockerfile",
".vue",
".svelte",
".astro",
".mdx",
".rst",
".tex",
".csv",
".log",
}
try:
# List files recursively using find command
# Exclude node_modules and .git directories, but allow hidden files
# like .env and .gitignore (they're filtered by text_extensions later)
# Filter by timestamp to only get files created/modified during this run
safe_working_dir = shlex.quote(working_directory)
timestamp_filter = ""
if since_timestamp:
timestamp_filter = f"-newermt {shlex.quote(since_timestamp)} "
find_result = await sandbox.commands.run(
f"find {safe_working_dir} -type f "
f"{timestamp_filter}"
f"-not -path '*/node_modules/*' "
f"-not -path '*/.git/*' "
f"2>/dev/null"
)
if find_result.stdout:
for file_path in find_result.stdout.strip().split("\n"):
if not file_path:
continue
# Check if it's a text file we can read
is_text = any(
file_path.endswith(ext) for ext in text_extensions
) or file_path.endswith("Dockerfile")
if is_text:
try:
content = await sandbox.files.read(file_path)
# Handle bytes or string
if isinstance(content, bytes):
content = content.decode("utf-8", errors="replace")
# Extract filename from path
file_name = file_path.split("/")[-1]
# Calculate relative path by stripping working directory
relative_path = file_path
if file_path.startswith(working_directory):
relative_path = file_path[len(working_directory) :]
# Remove leading slash if present
if relative_path.startswith("/"):
relative_path = relative_path[1:]
files.append(
ClaudeCodeBlock.FileOutput(
path=file_path,
relative_path=relative_path,
name=file_name,
content=content,
)
)
except Exception:
# Skip files that can't be read
pass
except Exception:
# If file extraction fails, return empty results
pass
return files
def _escape_prompt(self, prompt: str) -> str: def _escape_prompt(self, prompt: str) -> str:
"""Escape the prompt for safe shell execution.""" """Escape the prompt for safe shell execution."""
# Use single quotes and escape any single quotes in the prompt # Use single quotes and escape any single quotes in the prompt
@@ -617,6 +490,7 @@ class ClaudeCodeBlock(Block):
*, *,
e2b_credentials: APIKeyCredentials, e2b_credentials: APIKeyCredentials,
anthropic_credentials: APIKeyCredentials, anthropic_credentials: APIKeyCredentials,
execution_context: "ExecutionContext",
**kwargs, **kwargs,
) -> BlockOutput: ) -> BlockOutput:
try: try:
@@ -637,6 +511,7 @@ class ClaudeCodeBlock(Block):
existing_sandbox_id=input_data.sandbox_id, existing_sandbox_id=input_data.sandbox_id,
conversation_history=input_data.conversation_history, conversation_history=input_data.conversation_history,
dispose_sandbox=input_data.dispose_sandbox, dispose_sandbox=input_data.dispose_sandbox,
execution_context=execution_context,
) )
yield "response", response yield "response", response

View File

@@ -1,5 +1,5 @@
from enum import Enum from enum import Enum
from typing import Any, Literal, Optional from typing import TYPE_CHECKING, Any, Literal, Optional
from e2b_code_interpreter import AsyncSandbox from e2b_code_interpreter import AsyncSandbox
from e2b_code_interpreter import Result as E2BExecutionResult from e2b_code_interpreter import Result as E2BExecutionResult
@@ -20,6 +20,13 @@ from backend.data.model import (
SchemaField, SchemaField,
) )
from backend.integrations.providers import ProviderName from backend.integrations.providers import ProviderName
from backend.util.sandbox_files import (
SandboxFileOutput,
extract_and_store_sandbox_files,
)
if TYPE_CHECKING:
from backend.executor.utils import ExecutionContext
TEST_CREDENTIALS = APIKeyCredentials( TEST_CREDENTIALS = APIKeyCredentials(
id="01234567-89ab-cdef-0123-456789abcdef", id="01234567-89ab-cdef-0123-456789abcdef",
@@ -85,6 +92,9 @@ class CodeExecutionResult(MainCodeExecutionResult):
class BaseE2BExecutorMixin: class BaseE2BExecutorMixin:
"""Shared implementation methods for E2B executor blocks.""" """Shared implementation methods for E2B executor blocks."""
# Default working directory in E2B sandboxes
WORKING_DIR = "/home/user"
async def execute_code( async def execute_code(
self, self,
api_key: str, api_key: str,
@@ -95,14 +105,21 @@ class BaseE2BExecutorMixin:
timeout: Optional[int] = None, timeout: Optional[int] = None,
sandbox_id: Optional[str] = None, sandbox_id: Optional[str] = None,
dispose_sandbox: bool = False, dispose_sandbox: bool = False,
execution_context: Optional["ExecutionContext"] = None,
extract_files: bool = False,
): ):
""" """
Unified code execution method that handles all three use cases: Unified code execution method that handles all three use cases:
1. Create new sandbox and execute (ExecuteCodeBlock) 1. Create new sandbox and execute (ExecuteCodeBlock)
2. Create new sandbox, execute, and return sandbox_id (InstantiateCodeSandboxBlock) 2. Create new sandbox, execute, and return sandbox_id (InstantiateCodeSandboxBlock)
3. Connect to existing sandbox and execute (ExecuteCodeStepBlock) 3. Connect to existing sandbox and execute (ExecuteCodeStepBlock)
Args:
extract_files: If True and execution_context provided, extract files
created/modified during execution and store to workspace.
""" # noqa """ # noqa
sandbox = None sandbox = None
files: list[SandboxFileOutput] = []
try: try:
if sandbox_id: if sandbox_id:
# Connect to existing sandbox (ExecuteCodeStepBlock case) # Connect to existing sandbox (ExecuteCodeStepBlock case)
@@ -118,6 +135,12 @@ class BaseE2BExecutorMixin:
for cmd in setup_commands: for cmd in setup_commands:
await sandbox.commands.run(cmd) await sandbox.commands.run(cmd)
# Capture timestamp before execution to scope file extraction
start_timestamp = None
if extract_files:
ts_result = await sandbox.commands.run("date -u +%Y-%m-%dT%H:%M:%S")
start_timestamp = ts_result.stdout.strip() if ts_result.stdout else None
# Execute the code # Execute the code
execution = await sandbox.run_code( execution = await sandbox.run_code(
code, code,
@@ -133,7 +156,24 @@ class BaseE2BExecutorMixin:
stdout_logs = "".join(execution.logs.stdout) stdout_logs = "".join(execution.logs.stdout)
stderr_logs = "".join(execution.logs.stderr) stderr_logs = "".join(execution.logs.stderr)
return results, text_output, stdout_logs, stderr_logs, sandbox.sandbox_id # Extract files created/modified during this execution
if extract_files and execution_context:
files = await extract_and_store_sandbox_files(
sandbox=sandbox,
working_directory=self.WORKING_DIR,
execution_context=execution_context,
since_timestamp=start_timestamp,
text_only=False, # Include binary files too
)
return (
results,
text_output,
stdout_logs,
stderr_logs,
sandbox.sandbox_id,
files,
)
finally: finally:
# Dispose of sandbox if requested to reduce usage costs # Dispose of sandbox if requested to reduce usage costs
if dispose_sandbox and sandbox: if dispose_sandbox and sandbox:
@@ -238,6 +278,12 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
description="Standard output logs from execution" description="Standard output logs from execution"
) )
stderr_logs: str = SchemaField(description="Standard error logs from execution") stderr_logs: str = SchemaField(description="Standard error logs from execution")
files: list[SandboxFileOutput] = SchemaField(
description=(
"Files created or modified during execution. "
"Each file has path, name, content, and workspace_ref (if stored)."
),
)
def __init__(self): def __init__(self):
super().__init__( super().__init__(
@@ -259,23 +305,30 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
("results", []), ("results", []),
("response", "Hello World"), ("response", "Hello World"),
("stdout_logs", "Hello World\n"), ("stdout_logs", "Hello World\n"),
("files", []),
], ],
test_mock={ test_mock={
"execute_code": lambda api_key, code, language, template_id, setup_commands, timeout, dispose_sandbox: ( # noqa "execute_code": lambda api_key, code, language, template_id, setup_commands, timeout, dispose_sandbox, execution_context, extract_files: ( # noqa
[], # results [], # results
"Hello World", # text_output "Hello World", # text_output
"Hello World\n", # stdout_logs "Hello World\n", # stdout_logs
"", # stderr_logs "", # stderr_logs
"sandbox_id", # sandbox_id "sandbox_id", # sandbox_id
[], # files
), ),
}, },
) )
async def run( async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs self,
input_data: Input,
*,
credentials: APIKeyCredentials,
execution_context: "ExecutionContext",
**kwargs,
) -> BlockOutput: ) -> BlockOutput:
try: try:
results, text_output, stdout, stderr, _ = await self.execute_code( results, text_output, stdout, stderr, _, files = await self.execute_code(
api_key=credentials.api_key.get_secret_value(), api_key=credentials.api_key.get_secret_value(),
code=input_data.code, code=input_data.code,
language=input_data.language, language=input_data.language,
@@ -283,6 +336,8 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
setup_commands=input_data.setup_commands, setup_commands=input_data.setup_commands,
timeout=input_data.timeout, timeout=input_data.timeout,
dispose_sandbox=input_data.dispose_sandbox, dispose_sandbox=input_data.dispose_sandbox,
execution_context=execution_context,
extract_files=True,
) )
# Determine result object shape & filter out empty formats # Determine result object shape & filter out empty formats
@@ -296,6 +351,8 @@ class ExecuteCodeBlock(Block, BaseE2BExecutorMixin):
yield "stdout_logs", stdout yield "stdout_logs", stdout
if stderr: if stderr:
yield "stderr_logs", stderr yield "stderr_logs", stderr
# Always yield files (empty list if none)
yield "files", [f.model_dump() for f in files]
except Exception as e: except Exception as e:
yield "error", str(e) yield "error", str(e)
@@ -393,6 +450,7 @@ class InstantiateCodeSandboxBlock(Block, BaseE2BExecutorMixin):
"Hello World\n", # stdout_logs "Hello World\n", # stdout_logs
"", # stderr_logs "", # stderr_logs
"sandbox_id", # sandbox_id "sandbox_id", # sandbox_id
[], # files
), ),
}, },
) )
@@ -401,7 +459,7 @@ class InstantiateCodeSandboxBlock(Block, BaseE2BExecutorMixin):
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput: ) -> BlockOutput:
try: try:
_, text_output, stdout, stderr, sandbox_id = await self.execute_code( _, text_output, stdout, stderr, sandbox_id, _ = await self.execute_code(
api_key=credentials.api_key.get_secret_value(), api_key=credentials.api_key.get_secret_value(),
code=input_data.setup_code, code=input_data.setup_code,
language=input_data.language, language=input_data.language,
@@ -500,6 +558,7 @@ class ExecuteCodeStepBlock(Block, BaseE2BExecutorMixin):
"Hello World\n", # stdout_logs "Hello World\n", # stdout_logs
"", # stderr_logs "", # stderr_logs
sandbox_id, # sandbox_id sandbox_id, # sandbox_id
[], # files
), ),
}, },
) )
@@ -508,7 +567,7 @@ class ExecuteCodeStepBlock(Block, BaseE2BExecutorMixin):
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput: ) -> BlockOutput:
try: try:
results, text_output, stdout, stderr, _ = await self.execute_code( results, text_output, stdout, stderr, _, _ = await self.execute_code(
api_key=credentials.api_key.get_secret_value(), api_key=credentials.api_key.get_secret_value(),
code=input_data.step_code, code=input_data.step_code,
language=input_data.language, language=input_data.language,

View File

@@ -38,6 +38,7 @@ class Flag(str, Enum):
AGENT_ACTIVITY = "agent-activity"
ENABLE_PLATFORM_PAYMENT = "enable-platform-payment"
CHAT = "chat"
+COPILOT_SDK = "copilot-sdk"
def is_configured() -> bool:

View File

@@ -0,0 +1,288 @@
"""
Shared utilities for extracting and storing files from E2B sandboxes.
This module provides common file extraction and workspace storage functionality
for blocks that run code in E2B sandboxes (Claude Code, Code Executor, etc.).
"""
import base64
import logging
import mimetypes
import shlex
from dataclasses import dataclass
from typing import TYPE_CHECKING
from pydantic import BaseModel
from backend.util.file import store_media_file
from backend.util.type import MediaFileType
if TYPE_CHECKING:
from e2b import AsyncSandbox as BaseAsyncSandbox
from backend.executor.utils import ExecutionContext
logger = logging.getLogger(__name__)
# Text file extensions that can be safely read and stored as text
TEXT_EXTENSIONS = {
".txt",
".md",
".html",
".htm",
".css",
".js",
".ts",
".jsx",
".tsx",
".json",
".xml",
".yaml",
".yml",
".toml",
".ini",
".cfg",
".conf",
".py",
".rb",
".php",
".java",
".c",
".cpp",
".h",
".hpp",
".cs",
".go",
".rs",
".swift",
".kt",
".scala",
".sh",
".bash",
".zsh",
".sql",
".graphql",
".env",
".gitignore",
".dockerfile",
"Dockerfile",
".vue",
".svelte",
".astro",
".mdx",
".rst",
".tex",
".csv",
".log",
}
class SandboxFileOutput(BaseModel):
"""A file extracted from a sandbox and optionally stored in workspace."""
path: str
"""Full path in the sandbox."""
relative_path: str
"""Path relative to the working directory."""
name: str
"""Filename only."""
content: str
"""File content as text (for backward compatibility)."""
workspace_ref: str | None = None
"""Workspace reference (workspace://{id}#mime) if stored, None otherwise."""
@dataclass
class ExtractedFile:
"""Internal representation of an extracted file before storage."""
path: str
relative_path: str
name: str
content: bytes
is_text: bool
async def extract_sandbox_files(
sandbox: "BaseAsyncSandbox",
working_directory: str,
since_timestamp: str | None = None,
text_only: bool = True,
) -> list[ExtractedFile]:
"""
Extract files from an E2B sandbox.
Args:
sandbox: The E2B sandbox instance
working_directory: Directory to search for files
since_timestamp: ISO timestamp - only return files modified after this time
text_only: If True, only extract text files (default). If False, extract all files.
Returns:
List of ExtractedFile objects with path, content, and metadata
"""
files: list[ExtractedFile] = []
try:
# Build find command
safe_working_dir = shlex.quote(working_directory)
timestamp_filter = ""
if since_timestamp:
timestamp_filter = f"-newermt {shlex.quote(since_timestamp)} "
find_result = await sandbox.commands.run(
f"find {safe_working_dir} -type f "
f"{timestamp_filter}"
f"-not -path '*/node_modules/*' "
f"-not -path '*/.git/*' "
f"2>/dev/null"
)
if not find_result.stdout:
return files
for file_path in find_result.stdout.strip().split("\n"):
if not file_path:
continue
# Check if it's a text file
is_text = any(file_path.endswith(ext) for ext in TEXT_EXTENSIONS)
# Skip non-text files if text_only mode
if text_only and not is_text:
continue
try:
# Read file content as bytes
content = await sandbox.files.read(file_path, format="bytes")
if isinstance(content, str):
content = content.encode("utf-8")
elif isinstance(content, bytearray):
content = bytes(content)
# Extract filename from path
file_name = file_path.split("/")[-1]
# Calculate relative path
relative_path = file_path
if file_path.startswith(working_directory):
relative_path = file_path[len(working_directory) :]
if relative_path.startswith("/"):
relative_path = relative_path[1:]
files.append(
ExtractedFile(
path=file_path,
relative_path=relative_path,
name=file_name,
content=content,
is_text=is_text,
)
)
except Exception as e:
logger.debug(f"Failed to read file {file_path}: {e}")
continue
except Exception as e:
logger.warning(f"File extraction failed: {e}")
return files
async def store_sandbox_files(
extracted_files: list[ExtractedFile],
execution_context: "ExecutionContext",
) -> list[SandboxFileOutput]:
"""
Store extracted sandbox files to workspace and return output objects.
Args:
extracted_files: List of files extracted from sandbox
execution_context: Execution context for workspace storage
Returns:
List of SandboxFileOutput objects with workspace refs
"""
outputs: list[SandboxFileOutput] = []
for file in extracted_files:
# Decode content for text files (for backward compat content field)
if file.is_text:
try:
content_str = file.content.decode("utf-8", errors="replace")
except Exception:
content_str = ""
else:
content_str = f"[Binary file: {len(file.content)} bytes]"
# Build data URI (needed for storage and as binary fallback)
mime_type = mimetypes.guess_type(file.name)[0] or "application/octet-stream"
data_uri = f"data:{mime_type};base64,{base64.b64encode(file.content).decode()}"
# Try to store in workspace
workspace_ref: str | None = None
try:
result = await store_media_file(
file=MediaFileType(data_uri),
execution_context=execution_context,
return_format="for_block_output",
)
if result.startswith("workspace://"):
workspace_ref = result
elif not file.is_text:
# Non-workspace context (graph execution): store_media_file
# returned a data URI — use it as content so binary data isn't lost.
content_str = result
except Exception as e:
logger.warning(f"Failed to store file {file.name} to workspace: {e}")
# For binary files, fall back to data URI to prevent data loss
if not file.is_text:
content_str = data_uri
outputs.append(
SandboxFileOutput(
path=file.path,
relative_path=file.relative_path,
name=file.name,
content=content_str,
workspace_ref=workspace_ref,
)
)
return outputs
async def extract_and_store_sandbox_files(
sandbox: "BaseAsyncSandbox",
working_directory: str,
execution_context: "ExecutionContext",
since_timestamp: str | None = None,
text_only: bool = True,
) -> list[SandboxFileOutput]:
"""
Extract files from sandbox and store them in workspace.
This is the main entry point combining extraction and storage.
Args:
sandbox: The E2B sandbox instance
working_directory: Directory to search for files
execution_context: Execution context for workspace storage
since_timestamp: ISO timestamp - only return files modified after this time
text_only: If True, only extract text files
Returns:
List of SandboxFileOutput objects with content and workspace refs
"""
extracted = await extract_sandbox_files(
sandbox=sandbox,
working_directory=working_directory,
since_timestamp=since_timestamp,
text_only=text_only,
)
return await store_sandbox_files(extracted, execution_context)
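# Illustrative usage sketch (not part of the diff): how a block is expected to call the
# entry point above after running code in an E2B sandbox. `sandbox` and
# `execution_context` come from the calling block; the timestamp below is a placeholder.
#
#   outputs = await extract_and_store_sandbox_files(
#       sandbox=sandbox,
#       working_directory="/home/user",
#       execution_context=execution_context,
#       since_timestamp="2026-02-13T12:00:00",  # only files touched after this point
#       text_only=False,                         # include binaries (kept via workspace_ref)
#   )
#   for f in outputs:
#       print(f.relative_path, f.workspace_ref or "<inline content>")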

View File

@@ -368,6 +368,10 @@ class Config(UpdateTrackingModel["Config"], BaseSettings):
default=600,
description="The timeout in seconds for Agent Generator service requests (includes retries for rate limits)",
)
+agentgenerator_use_dummy: bool = Field(
+default=False,
+description="Use dummy agent generator responses for testing (bypasses external service)",
+)
enable_example_blocks: bool = Field(
default=False,

View File

@@ -441,14 +441,14 @@ develop = true
colorama = "^0.4.6" colorama = "^0.4.6"
cryptography = "^46.0" cryptography = "^46.0"
expiringdict = "^1.2.2" expiringdict = "^1.2.2"
fastapi = "^0.128.0" fastapi = "^0.128.7"
google-cloud-logging = "^3.13.0" google-cloud-logging = "^3.13.0"
launchdarkly-server-sdk = "^9.14.1" launchdarkly-server-sdk = "^9.15.0"
pydantic = "^2.12.5" pydantic = "^2.12.5"
pydantic-settings = "^2.12.0" pydantic-settings = "^2.12.0"
pyjwt = {version = "^2.11.0", extras = ["crypto"]} pyjwt = {version = "^2.11.0", extras = ["crypto"]}
redis = "^6.2.0" redis = "^6.2.0"
supabase = "^2.27.2" supabase = "^2.28.0"
uvicorn = "^0.40.0" uvicorn = "^0.40.0"
[package.source] [package.source]
@@ -897,6 +897,29 @@ files = [
{file = "charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a"}, {file = "charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a"},
] ]
[[package]]
name = "claude-agent-sdk"
version = "0.1.35"
description = "Python SDK for Claude Code"
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "claude_agent_sdk-0.1.35-py3-none-macosx_11_0_arm64.whl", hash = "sha256:df67f4deade77b16a9678b3a626c176498e40417f33b04beda9628287f375591"},
{file = "claude_agent_sdk-0.1.35-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:14963944f55ded7c8ed518feebfa5b4284aa6dd8d81aeff2e5b21a962ce65097"},
{file = "claude_agent_sdk-0.1.35-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:84344dcc535d179c1fc8a11c6f34c37c3b583447bdf09d869effb26514fd7a65"},
{file = "claude_agent_sdk-0.1.35-py3-none-win_amd64.whl", hash = "sha256:1b3d54b47448c93f6f372acd4d1757f047c3c1e8ef5804be7a1e3e53e2c79a5f"},
{file = "claude_agent_sdk-0.1.35.tar.gz", hash = "sha256:0f98e2b3c71ca85abfc042e7a35c648df88e87fda41c52e6779ef7b038dcbb52"},
]
[package.dependencies]
anyio = ">=4.0.0"
mcp = ">=0.1.0"
typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
[package.extras]
dev = ["anyio[trio] (>=4.0.0)", "mypy (>=1.0.0)", "pytest (>=7.0.0)", "pytest-asyncio (>=0.20.0)", "pytest-cov (>=4.0.0)", "ruff (>=0.1.0)"]
[[package]] [[package]]
name = "cleo" name = "cleo"
version = "2.1.0" version = "2.1.0"
@@ -1382,14 +1405,14 @@ tzdata = "*"
[[package]] [[package]]
name = "fastapi" name = "fastapi"
version = "0.128.6" version = "0.128.7"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "fastapi-0.128.6-py3-none-any.whl", hash = "sha256:bb1c1ef87d6086a7132d0ab60869d6f1ee67283b20fbf84ec0003bd335099509"}, {file = "fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662"},
{file = "fastapi-0.128.6.tar.gz", hash = "sha256:0cb3946557e792d731b26a42b04912f16367e3c3135ea8290f620e234f2b604f"}, {file = "fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24"},
] ]
[package.dependencies] [package.dependencies]
@@ -2593,6 +2616,18 @@ http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"] socks = ["socksio (==1.*)"]
zstd = ["zstandard (>=0.18.0)"] zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "httpx-sse"
version = "0.4.3"
description = "Consume Server-Sent Event (SSE) messages with HTTPX."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc"},
{file = "httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d"},
]
[[package]] [[package]]
name = "huggingface-hub" name = "huggingface-hub"
version = "1.4.1" version = "1.4.1"
@@ -3117,14 +3152,14 @@ urllib3 = ">=1.26.0,<3"
[[package]] [[package]]
name = "launchdarkly-server-sdk" name = "launchdarkly-server-sdk"
version = "9.14.1" version = "9.15.0"
description = "LaunchDarkly SDK for Python" description = "LaunchDarkly SDK for Python"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.10"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "launchdarkly_server_sdk-9.14.1-py3-none-any.whl", hash = "sha256:a9e2bd9ecdef845cd631ae0d4334a1115e5b44257c42eb2349492be4bac7815c"}, {file = "launchdarkly_server_sdk-9.15.0-py3-none-any.whl", hash = "sha256:c267e29bfa3fb5e2a06a208448ada6ed5557a2924979b8d79c970b45d227c668"},
{file = "launchdarkly_server_sdk-9.14.1.tar.gz", hash = "sha256:1df44baf0a0efa74d8c1dad7a00592b98bce7d19edded7f770da8dbc49922213"}, {file = "launchdarkly_server_sdk-9.15.0.tar.gz", hash = "sha256:f31441b74bc1a69c381db57c33116509e407a2612628ad6dff0a7dbb39d5020b"},
] ]
[package.dependencies] [package.dependencies]
@@ -3310,6 +3345,39 @@ files = [
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
] ]
[[package]]
name = "mcp"
version = "1.26.0"
description = "Model Context Protocol SDK"
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca"},
{file = "mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66"},
]
[package.dependencies]
anyio = ">=4.5"
httpx = ">=0.27.1"
httpx-sse = ">=0.4"
jsonschema = ">=4.20.0"
pydantic = ">=2.11.0,<3.0.0"
pydantic-settings = ">=2.5.2"
pyjwt = {version = ">=2.10.1", extras = ["crypto"]}
python-multipart = ">=0.0.9"
pywin32 = {version = ">=310", markers = "sys_platform == \"win32\""}
sse-starlette = ">=1.6.1"
starlette = ">=0.27"
typing-extensions = ">=4.9.0"
typing-inspection = ">=0.4.1"
uvicorn = {version = ">=0.31.1", markers = "sys_platform != \"emscripten\""}
[package.extras]
cli = ["python-dotenv (>=1.0.0)", "typer (>=0.16.0)"]
rich = ["rich (>=13.9.4)"]
ws = ["websockets (>=15.0.1)"]
[[package]] [[package]]
name = "mdurl" name = "mdurl"
version = "0.1.2" version = "0.1.2"
@@ -4728,14 +4796,14 @@ tests = ["coverage-conditional-plugin (>=0.9.0)", "portalocker[redis]", "pytest
[[package]] [[package]]
name = "postgrest" name = "postgrest"
version = "2.27.3" version = "2.28.0"
description = "PostgREST client for Python. This library provides an ORM interface to PostgREST." description = "PostgREST client for Python. This library provides an ORM interface to PostgREST."
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "postgrest-2.27.3-py3-none-any.whl", hash = "sha256:ed79123af7127edd78d538bfe8351d277e45b1a36994a4dbf57ae27dde87a7b7"}, {file = "postgrest-2.28.0-py3-none-any.whl", hash = "sha256:7bca2f24dd1a1bf8a3d586c7482aba6cd41662da6733045fad585b63b7f7df75"},
{file = "postgrest-2.27.3.tar.gz", hash = "sha256:c2e2679addfc8eaab23197bad7ddaee6cbb4cbe8c483ebd2d2e5219543037cc3"}, {file = "postgrest-2.28.0.tar.gz", hash = "sha256:c36b38646d25ea4255321d3d924ce70f8d20ec7799cb42c1221d6a818d4f6515"},
] ]
[package.dependencies] [package.dependencies]
@@ -5994,7 +6062,7 @@ description = "Python for Window Extensions"
optional = false optional = false
python-versions = "*" python-versions = "*"
groups = ["main"] groups = ["main"]
markers = "platform_system == \"Windows\"" markers = "sys_platform == \"win32\" or platform_system == \"Windows\""
files = [ files = [
{file = "pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3"}, {file = "pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3"},
{file = "pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b"}, {file = "pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b"},
@@ -6260,14 +6328,14 @@ all = ["numpy"]
[[package]] [[package]]
name = "realtime" name = "realtime"
version = "2.27.3" version = "2.28.0"
description = "" description = ""
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "realtime-2.27.3-py3-none-any.whl", hash = "sha256:f571115f86988e33c41c895cb3fba2eaa1b693aeaede3617288f44274ca90f43"}, {file = "realtime-2.28.0-py3-none-any.whl", hash = "sha256:db1bd59bab9b1fcc9f9d3b1a073bed35bf4994d720e6751f10031a58d57a3836"},
{file = "realtime-2.27.3.tar.gz", hash = "sha256:02b082243107656a5ef3fb63e8e2ab4c40bc199abb45adb8a42ed63f089a1041"}, {file = "realtime-2.28.0.tar.gz", hash = "sha256:d18cedcebd6a8f22fcd509bc767f639761eb218b7b2b6f14fc4205b6259b50fc"},
] ]
[package.dependencies] [package.dependencies]
@@ -6974,6 +7042,28 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"]
pymysql = ["pymysql"] pymysql = ["pymysql"]
sqlcipher = ["sqlcipher3_binary"] sqlcipher = ["sqlcipher3_binary"]
[[package]]
name = "sse-starlette"
version = "3.2.0"
description = "SSE plugin for Starlette"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf"},
{file = "sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422"},
]
[package.dependencies]
anyio = ">=4.7.0"
starlette = ">=0.49.1"
[package.extras]
daphne = ["daphne (>=4.2.0)"]
examples = ["aiosqlite (>=0.21.0)", "fastapi (>=0.115.12)", "sqlalchemy[asyncio] (>=2.0.41)", "uvicorn (>=0.34.0)"]
granian = ["granian (>=2.3.1)"]
uvicorn = ["uvicorn (>=0.34.0)"]
[[package]] [[package]]
name = "stagehand" name = "stagehand"
version = "0.5.9" version = "0.5.9"
@@ -7024,14 +7114,14 @@ full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart
[[package]] [[package]]
name = "storage3" name = "storage3"
version = "2.27.3" version = "2.28.0"
description = "Supabase Storage client for Python." description = "Supabase Storage client for Python."
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "storage3-2.27.3-py3-none-any.whl", hash = "sha256:11a05b7da84bccabeeea12d940bca3760cf63fe6ca441868677335cfe4fdfbe0"}, {file = "storage3-2.28.0-py3-none-any.whl", hash = "sha256:ecb50efd2ac71dabbdf97e99ad346eafa630c4c627a8e5a138ceb5fbbadae716"},
{file = "storage3-2.27.3.tar.gz", hash = "sha256:dc1a4a010cf36d5482c5cb6c1c28fc5f00e23284342b89e4ae43b5eae8501ddb"}, {file = "storage3-2.28.0.tar.gz", hash = "sha256:bc1d008aff67de7a0f2bd867baee7aadbcdb6f78f5a310b4f7a38e8c13c19865"},
] ]
[package.dependencies] [package.dependencies]
@@ -7091,35 +7181,35 @@ typing-extensions = {version = ">=4.5.0", markers = "python_version >= \"3.7\""}
[[package]] [[package]]
name = "supabase" name = "supabase"
version = "2.27.3" version = "2.28.0"
description = "Supabase client for Python." description = "Supabase client for Python."
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "supabase-2.27.3-py3-none-any.whl", hash = "sha256:082a74642fcf9954693f1ce8c251baf23e4bda26ffdbc8dcd4c99c82e60d69ff"}, {file = "supabase-2.28.0-py3-none-any.whl", hash = "sha256:42776971c7d0ccca16034df1ab96a31c50228eb1eb19da4249ad2f756fc20272"},
{file = "supabase-2.27.3.tar.gz", hash = "sha256:5e5a348232ac4315c1032ddd687278f0b982465471f0cbb52bca7e6a66495ff3"}, {file = "supabase-2.28.0.tar.gz", hash = "sha256:aea299aaab2a2eed3c57e0be7fc035c6807214194cce795a3575add20268ece1"},
] ]
[package.dependencies] [package.dependencies]
httpx = ">=0.26,<0.29" httpx = ">=0.26,<0.29"
postgrest = "2.27.3" postgrest = "2.28.0"
realtime = "2.27.3" realtime = "2.28.0"
storage3 = "2.27.3" storage3 = "2.28.0"
supabase-auth = "2.27.3" supabase-auth = "2.28.0"
supabase-functions = "2.27.3" supabase-functions = "2.28.0"
yarl = ">=1.22.0" yarl = ">=1.22.0"
[[package]] [[package]]
name = "supabase-auth" name = "supabase-auth"
version = "2.27.3" version = "2.28.0"
description = "Python Client Library for Supabase Auth" description = "Python Client Library for Supabase Auth"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "supabase_auth-2.27.3-py3-none-any.whl", hash = "sha256:82a4262eaad85383319d394dab0eea11fcf3ebd774062aef8ea3874ae2f02579"}, {file = "supabase_auth-2.28.0-py3-none-any.whl", hash = "sha256:2ac85026cc285054c7fa6d41924f3a333e9ec298c013e5b5e1754039ba7caec9"},
{file = "supabase_auth-2.27.3.tar.gz", hash = "sha256:39894d4bc60b6f23b5cff4d0d7d4c1659e5d69563cadf014d4896f780ca8ca78"}, {file = "supabase_auth-2.28.0.tar.gz", hash = "sha256:2bb8f18ff39934e44b28f10918db965659f3735cd6fbfcc022fe0b82dbf8233e"},
] ]
[package.dependencies] [package.dependencies]
@@ -7129,14 +7219,14 @@ pyjwt = {version = ">=2.10.1", extras = ["crypto"]}
[[package]] [[package]]
name = "supabase-functions" name = "supabase-functions"
version = "2.27.3" version = "2.28.0"
description = "Library for Supabase Functions" description = "Library for Supabase Functions"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
files = [ files = [
{file = "supabase_functions-2.27.3-py3-none-any.whl", hash = "sha256:9d14a931d49ede1c6cf5fbfceb11c44061535ba1c3f310f15384964d86a83d9e"}, {file = "supabase_functions-2.28.0-py3-none-any.whl", hash = "sha256:30bf2d586f8df285faf0621bb5d5bb3ec3157234fc820553ca156f009475e4ae"},
{file = "supabase_functions-2.27.3.tar.gz", hash = "sha256:e954f1646da8ca6e7e16accef58d0884a5f97b25956ee98e7d4927a210ed92f9"}, {file = "supabase_functions-2.28.0.tar.gz", hash = "sha256:db3dddfc37aca5858819eb461130968473bd8c75bd284581013958526dac718b"},
] ]
[package.dependencies] [package.dependencies]
@@ -8440,4 +8530,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.10,<3.14" python-versions = ">=3.10,<3.14"
content-hash = "c06e96ad49388ba7a46786e9ea55ea2c1a57408e15613237b4bee40a592a12af" content-hash = "55e095de555482f0fe47de7695f390fe93e7bcf739b31c391b2e5e3c3d938ae3"

View File

@@ -16,6 +16,7 @@ anthropic = "^0.79.0"
apscheduler = "^3.11.1"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
+claude-agent-sdk = "^0.1.0"
click = "^8.2.0"
cryptography = "^46.0"
discord-py = "^2.5.2"
@@ -65,7 +66,7 @@ sentry-sdk = {extras = ["anthropic", "fastapi", "launchdarkly", "openai", "sqlal
sqlalchemy = "^2.0.40"
strenum = "^0.4.9"
stripe = "^11.5.0"
-supabase = "2.27.3"
+supabase = "2.28.0"
tenacity = "^9.1.4"
todoist-api-python = "^2.1.7"
tweepy = "^4.16.0"

View File

@@ -25,6 +25,7 @@ class TestServiceConfiguration:
"""Test that external service is not configured when host is empty.""" """Test that external service is not configured when host is empty."""
mock_settings = MagicMock() mock_settings = MagicMock()
mock_settings.config.agentgenerator_host = "" mock_settings.config.agentgenerator_host = ""
mock_settings.config.agentgenerator_use_dummy = False
with patch.object(service, "_get_settings", return_value=mock_settings): with patch.object(service, "_get_settings", return_value=mock_settings):
assert service.is_external_service_configured() is False assert service.is_external_service_configured() is False

View File

@@ -0,0 +1,133 @@
"""Tests for SDK security hooks — workspace paths, tool access, and deny messages.
These are pure unit tests with no external dependencies (no SDK, no DB, no server).
They validate that the security hooks correctly block unauthorized paths,
tool access, and dangerous input patterns.
Note: Bash command validation was removed — the SDK built-in Bash tool is not in
allowed_tools, and the bash_exec MCP tool has kernel-level network isolation
(unshare --net) making command-level parsing unnecessary.
"""
from backend.api.features.chat.sdk.security_hooks import (
_validate_tool_access,
_validate_workspace_path,
)
SDK_CWD = "/tmp/copilot-test-session"
def _is_denied(result: dict) -> bool:
hook = result.get("hookSpecificOutput", {})
return hook.get("permissionDecision") == "deny"
def _reason(result: dict) -> str:
return result.get("hookSpecificOutput", {}).get("permissionDecisionReason", "")
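# For orientation (an assumption inferred from the helpers above, not from the hook
# source): a deny result is expected to look roughly like
#
#   {
#       "hookSpecificOutput": {
#           "permissionDecision": "deny",
#           "permissionDecisionReason": "[SECURITY] ... cannot be bypassed ...",
#       }
#   }
#
# while an allow/no-op result either omits hookSpecificOutput or carries a different decision.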
# ============================================================
# Workspace path validation (Read, Write, Edit, etc.)
# ============================================================
class TestWorkspacePathValidation:
def test_path_in_workspace(self):
result = _validate_workspace_path(
"Read", {"file_path": f"{SDK_CWD}/file.txt"}, SDK_CWD
)
assert not _is_denied(result)
def test_path_outside_workspace(self):
result = _validate_workspace_path("Read", {"file_path": "/etc/passwd"}, SDK_CWD)
assert _is_denied(result)
def test_tool_results_allowed(self):
result = _validate_workspace_path(
"Read",
{"file_path": "~/.claude/projects/abc/tool-results/out.txt"},
SDK_CWD,
)
assert not _is_denied(result)
def test_claude_settings_blocked(self):
result = _validate_workspace_path(
"Read", {"file_path": "~/.claude/settings.json"}, SDK_CWD
)
assert _is_denied(result)
def test_claude_projects_without_tool_results(self):
result = _validate_workspace_path(
"Read", {"file_path": "~/.claude/projects/abc/credentials.json"}, SDK_CWD
)
assert _is_denied(result)
def test_no_path_allowed(self):
"""Glob/Grep without path defaults to cwd — should be allowed."""
result = _validate_workspace_path("Grep", {"pattern": "foo"}, SDK_CWD)
assert not _is_denied(result)
def test_path_traversal_with_dotdot(self):
result = _validate_workspace_path(
"Read", {"file_path": f"{SDK_CWD}/../../../etc/passwd"}, SDK_CWD
)
assert _is_denied(result)
# ============================================================
# Tool access validation
# ============================================================
class TestToolAccessValidation:
def test_blocked_tools(self):
for tool in ("bash", "shell", "exec", "terminal", "command"):
result = _validate_tool_access(tool, {})
assert _is_denied(result), f"Tool '{tool}' should be blocked"
def test_bash_builtin_blocked(self):
"""SDK built-in Bash (capital) is blocked as defence-in-depth."""
result = _validate_tool_access("Bash", {"command": "echo hello"}, SDK_CWD)
assert _is_denied(result)
assert "Bash" in _reason(result)
def test_workspace_tools_delegate(self):
result = _validate_tool_access(
"Read", {"file_path": f"{SDK_CWD}/file.txt"}, SDK_CWD
)
assert not _is_denied(result)
def test_dangerous_pattern_blocked(self):
result = _validate_tool_access("SomeUnknownTool", {"data": "sudo rm -rf /"})
assert _is_denied(result)
def test_safe_unknown_tool_allowed(self):
result = _validate_tool_access("SomeSafeTool", {"data": "hello world"})
assert not _is_denied(result)
# ============================================================
# Deny message quality (ntindle feedback)
# ============================================================
class TestDenyMessageClarity:
"""Deny messages must include [SECURITY] and 'cannot be bypassed'
so the model knows the restriction is enforced, not a suggestion."""
def test_blocked_tool_message(self):
reason = _reason(_validate_tool_access("bash", {}))
assert "[SECURITY]" in reason
assert "cannot be bypassed" in reason
def test_bash_builtin_blocked_message(self):
reason = _reason(_validate_tool_access("Bash", {"command": "echo hello"}))
assert "[SECURITY]" in reason
assert "cannot be bypassed" in reason
def test_workspace_path_message(self):
reason = _reason(
_validate_workspace_path("Read", {"file_path": "/etc/passwd"}, SDK_CWD)
)
assert "[SECURITY]" in reason
assert "cannot be bypassed" in reason

View File

@@ -0,0 +1,255 @@
"""Unit tests for JSONL transcript management utilities."""
import json
import os
from backend.api.features.chat.sdk.transcript import (
STRIPPABLE_TYPES,
read_transcript_file,
strip_progress_entries,
validate_transcript,
write_transcript_to_tempfile,
)
def _make_jsonl(*entries: dict) -> str:
return "\n".join(json.dumps(e) for e in entries) + "\n"
# --- Fixtures ---
METADATA_LINE = {"type": "queue-operation", "subtype": "create"}
FILE_HISTORY = {"type": "file-history-snapshot", "files": []}
USER_MSG = {"type": "user", "uuid": "u1", "message": {"role": "user", "content": "hi"}}
ASST_MSG = {
"type": "assistant",
"uuid": "a1",
"parentUuid": "u1",
"message": {"role": "assistant", "content": "hello"},
}
PROGRESS_ENTRY = {
"type": "progress",
"uuid": "p1",
"parentUuid": "u1",
"data": {"type": "bash_progress", "stdout": "running..."},
}
VALID_TRANSCRIPT = _make_jsonl(METADATA_LINE, FILE_HISTORY, USER_MSG, ASST_MSG)
# --- read_transcript_file ---
class TestReadTranscriptFile:
def test_returns_content_for_valid_file(self, tmp_path):
path = tmp_path / "session.jsonl"
path.write_text(VALID_TRANSCRIPT)
result = read_transcript_file(str(path))
assert result is not None
assert "user" in result
def test_returns_none_for_missing_file(self):
assert read_transcript_file("/nonexistent/path.jsonl") is None
def test_returns_none_for_empty_path(self):
assert read_transcript_file("") is None
def test_returns_none_for_empty_file(self, tmp_path):
path = tmp_path / "empty.jsonl"
path.write_text("")
assert read_transcript_file(str(path)) is None
def test_returns_none_for_metadata_only(self, tmp_path):
content = _make_jsonl(METADATA_LINE, FILE_HISTORY)
path = tmp_path / "meta.jsonl"
path.write_text(content)
assert read_transcript_file(str(path)) is None
def test_returns_none_for_invalid_json(self, tmp_path):
path = tmp_path / "bad.jsonl"
path.write_text("not json\n{}\n{}\n")
assert read_transcript_file(str(path)) is None
def test_no_size_limit(self, tmp_path):
"""Large files are accepted — bucket storage has no size limit."""
big_content = {"type": "user", "uuid": "u9", "data": "x" * 1_000_000}
content = _make_jsonl(METADATA_LINE, FILE_HISTORY, big_content, ASST_MSG)
path = tmp_path / "big.jsonl"
path.write_text(content)
result = read_transcript_file(str(path))
assert result is not None
# --- write_transcript_to_tempfile ---
class TestWriteTranscriptToTempfile:
"""Tests use /tmp/copilot-* paths to satisfy the sandbox prefix check."""
def test_writes_file_and_returns_path(self):
cwd = "/tmp/copilot-test-write"
try:
result = write_transcript_to_tempfile(
VALID_TRANSCRIPT, "sess-1234-abcd", cwd
)
assert result is not None
assert os.path.isfile(result)
assert result.endswith(".jsonl")
with open(result) as f:
assert f.read() == VALID_TRANSCRIPT
finally:
import shutil
shutil.rmtree(cwd, ignore_errors=True)
def test_creates_parent_directory(self):
cwd = "/tmp/copilot-test-mkdir"
try:
result = write_transcript_to_tempfile(VALID_TRANSCRIPT, "sess-1234", cwd)
assert result is not None
assert os.path.isdir(cwd)
finally:
import shutil
shutil.rmtree(cwd, ignore_errors=True)
def test_uses_session_id_prefix(self):
cwd = "/tmp/copilot-test-prefix"
try:
result = write_transcript_to_tempfile(
VALID_TRANSCRIPT, "abcdef12-rest", cwd
)
assert result is not None
assert "abcdef12" in os.path.basename(result)
finally:
import shutil
shutil.rmtree(cwd, ignore_errors=True)
def test_rejects_cwd_outside_sandbox(self, tmp_path):
cwd = str(tmp_path / "not-copilot")
result = write_transcript_to_tempfile(VALID_TRANSCRIPT, "sess-1234", cwd)
assert result is None
# --- validate_transcript ---
class TestValidateTranscript:
def test_valid_transcript(self):
assert validate_transcript(VALID_TRANSCRIPT) is True
def test_none_content(self):
assert validate_transcript(None) is False
def test_empty_content(self):
assert validate_transcript("") is False
def test_metadata_only(self):
content = _make_jsonl(METADATA_LINE, FILE_HISTORY)
assert validate_transcript(content) is False
def test_user_only_no_assistant(self):
content = _make_jsonl(METADATA_LINE, FILE_HISTORY, USER_MSG)
assert validate_transcript(content) is False
def test_assistant_only_no_user(self):
content = _make_jsonl(METADATA_LINE, FILE_HISTORY, ASST_MSG)
assert validate_transcript(content) is False
def test_invalid_json_returns_false(self):
assert validate_transcript("not json\n{}\n{}\n") is False
# --- strip_progress_entries ---
class TestStripProgressEntries:
def test_strips_all_strippable_types(self):
"""All STRIPPABLE_TYPES are removed from the output."""
entries = [
USER_MSG,
{"type": "progress", "uuid": "p1", "parentUuid": "u1"},
{"type": "file-history-snapshot", "files": []},
{"type": "queue-operation", "subtype": "create"},
{"type": "summary", "text": "..."},
{"type": "pr-link", "url": "..."},
ASST_MSG,
]
result = strip_progress_entries(_make_jsonl(*entries))
result_types = {json.loads(line)["type"] for line in result.strip().split("\n")}
assert result_types == {"user", "assistant"}
for stype in STRIPPABLE_TYPES:
assert stype not in result_types
def test_reparents_children_of_stripped_entries(self):
"""An assistant message whose parent is a progress entry gets reparented."""
progress = {
"type": "progress",
"uuid": "p1",
"parentUuid": "u1",
"data": {"type": "bash_progress"},
}
asst = {
"type": "assistant",
"uuid": "a1",
"parentUuid": "p1", # Points to progress
"message": {"role": "assistant", "content": "done"},
}
content = _make_jsonl(USER_MSG, progress, asst)
result = strip_progress_entries(content)
lines = [json.loads(line) for line in result.strip().split("\n")]
asst_entry = next(e for e in lines if e["type"] == "assistant")
# Should be reparented to u1 (the user message)
assert asst_entry["parentUuid"] == "u1"
def test_reparents_through_chain(self):
"""Reparenting walks through multiple stripped entries."""
p1 = {"type": "progress", "uuid": "p1", "parentUuid": "u1"}
p2 = {"type": "progress", "uuid": "p2", "parentUuid": "p1"}
p3 = {"type": "progress", "uuid": "p3", "parentUuid": "p2"}
asst = {
"type": "assistant",
"uuid": "a1",
"parentUuid": "p3", # 3 levels deep
"message": {"role": "assistant", "content": "done"},
}
content = _make_jsonl(USER_MSG, p1, p2, p3, asst)
result = strip_progress_entries(content)
lines = [json.loads(line) for line in result.strip().split("\n")]
asst_entry = next(e for e in lines if e["type"] == "assistant")
assert asst_entry["parentUuid"] == "u1"
def test_preserves_non_strippable_entries(self):
"""User, assistant, and system entries are preserved."""
system = {"type": "system", "uuid": "s1", "message": "prompt"}
content = _make_jsonl(system, USER_MSG, ASST_MSG)
result = strip_progress_entries(content)
result_types = [json.loads(line)["type"] for line in result.strip().split("\n")]
assert result_types == ["system", "user", "assistant"]
def test_empty_input(self):
result = strip_progress_entries("")
# Should return just a newline (empty content stripped)
assert result.strip() == ""
def test_no_strippable_entries(self):
"""When there's nothing to strip, output matches input structure."""
content = _make_jsonl(USER_MSG, ASST_MSG)
result = strip_progress_entries(content)
result_lines = result.strip().split("\n")
assert len(result_lines) == 2
def test_handles_entries_without_uuid(self):
"""Entries without uuid field are handled gracefully."""
no_uuid = {"type": "queue-operation", "subtype": "create"}
content = _make_jsonl(no_uuid, USER_MSG, ASST_MSG)
result = strip_progress_entries(content)
result_types = [json.loads(line)["type"] for line in result.strip().split("\n")]
# queue-operation is strippable
assert "queue-operation" not in result_types
assert "user" in result_types
assert "assistant" in result_types

View File

@@ -37,7 +37,7 @@ services:
context: ../
dockerfile: autogpt_platform/backend/Dockerfile
target: migrate
- command: ["sh", "-c", "poetry run prisma generate && poetry run gen-prisma-stub && poetry run prisma migrate deploy"]
+ command: ["sh", "-c", "prisma generate && python3 gen_prisma_types_stub.py && prisma migrate deploy"]
develop:
watch:
- path: ./
@@ -56,7 +56,7 @@ services:
test:
[
"CMD-SHELL",
- "poetry run prisma migrate status | grep -q 'No pending migrations' || exit 1",
+ "prisma migrate status | grep -q 'No pending migrations' || exit 1",
]
interval: 30s
timeout: 10s

View File

@@ -22,6 +22,11 @@ Sentry.init({
enabled: shouldEnable,
// Suppress cross-origin stylesheet errors from Sentry Replay (rrweb)
// serializing DOM snapshots with cross-origin stylesheets
// (e.g., from browser extensions or CDN-loaded CSS)
ignoreErrors: [/Not allowed to access cross-origin stylesheet/],
// Add optional integrations for additional features
integrations: [
Sentry.captureConsoleIntegration(),

View File

@@ -20,6 +20,7 @@ import { FindBlocksTool } from "../../tools/FindBlocks/FindBlocks";
import { RunAgentTool } from "../../tools/RunAgent/RunAgent";
import { RunBlockTool } from "../../tools/RunBlock/RunBlock";
import { SearchDocsTool } from "../../tools/SearchDocs/SearchDocs";
+ import { GenericTool } from "../../tools/GenericTool/GenericTool";
import { ViewAgentOutputTool } from "../../tools/ViewAgentOutput/ViewAgentOutput";
// ---------------------------------------------------------------------------
@@ -159,7 +160,7 @@ export const ChatMessagesContainer = ({
return (
<Conversation className="min-h-0 flex-1">
- <ConversationContent className="flex min-h-screen flex-1 flex-col gap-6 px-3 py-6">
+ <ConversationContent className="flex flex-1 flex-col gap-6 px-3 py-6">
{isLoading && messages.length === 0 && (
<div className="flex min-h-full flex-1 items-center justify-center">
<LoadingSpinner className="text-neutral-600" />
@@ -255,6 +256,16 @@ export const ChatMessagesContainer = ({
/>
);
default:
// Render a generic tool indicator for SDK built-in
// tools (Read, Glob, Grep, etc.) or any unrecognized tool
if (part.type.startsWith("tool-")) {
return (
<GenericTool
key={`${message.id}-${i}`}
part={part as ToolUIPart}
/>
);
}
return null;
}
})}

View File

@@ -1,10 +0,0 @@
import { parseAsString, useQueryState } from "nuqs";
export function useCopilotSessionId() {
const [urlSessionId, setUrlSessionId] = useQueryState(
"sessionId",
parseAsString,
);
return { urlSessionId, setUrlSessionId };
}

View File

@@ -0,0 +1,126 @@
import { getGetV2GetSessionQueryKey } from "@/app/api/__generated__/endpoints/chat/chat";
import { useQueryClient } from "@tanstack/react-query";
import type { UIDataTypes, UIMessage, UITools } from "ai";
import { useCallback, useEffect, useRef } from "react";
import { convertChatSessionMessagesToUiMessages } from "../helpers/convertChatSessionToUiMessages";
const OPERATING_TYPES = new Set([
"operation_started",
"operation_pending",
"operation_in_progress",
]);
const POLL_INTERVAL_MS = 1_500;
/**
* Detects whether any message contains a tool part whose output indicates
* a long-running operation is still in progress.
*/
function hasOperatingTool(
messages: UIMessage<unknown, UIDataTypes, UITools>[],
) {
for (const msg of messages) {
for (const part of msg.parts) {
if (!part.type.startsWith("tool-")) continue;
const toolPart = part as { output?: unknown };
if (!toolPart.output) continue;
const output =
typeof toolPart.output === "string"
? safeParse(toolPart.output)
: toolPart.output;
if (
output &&
typeof output === "object" &&
"type" in output &&
OPERATING_TYPES.has((output as { type: string }).type)
) {
return true;
}
}
}
return false;
}
function safeParse(value: string): unknown {
try {
return JSON.parse(value);
} catch {
return null;
}
}
/**
* Polls the session endpoint while any tool is in an "operating" state
* (operation_started / operation_pending / operation_in_progress).
*
* When the session data shows the tool output has changed (e.g. to
* agent_saved), it calls `setMessages` with the updated messages.
*/
export function useLongRunningToolPolling(
sessionId: string | null,
messages: UIMessage<unknown, UIDataTypes, UITools>[],
setMessages: (
updater: (
prev: UIMessage<unknown, UIDataTypes, UITools>[],
) => UIMessage<unknown, UIDataTypes, UITools>[],
) => void,
) {
const queryClient = useQueryClient();
const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
const stopPolling = useCallback(() => {
if (intervalRef.current) {
clearInterval(intervalRef.current);
intervalRef.current = null;
}
}, []);
const poll = useCallback(async () => {
if (!sessionId) return;
// Invalidate the query cache so the next fetch gets fresh data
await queryClient.invalidateQueries({
queryKey: getGetV2GetSessionQueryKey(sessionId),
});
// Fetch fresh session data
const data = queryClient.getQueryData<{
status: number;
data: { messages?: unknown[] };
}>(getGetV2GetSessionQueryKey(sessionId));
if (data?.status !== 200 || !data.data.messages) return;
const freshMessages = convertChatSessionMessagesToUiMessages(
sessionId,
data.data.messages,
);
if (!freshMessages || freshMessages.length === 0) return;
// Update when the long-running tool completed
if (!hasOperatingTool(freshMessages)) {
setMessages(() => freshMessages);
stopPolling();
}
}, [sessionId, queryClient, setMessages, stopPolling]);
useEffect(() => {
const shouldPoll = hasOperatingTool(messages);
// Always clear any previous interval first so we never leak timers
// when the effect re-runs due to dependency changes (e.g. messages
// updating as the LLM streams text after the tool call).
stopPolling();
if (shouldPoll && sessionId) {
intervalRef.current = setInterval(() => {
poll();
}, POLL_INTERVAL_MS);
}
return () => {
stopPolling();
};
}, [messages, sessionId, poll, stopPolling]);
}
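
Illustrative note (not part of the diff): hasOperatingTool() above is what keeps the 1.5s polling loop alive. A message shaped like the sketch below would keep polling until the session endpoint returns a terminal output such as agent_saved; the exact part shape is assumed for the example, and create_agent is one of the long-running tools the hook targets.

// A tool part whose string output still reports an in-progress operation.
// safeParse() JSON-parses string outputs before the OPERATING_TYPES check.
const stillOperatingMessage = {
  id: "msg-1",
  role: "assistant" as const,
  parts: [
    {
      type: "tool-create_agent",
      state: "output-available",
      output: JSON.stringify({
        type: "operation_in_progress",
        message: "Still building the agent...",
      }),
    },
  ],
};
// hasOperatingTool([stillOperatingMessage]) -> true, so polling continues.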

View File

@@ -1,24 +1,30 @@
"use client"; "use client";
import { WarningDiamondIcon } from "@phosphor-icons/react"; import { Button } from "@/components/atoms/Button/Button";
import { Text } from "@/components/atoms/Text/Text";
import {
BookOpenIcon,
CheckFatIcon,
PencilSimpleIcon,
WarningDiamondIcon,
} from "@phosphor-icons/react";
import type { ToolUIPart } from "ai"; import type { ToolUIPart } from "ai";
import NextLink from "next/link";
import { useCopilotChatActions } from "../../components/CopilotChatActionsProvider/useCopilotChatActions"; import { useCopilotChatActions } from "../../components/CopilotChatActionsProvider/useCopilotChatActions";
import { MorphingTextAnimation } from "../../components/MorphingTextAnimation/MorphingTextAnimation"; import { MorphingTextAnimation } from "../../components/MorphingTextAnimation/MorphingTextAnimation";
import { ProgressBar } from "../../components/ProgressBar/ProgressBar";
import { import {
ContentCardDescription, ContentCardDescription,
ContentCodeBlock, ContentCodeBlock,
ContentGrid, ContentGrid,
ContentHint, ContentHint,
ContentLink,
ContentMessage, ContentMessage,
} from "../../components/ToolAccordion/AccordionContent"; } from "../../components/ToolAccordion/AccordionContent";
import { ToolAccordion } from "../../components/ToolAccordion/ToolAccordion"; import { ToolAccordion } from "../../components/ToolAccordion/ToolAccordion";
import { useAsymptoticProgress } from "../../hooks/useAsymptoticProgress";
import { import {
ClarificationQuestionsCard, ClarificationQuestionsCard,
ClarifyingQuestion, ClarifyingQuestion,
} from "./components/ClarificationQuestionsCard"; } from "./components/ClarificationQuestionsCard";
import { MiniGame } from "./components/MiniGame/MiniGame";
import { import {
AccordionIcon, AccordionIcon,
formatMaybeJson, formatMaybeJson,
@@ -52,7 +58,7 @@ function getAccordionMeta(output: CreateAgentToolOutput) {
const icon = <AccordionIcon />;
if (isAgentSavedOutput(output)) {
- return { icon, title: output.agent_name };
+ return { icon, title: output.agent_name, expanded: true };
}
if (isAgentPreviewOutput(output)) {
return {
@@ -78,6 +84,7 @@ function getAccordionMeta(output: CreateAgentToolOutput) {
return {
icon,
title: "Creating agent, this may take a few minutes. Sit back and relax.",
+ expanded: true,
};
}
return {
@@ -107,8 +114,6 @@ export function CreateAgentTool({ part }: Props) {
isOperationPendingOutput(output) ||
isOperationInProgressOutput(output));
- const progress = useAsymptoticProgress(isOperating);
const hasExpandableContent =
part.state === "output-available" &&
!!output &&
@@ -152,31 +157,53 @@ export function CreateAgentTool({ part }: Props) {
<ToolAccordion {...getAccordionMeta(output)}>
{isOperating && (
<ContentGrid>
- <ProgressBar value={progress} className="max-w-[280px]" />
+ <MiniGame />
<ContentHint>
- This could take a few minutes, grab a coffee
+ This could take a few minutes play while you wait!
</ContentHint>
</ContentGrid>
)}
{isAgentSavedOutput(output) && (
- <ContentGrid>
- <ContentMessage>{output.message}</ContentMessage>
- <div className="flex flex-wrap gap-2">
- <ContentLink href={output.library_agent_link}>
- Open in library
- </ContentLink>
- <ContentLink href={output.agent_page_link}>
- Open in builder
- </ContentLink>
- </div>
- <ContentCodeBlock>
- {truncateText(
- formatMaybeJson({ agent_id: output.agent_id }),
- 800,
- )}
- </ContentCodeBlock>
- </ContentGrid>
+ <div className="rounded-xl border border-border/60 bg-card p-4 shadow-sm">
+ <div className="flex items-baseline gap-2">
+ <CheckFatIcon
+ size={18}
+ weight="regular"
+ className="relative top-1 text-green-500"
+ />
+ <Text
+ variant="body-medium"
+ className="text-blacks mb-2 text-[16px]"
+ >
+ {output.message}
+ </Text>
+ </div>
+ <div className="mt-3 flex flex-wrap gap-4">
+ <Button variant="outline" size="small">
+ <NextLink
+ href={output.library_agent_link}
+ className="inline-flex items-center gap-1.5"
+ target="_blank"
+ rel="noopener noreferrer"
+ >
+ <BookOpenIcon size={14} weight="regular" />
+ Open in library
+ </NextLink>
+ </Button>
+ <Button variant="outline" size="small">
+ <NextLink
+ href={output.agent_page_link}
+ target="_blank"
+ rel="noopener noreferrer"
+ className="inline-flex items-center gap-1.5"
+ >
+ <PencilSimpleIcon size={14} weight="regular" />
+ Open in builder
+ </NextLink>
+ </Button>
+ </div>
+ </div>
)}
{isAgentPreviewOutput(output) && (
View File

@@ -0,0 +1,21 @@
"use client";
import { useMiniGame } from "./useMiniGame";
export function MiniGame() {
const { canvasRef } = useMiniGame();
return (
<div
className="w-full overflow-hidden rounded-md bg-background text-foreground"
style={{ border: "1px solid #d17fff" }}
>
<canvas
ref={canvasRef}
tabIndex={0}
className="block w-full outline-none"
style={{ imageRendering: "pixelated" }}
/>
</div>
);
}

View File

@@ -0,0 +1,579 @@
import { useEffect, useRef } from "react";
/* ------------------------------------------------------------------ */
/* Constants */
/* ------------------------------------------------------------------ */
const CANVAS_HEIGHT = 150;
const GRAVITY = 0.55;
const JUMP_FORCE = -9.5;
const BASE_SPEED = 3;
const SPEED_INCREMENT = 0.0008;
const SPAWN_MIN = 70;
const SPAWN_MAX = 130;
const CHAR_SIZE = 18;
const CHAR_X = 50;
const GROUND_PAD = 20;
const STORAGE_KEY = "copilot-minigame-highscore";
// Colors
const COLOR_BG = "#E8EAF6";
const COLOR_CHAR = "#263238";
const COLOR_BOSS = "#F50057";
// Boss
const BOSS_SIZE = 36;
const BOSS_ENTER_SPEED = 2;
const BOSS_LEAVE_SPEED = 3;
const BOSS_SHOOT_COOLDOWN = 90;
const BOSS_SHOTS_TO_EVADE = 5;
const BOSS_INTERVAL = 20; // every N score
const PROJ_SPEED = 4.5;
const PROJ_SIZE = 12;
/* ------------------------------------------------------------------ */
/* Types */
/* ------------------------------------------------------------------ */
interface Obstacle {
x: number;
width: number;
height: number;
scored: boolean;
}
interface Projectile {
x: number;
y: number;
speed: number;
evaded: boolean;
type: "low" | "high";
}
interface BossState {
phase: "inactive" | "entering" | "fighting" | "leaving";
x: number;
targetX: number;
shotsEvaded: number;
cooldown: number;
projectiles: Projectile[];
bob: number;
}
interface GameState {
charY: number;
vy: number;
obstacles: Obstacle[];
score: number;
highScore: number;
speed: number;
frame: number;
nextSpawn: number;
running: boolean;
over: boolean;
groundY: number;
boss: BossState;
bossThreshold: number;
}
/* ------------------------------------------------------------------ */
/* Helpers */
/* ------------------------------------------------------------------ */
function randInt(min: number, max: number) {
return Math.floor(Math.random() * (max - min + 1)) + min;
}
function readHighScore(): number {
try {
return parseInt(localStorage.getItem(STORAGE_KEY) || "0", 10) || 0;
} catch {
return 0;
}
}
function writeHighScore(score: number) {
try {
localStorage.setItem(STORAGE_KEY, String(score));
} catch {
/* noop */
}
}
function makeBoss(): BossState {
return {
phase: "inactive",
x: 0,
targetX: 0,
shotsEvaded: 0,
cooldown: 0,
projectiles: [],
bob: 0,
};
}
function makeState(groundY: number): GameState {
return {
charY: groundY - CHAR_SIZE,
vy: 0,
obstacles: [],
score: 0,
highScore: readHighScore(),
speed: BASE_SPEED,
frame: 0,
nextSpawn: randInt(SPAWN_MIN, SPAWN_MAX),
running: false,
over: false,
groundY,
boss: makeBoss(),
bossThreshold: BOSS_INTERVAL,
};
}
function gameOver(s: GameState) {
s.running = false;
s.over = true;
if (s.score > s.highScore) {
s.highScore = s.score;
writeHighScore(s.score);
}
}
/* ------------------------------------------------------------------ */
/* Projectile collision — shared between fighting & leaving phases */
/* ------------------------------------------------------------------ */
/** Returns true if the player died. */
function tickProjectiles(s: GameState): boolean {
const boss = s.boss;
for (const p of boss.projectiles) {
p.x -= p.speed;
if (!p.evaded && p.x + PROJ_SIZE < CHAR_X) {
p.evaded = true;
boss.shotsEvaded++;
}
// Collision
if (
!p.evaded &&
CHAR_X + CHAR_SIZE > p.x &&
CHAR_X < p.x + PROJ_SIZE &&
s.charY + CHAR_SIZE > p.y &&
s.charY < p.y + PROJ_SIZE
) {
gameOver(s);
return true;
}
}
boss.projectiles = boss.projectiles.filter((p) => p.x + PROJ_SIZE > -20);
return false;
}
/* ------------------------------------------------------------------ */
/* Update */
/* ------------------------------------------------------------------ */
function update(s: GameState, canvasWidth: number) {
if (!s.running) return;
s.frame++;
// Speed only ramps during regular play
if (s.boss.phase === "inactive") {
s.speed = BASE_SPEED + s.frame * SPEED_INCREMENT;
}
// ---- Character physics (always active) ---- //
s.vy += GRAVITY;
s.charY += s.vy;
if (s.charY + CHAR_SIZE >= s.groundY) {
s.charY = s.groundY - CHAR_SIZE;
s.vy = 0;
}
// ---- Trigger boss ---- //
if (s.boss.phase === "inactive" && s.score >= s.bossThreshold) {
s.boss.phase = "entering";
s.boss.x = canvasWidth + 10;
s.boss.targetX = canvasWidth - BOSS_SIZE - 40;
s.boss.shotsEvaded = 0;
s.boss.cooldown = BOSS_SHOOT_COOLDOWN;
s.boss.projectiles = [];
s.obstacles = [];
}
// ---- Boss: entering ---- //
if (s.boss.phase === "entering") {
s.boss.bob = Math.sin(s.frame * 0.05) * 3;
s.boss.x -= BOSS_ENTER_SPEED;
if (s.boss.x <= s.boss.targetX) {
s.boss.x = s.boss.targetX;
s.boss.phase = "fighting";
}
return; // no obstacles while entering
}
// ---- Boss: fighting ---- //
if (s.boss.phase === "fighting") {
s.boss.bob = Math.sin(s.frame * 0.05) * 3;
// Shoot
s.boss.cooldown--;
if (s.boss.cooldown <= 0) {
const isLow = Math.random() < 0.5;
s.boss.projectiles.push({
x: s.boss.x - PROJ_SIZE,
y: isLow ? s.groundY - 14 : s.groundY - 70,
speed: PROJ_SPEED,
evaded: false,
type: isLow ? "low" : "high",
});
s.boss.cooldown = BOSS_SHOOT_COOLDOWN;
}
if (tickProjectiles(s)) return;
// Boss defeated?
if (s.boss.shotsEvaded >= BOSS_SHOTS_TO_EVADE) {
s.boss.phase = "leaving";
s.score += 5; // bonus
s.bossThreshold = s.score + BOSS_INTERVAL;
}
return;
}
// ---- Boss: leaving ---- //
if (s.boss.phase === "leaving") {
s.boss.bob = Math.sin(s.frame * 0.05) * 3;
s.boss.x += BOSS_LEAVE_SPEED;
// Still check in-flight projectiles
if (tickProjectiles(s)) return;
if (s.boss.x > canvasWidth + 50) {
s.boss = makeBoss();
s.nextSpawn = s.frame + randInt(SPAWN_MIN / 2, SPAWN_MAX / 2);
}
return;
}
// ---- Regular obstacle play ---- //
if (s.frame >= s.nextSpawn) {
s.obstacles.push({
x: canvasWidth + 10,
width: randInt(10, 16),
height: randInt(20, 48),
scored: false,
});
s.nextSpawn = s.frame + randInt(SPAWN_MIN, SPAWN_MAX);
}
for (const o of s.obstacles) {
o.x -= s.speed;
if (!o.scored && o.x + o.width < CHAR_X) {
o.scored = true;
s.score++;
}
}
s.obstacles = s.obstacles.filter((o) => o.x + o.width > -20);
for (const o of s.obstacles) {
const oY = s.groundY - o.height;
if (
CHAR_X + CHAR_SIZE > o.x &&
CHAR_X < o.x + o.width &&
s.charY + CHAR_SIZE > oY
) {
gameOver(s);
return;
}
}
}
/* ------------------------------------------------------------------ */
/* Drawing */
/* ------------------------------------------------------------------ */
function drawBoss(ctx: CanvasRenderingContext2D, s: GameState, bg: string) {
const bx = s.boss.x;
const by = s.groundY - BOSS_SIZE + s.boss.bob;
// Body
ctx.save();
ctx.fillStyle = COLOR_BOSS;
ctx.globalAlpha = 0.9;
ctx.beginPath();
ctx.roundRect(bx, by, BOSS_SIZE, BOSS_SIZE, 4);
ctx.fill();
ctx.restore();
// Eyes
ctx.save();
ctx.fillStyle = bg;
const eyeY = by + 13;
ctx.beginPath();
ctx.arc(bx + 10, eyeY, 4, 0, Math.PI * 2);
ctx.fill();
ctx.beginPath();
ctx.arc(bx + 26, eyeY, 4, 0, Math.PI * 2);
ctx.fill();
ctx.restore();
// Angry eyebrows
ctx.save();
ctx.strokeStyle = bg;
ctx.lineWidth = 2;
ctx.beginPath();
ctx.moveTo(bx + 5, eyeY - 7);
ctx.lineTo(bx + 14, eyeY - 4);
ctx.stroke();
ctx.beginPath();
ctx.moveTo(bx + 31, eyeY - 7);
ctx.lineTo(bx + 22, eyeY - 4);
ctx.stroke();
ctx.restore();
// Zigzag mouth
ctx.save();
ctx.strokeStyle = bg;
ctx.lineWidth = 1.5;
ctx.beginPath();
ctx.moveTo(bx + 10, by + 27);
ctx.lineTo(bx + 14, by + 24);
ctx.lineTo(bx + 18, by + 27);
ctx.lineTo(bx + 22, by + 24);
ctx.lineTo(bx + 26, by + 27);
ctx.stroke();
ctx.restore();
}
function drawProjectiles(ctx: CanvasRenderingContext2D, boss: BossState) {
ctx.save();
ctx.fillStyle = COLOR_BOSS;
ctx.globalAlpha = 0.8;
for (const p of boss.projectiles) {
if (p.evaded) continue;
ctx.beginPath();
ctx.arc(
p.x + PROJ_SIZE / 2,
p.y + PROJ_SIZE / 2,
PROJ_SIZE / 2,
0,
Math.PI * 2,
);
ctx.fill();
}
ctx.restore();
}
function draw(
ctx: CanvasRenderingContext2D,
s: GameState,
w: number,
h: number,
fg: string,
started: boolean,
) {
ctx.fillStyle = COLOR_BG;
ctx.fillRect(0, 0, w, h);
// Ground
ctx.save();
ctx.strokeStyle = fg;
ctx.globalAlpha = 0.15;
ctx.setLineDash([4, 4]);
ctx.beginPath();
ctx.moveTo(0, s.groundY);
ctx.lineTo(w, s.groundY);
ctx.stroke();
ctx.restore();
// Character
ctx.save();
ctx.fillStyle = COLOR_CHAR;
ctx.globalAlpha = 0.85;
ctx.beginPath();
ctx.roundRect(CHAR_X, s.charY, CHAR_SIZE, CHAR_SIZE, 3);
ctx.fill();
ctx.restore();
// Eyes
ctx.save();
ctx.fillStyle = COLOR_BG;
ctx.beginPath();
ctx.arc(CHAR_X + 6, s.charY + 7, 2.5, 0, Math.PI * 2);
ctx.fill();
ctx.beginPath();
ctx.arc(CHAR_X + 12, s.charY + 7, 2.5, 0, Math.PI * 2);
ctx.fill();
ctx.restore();
// Obstacles
ctx.save();
ctx.fillStyle = fg;
ctx.globalAlpha = 0.55;
for (const o of s.obstacles) {
ctx.fillRect(o.x, s.groundY - o.height, o.width, o.height);
}
ctx.restore();
// Boss + projectiles
if (s.boss.phase !== "inactive") {
drawBoss(ctx, s, COLOR_BG);
drawProjectiles(ctx, s.boss);
}
// Score HUD
ctx.save();
ctx.fillStyle = fg;
ctx.globalAlpha = 0.5;
ctx.font = "bold 11px monospace";
ctx.textAlign = "right";
ctx.fillText(`Score: ${s.score}`, w - 12, 20);
ctx.fillText(`Best: ${s.highScore}`, w - 12, 34);
if (s.boss.phase === "fighting") {
ctx.fillText(
`Evade: ${s.boss.shotsEvaded}/${BOSS_SHOTS_TO_EVADE}`,
w - 12,
48,
);
}
ctx.restore();
// Prompts
if (!started && !s.running && !s.over) {
ctx.save();
ctx.fillStyle = fg;
ctx.globalAlpha = 0.5;
ctx.font = "12px sans-serif";
ctx.textAlign = "center";
ctx.fillText("Click or press Space to play while you wait", w / 2, h / 2);
ctx.restore();
}
if (s.over) {
ctx.save();
ctx.fillStyle = fg;
ctx.globalAlpha = 0.7;
ctx.font = "bold 13px sans-serif";
ctx.textAlign = "center";
ctx.fillText("Game Over", w / 2, h / 2 - 8);
ctx.font = "11px sans-serif";
ctx.fillText("Click or Space to restart", w / 2, h / 2 + 10);
ctx.restore();
}
}
/* ------------------------------------------------------------------ */
/* Hook */
/* ------------------------------------------------------------------ */
export function useMiniGame() {
const canvasRef = useRef<HTMLCanvasElement>(null);
const stateRef = useRef<GameState | null>(null);
const rafRef = useRef(0);
const startedRef = useRef(false);
useEffect(() => {
const canvas = canvasRef.current;
if (!canvas) return;
const container = canvas.parentElement;
if (container) {
canvas.width = container.clientWidth;
canvas.height = CANVAS_HEIGHT;
}
const groundY = canvas.height - GROUND_PAD;
stateRef.current = makeState(groundY);
const style = getComputedStyle(canvas);
let fg = style.color || "#71717a";
// -------------------------------------------------------------- //
// Jump //
// -------------------------------------------------------------- //
function jump() {
const s = stateRef.current;
if (!s) return;
if (s.over) {
const hs = s.highScore;
const gy = s.groundY;
stateRef.current = makeState(gy);
stateRef.current.highScore = hs;
stateRef.current.running = true;
startedRef.current = true;
return;
}
if (!s.running) {
s.running = true;
startedRef.current = true;
return;
}
// Only jump when on the ground
if (s.charY + CHAR_SIZE >= s.groundY) {
s.vy = JUMP_FORCE;
}
}
function onKey(e: KeyboardEvent) {
if (e.code === "Space" || e.key === " ") {
e.preventDefault();
jump();
}
}
function onClick() {
canvas?.focus();
jump();
}
// -------------------------------------------------------------- //
// Loop //
// -------------------------------------------------------------- //
function loop() {
const s = stateRef.current;
if (!canvas || !s) return;
const ctx = canvas.getContext("2d");
if (!ctx) return;
update(s, canvas.width);
draw(ctx, s, canvas.width, canvas.height, fg, startedRef.current);
rafRef.current = requestAnimationFrame(loop);
}
rafRef.current = requestAnimationFrame(loop);
canvas.addEventListener("click", onClick);
canvas.addEventListener("keydown", onKey);
const observer = new ResizeObserver((entries) => {
for (const entry of entries) {
canvas.width = entry.contentRect.width;
canvas.height = CANVAS_HEIGHT;
if (stateRef.current) {
stateRef.current.groundY = canvas.height - GROUND_PAD;
}
const cs = getComputedStyle(canvas);
fg = cs.color || fg;
}
});
if (container) observer.observe(container);
return () => {
cancelAnimationFrame(rafRef.current);
canvas.removeEventListener("click", onClick);
canvas.removeEventListener("keydown", onKey);
observer.disconnect();
};
}, []);
return { canvasRef };
}
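
Back-of-envelope check (illustrative, not part of the diff) on the jump constants above: with vy starting at JUMP_FORCE and GRAVITY added every frame, the character tops out after about |JUMP_FORCE| / GRAVITY frames at a height of roughly JUMP_FORCE^2 / (2 * GRAVITY), which comfortably clears the tallest obstacle the spawner can produce (48 px).

const framesToApex = 9.5 / 0.55;             // |JUMP_FORCE| / GRAVITY, about 17 frames
const jumpHeightPx = 9.5 ** 2 / (2 * 0.55);  // JUMP_FORCE^2 / (2 * GRAVITY), about 82 px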

View File

@@ -0,0 +1,63 @@
"use client";
import { ToolUIPart } from "ai";
import { GearIcon } from "@phosphor-icons/react";
import { MorphingTextAnimation } from "../../components/MorphingTextAnimation/MorphingTextAnimation";
interface Props {
part: ToolUIPart;
}
function extractToolName(part: ToolUIPart): string {
// ToolUIPart.type is "tool-{name}", extract the name portion.
return part.type.replace(/^tool-/, "");
}
function formatToolName(name: string): string {
// "search_docs" → "Search docs", "Read" → "Read"
return name.replace(/_/g, " ").replace(/^\w/, (c) => c.toUpperCase());
}
function getAnimationText(part: ToolUIPart): string {
const label = formatToolName(extractToolName(part));
switch (part.state) {
case "input-streaming":
case "input-available":
return `Running ${label}`;
case "output-available":
return `${label} completed`;
case "output-error":
return `${label} failed`;
default:
return `Running ${label}`;
}
}
export function GenericTool({ part }: Props) {
const isStreaming =
part.state === "input-streaming" || part.state === "input-available";
const isError = part.state === "output-error";
return (
<div className="py-2">
<div className="flex items-center gap-2 text-sm text-muted-foreground">
<GearIcon
size={14}
weight="regular"
className={
isError
? "text-red-500"
: isStreaming
? "animate-spin text-neutral-500"
: "text-neutral-400"
}
/>
<MorphingTextAnimation
text={getAnimationText(part)}
className={isError ? "text-red-500" : undefined}
/>
</div>
</div>
);
}
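
Illustrative restatement (not part of the diff): the label logic above boils down to "strip the tool- prefix, prettify the name, then pick a verb from the part state". The helper below is hypothetical, since the real functions are module-private; it only exists to show the mapping.

function describeToolPart(type: string, state: string): string {
  const label = type
    .replace(/^tool-/, "")
    .replace(/_/g, " ")
    .replace(/^\w/, (c) => c.toUpperCase());
  if (state === "output-available") return `${label} completed`;
  if (state === "output-error") return `${label} failed`;
  return `Running ${label}`;
}

// describeToolPart("tool-search_docs", "input-available") -> "Running Search docs"
// describeToolPart("tool-Read", "output-error")           -> "Read failed"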

View File

@@ -3,6 +3,7 @@
import type { ToolUIPart } from "ai"; import type { ToolUIPart } from "ai";
import { MorphingTextAnimation } from "../../components/MorphingTextAnimation/MorphingTextAnimation"; import { MorphingTextAnimation } from "../../components/MorphingTextAnimation/MorphingTextAnimation";
import { ToolAccordion } from "../../components/ToolAccordion/ToolAccordion"; import { ToolAccordion } from "../../components/ToolAccordion/ToolAccordion";
import { BlockDetailsCard } from "./components/BlockDetailsCard/BlockDetailsCard";
import { BlockOutputCard } from "./components/BlockOutputCard/BlockOutputCard"; import { BlockOutputCard } from "./components/BlockOutputCard/BlockOutputCard";
import { ErrorCard } from "./components/ErrorCard/ErrorCard"; import { ErrorCard } from "./components/ErrorCard/ErrorCard";
import { SetupRequirementsCard } from "./components/SetupRequirementsCard/SetupRequirementsCard"; import { SetupRequirementsCard } from "./components/SetupRequirementsCard/SetupRequirementsCard";
@@ -11,6 +12,7 @@ import {
getAnimationText,
getRunBlockToolOutput,
isRunBlockBlockOutput,
+ isRunBlockDetailsOutput,
isRunBlockErrorOutput,
isRunBlockSetupRequirementsOutput,
ToolIcon,
@@ -41,6 +43,7 @@ export function RunBlockTool({ part }: Props) {
part.state === "output-available" && part.state === "output-available" &&
!!output && !!output &&
(isRunBlockBlockOutput(output) || (isRunBlockBlockOutput(output) ||
isRunBlockDetailsOutput(output) ||
isRunBlockSetupRequirementsOutput(output) || isRunBlockSetupRequirementsOutput(output) ||
isRunBlockErrorOutput(output)); isRunBlockErrorOutput(output));
@@ -58,6 +61,10 @@ export function RunBlockTool({ part }: Props) {
<ToolAccordion {...getAccordionMeta(output)}>
{isRunBlockBlockOutput(output) && <BlockOutputCard output={output} />}
+ {isRunBlockDetailsOutput(output) && (
+ <BlockDetailsCard output={output} />
+ )}
{isRunBlockSetupRequirementsOutput(output) && (
<SetupRequirementsCard output={output} />
)}

View File

@@ -0,0 +1,188 @@
import type { Meta, StoryObj } from "@storybook/nextjs";
import { ResponseType } from "@/app/api/__generated__/models/responseType";
import type { BlockDetailsResponse } from "../../helpers";
import { BlockDetailsCard } from "./BlockDetailsCard";
const meta: Meta<typeof BlockDetailsCard> = {
title: "Copilot/RunBlock/BlockDetailsCard",
component: BlockDetailsCard,
parameters: {
layout: "centered",
},
tags: ["autodocs"],
decorators: [
(Story) => (
<div style={{ maxWidth: 480 }}>
<Story />
</div>
),
],
};
export default meta;
type Story = StoryObj<typeof meta>;
const baseBlock: BlockDetailsResponse = {
type: ResponseType.block_details,
message:
"Here are the details for the GetWeather block. Provide the required inputs to run it.",
session_id: "session-123",
user_authenticated: true,
block: {
id: "block-abc-123",
name: "GetWeather",
description: "Fetches current weather data for a given location.",
inputs: {
type: "object",
properties: {
location: {
title: "Location",
type: "string",
description:
"City name or coordinates (e.g. 'London' or '51.5,-0.1')",
},
units: {
title: "Units",
type: "string",
description: "Temperature units: 'metric' or 'imperial'",
},
},
required: ["location"],
},
outputs: {
type: "object",
properties: {
temperature: {
title: "Temperature",
type: "number",
description: "Current temperature in the requested units",
},
condition: {
title: "Condition",
type: "string",
description: "Weather condition description (e.g. 'Sunny', 'Rain')",
},
},
},
credentials: [],
},
};
export const Default: Story = {
args: {
output: baseBlock,
},
};
export const InputsOnly: Story = {
args: {
output: {
...baseBlock,
message: "This block requires inputs. No outputs are defined.",
block: {
...baseBlock.block,
outputs: {},
},
},
},
};
export const OutputsOnly: Story = {
args: {
output: {
...baseBlock,
message: "This block has no required inputs.",
block: {
...baseBlock.block,
inputs: {},
},
},
},
};
export const ManyFields: Story = {
args: {
output: {
...baseBlock,
message: "Block with many input and output fields.",
block: {
...baseBlock.block,
name: "SendEmail",
description: "Sends an email via SMTP.",
inputs: {
type: "object",
properties: {
to: {
title: "To",
type: "string",
description: "Recipient email address",
},
subject: {
title: "Subject",
type: "string",
description: "Email subject line",
},
body: {
title: "Body",
type: "string",
description: "Email body content",
},
cc: {
title: "CC",
type: "string",
description: "CC recipients (comma-separated)",
},
bcc: {
title: "BCC",
type: "string",
description: "BCC recipients (comma-separated)",
},
},
required: ["to", "subject", "body"],
},
outputs: {
type: "object",
properties: {
message_id: {
title: "Message ID",
type: "string",
description: "Unique ID of the sent email",
},
status: {
title: "Status",
type: "string",
description: "Delivery status",
},
},
},
},
},
},
};
export const NoFieldDescriptions: Story = {
args: {
output: {
...baseBlock,
message: "Fields without descriptions.",
block: {
...baseBlock.block,
name: "SimpleBlock",
inputs: {
type: "object",
properties: {
input_a: { title: "Input A", type: "string" },
input_b: { title: "Input B", type: "number" },
},
required: ["input_a"],
},
outputs: {
type: "object",
properties: {
result: { title: "Result", type: "string" },
},
},
},
},
},
};

View File

@@ -0,0 +1,103 @@
"use client";
import type { BlockDetailsResponse } from "../../helpers";
import {
ContentBadge,
ContentCard,
ContentCardDescription,
ContentCardTitle,
ContentGrid,
ContentMessage,
} from "../../../../components/ToolAccordion/AccordionContent";
interface Props {
output: BlockDetailsResponse;
}
function SchemaFieldList({
title,
properties,
required,
}: {
title: string;
properties: Record<string, unknown>;
required?: string[];
}) {
const entries = Object.entries(properties);
if (entries.length === 0) return null;
const requiredSet = new Set(required ?? []);
return (
<ContentCard>
<ContentCardTitle className="text-xs">{title}</ContentCardTitle>
<div className="mt-2 grid gap-2">
{entries.map(([name, schema]) => {
const field = schema as Record<string, unknown> | undefined;
const fieldTitle =
typeof field?.title === "string" ? field.title : name;
const fieldType =
typeof field?.type === "string" ? field.type : "unknown";
const description =
typeof field?.description === "string"
? field.description
: undefined;
return (
<div key={name} className="rounded-xl border p-2">
<div className="flex items-center justify-between gap-2">
<ContentCardTitle className="text-xs">
{fieldTitle}
</ContentCardTitle>
<div className="flex gap-1">
<ContentBadge>{fieldType}</ContentBadge>
{requiredSet.has(name) && (
<ContentBadge>Required</ContentBadge>
)}
</div>
</div>
{description && (
<ContentCardDescription className="mt-1 text-xs">
{description}
</ContentCardDescription>
)}
</div>
);
})}
</div>
</ContentCard>
);
}
export function BlockDetailsCard({ output }: Props) {
const inputs = output.block.inputs as {
properties?: Record<string, unknown>;
required?: string[];
} | null;
const outputs = output.block.outputs as {
properties?: Record<string, unknown>;
required?: string[];
} | null;
return (
<ContentGrid>
<ContentMessage>{output.message}</ContentMessage>
{inputs?.properties && Object.keys(inputs.properties).length > 0 && (
<SchemaFieldList
title="Inputs"
properties={inputs.properties}
required={inputs.required}
/>
)}
{outputs?.properties && Object.keys(outputs.properties).length > 0 && (
<SchemaFieldList
title="Outputs"
properties={outputs.properties}
required={outputs.required}
/>
)}
</ContentGrid>
);
}

View File

@@ -10,18 +10,37 @@ import {
import type { ToolUIPart } from "ai"; import type { ToolUIPart } from "ai";
import { OrbitLoader } from "../../components/OrbitLoader/OrbitLoader"; import { OrbitLoader } from "../../components/OrbitLoader/OrbitLoader";
/** Block details returned on first run_block attempt (before input_data provided). */
export interface BlockDetailsResponse {
type: typeof ResponseType.block_details;
message: string;
session_id?: string | null;
block: {
id: string;
name: string;
description: string;
inputs: Record<string, unknown>;
outputs: Record<string, unknown>;
credentials: unknown[];
};
user_authenticated: boolean;
}
export interface RunBlockInput {
block_id?: string;
+ block_name?: string;
input_data?: Record<string, unknown>;
}
export type RunBlockToolOutput =
| SetupRequirementsResponse
+ | BlockDetailsResponse
| BlockOutputResponse
| ErrorResponse;
const RUN_BLOCK_OUTPUT_TYPES = new Set<string>([
ResponseType.setup_requirements,
+ ResponseType.block_details,
ResponseType.block_output,
ResponseType.error,
]);
@@ -35,6 +54,15 @@ export function isRunBlockSetupRequirementsOutput(
);
}
export function isRunBlockDetailsOutput(
output: RunBlockToolOutput,
): output is BlockDetailsResponse {
return (
output.type === ResponseType.block_details ||
("block" in output && typeof output.block === "object")
);
}
export function isRunBlockBlockOutput(
output: RunBlockToolOutput,
): output is BlockOutputResponse {
@@ -64,6 +92,7 @@ function parseOutput(output: unknown): RunBlockToolOutput | null {
return output as RunBlockToolOutput;
}
if ("block_id" in output) return output as BlockOutputResponse;
+ if ("block" in output) return output as BlockDetailsResponse;
if ("setup_info" in output) return output as SetupRequirementsResponse;
if ("error" in output || "details" in output)
return output as ErrorResponse;
@@ -84,17 +113,25 @@ export function getAnimationText(part: {
output?: unknown;
}): string {
const input = part.input as RunBlockInput | undefined;
+ const blockName = input?.block_name?.trim();
const blockId = input?.block_id?.trim();
- const blockText = blockId ? ` "${blockId}"` : "";
+ // Prefer block_name if available, otherwise fall back to block_id
+ const blockText = blockName
+ ? ` "${blockName}"`
+ : blockId
+ ? ` "${blockId}"`
+ : "";
switch (part.state) {
case "input-streaming":
case "input-available":
- return `Running the block${blockText}`;
+ return `Running${blockText}`;
case "output-available": {
const output = parseOutput(part.output);
- if (!output) return `Running the block${blockText}`;
+ if (!output) return `Running${blockText}`;
if (isRunBlockBlockOutput(output)) return `Ran "${output.block_name}"`;
+ if (isRunBlockDetailsOutput(output))
+ return `Details for "${output.block.name}"`;
if (isRunBlockSetupRequirementsOutput(output)) {
return `Setup needed for "${output.setup_info.agent_name}"`;
}
@@ -158,6 +195,21 @@ export function getAccordionMeta(output: RunBlockToolOutput): {
};
}
if (isRunBlockDetailsOutput(output)) {
const inputKeys = Object.keys(
(output.block.inputs as { properties?: Record<string, unknown> })
?.properties ?? {},
);
return {
icon,
title: output.block.name,
description:
inputKeys.length > 0
? `${inputKeys.length} input field${inputKeys.length === 1 ? "" : "s"} available`
: output.message,
};
}
if (isRunBlockSetupRequirementsOutput(output)) {
const missingCredsCount = Object.keys(
(output.setup_info.user_readiness?.missing_credentials ?? {}) as Record<

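Illustrative example (not part of the diff, shapes assumed from the helpers above): a first run_block call without input_data comes back as a block_details payload, which the new guard and accordion metadata handle like this.

const detailsOutput = {
  type: "block_details",
  message: "Provide the required inputs to run it.",
  session_id: "session-123",
  user_authenticated: true,
  block: {
    id: "block-abc-123",
    name: "GetWeather",
    description: "Fetches current weather data for a given location.",
    inputs: {
      properties: { location: { type: "string", title: "Location" } },
      required: ["location"],
    },
    outputs: {},
    credentials: [],
  },
};

// isRunBlockDetailsOutput(detailsOutput)      -> true (matches the type / "block" key)
// getAnimationText with this output           -> 'Details for "GetWeather"'
// getAccordionMeta(detailsOutput).title       -> "GetWeather"
// getAccordionMeta(detailsOutput).description -> "1 input field available"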
View File

@@ -1,10 +1,14 @@
import { useGetV2ListSessions } from "@/app/api/__generated__/endpoints/chat/chat";
+ import { toast } from "@/components/molecules/Toast/use-toast";
import { useBreakpoint } from "@/lib/hooks/useBreakpoint";
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
import { useChat } from "@ai-sdk/react";
import { DefaultChatTransport } from "ai";
- import { useEffect, useMemo, useState } from "react";
+ import { useEffect, useMemo, useRef, useState } from "react";
import { useChatSession } from "./useChatSession";
import { useLongRunningToolPolling } from "./hooks/useLongRunningToolPolling";
const STREAM_START_TIMEOUT_MS = 12_000;
export function useCopilotPage() {
const { isUserLoading, isLoggedIn } = useSupabase();
@@ -52,6 +56,24 @@
transport: transport ?? undefined,
});
// Abort the stream if the backend doesn't start sending data within 12s.
const stopRef = useRef(stop);
stopRef.current = stop;
useEffect(() => {
if (status !== "submitted") return;
const timer = setTimeout(() => {
stopRef.current();
toast({
title: "Stream timed out",
description: "The server took too long to respond. Please try again.",
variant: "destructive",
});
}, STREAM_START_TIMEOUT_MS);
return () => clearTimeout(timer);
}, [status]);
useEffect(() => {
if (!hydratedMessages || hydratedMessages.length === 0) return;
setMessages((prev) => {
@@ -60,6 +82,11 @@
});
}, [hydratedMessages, setMessages]);
// Poll session endpoint when a long-running tool (create_agent, edit_agent)
// is in progress. When the backend completes, the session data will contain
// the final tool output — this hook detects the change and updates messages.
useLongRunningToolPolling(sessionId, messages, setMessages);
// Clear messages when session is null
useEffect(() => {
if (!sessionId) setMessages([]);

View File

@@ -29,6 +29,7 @@ export function ScheduleListItem({
description={formatDistanceToNow(schedule.next_run_time, {
addSuffix: true,
})}
+ descriptionTitle={new Date(schedule.next_run_time).toString()}
onClick={onClick}
selected={selected}
icon={

View File

@@ -7,6 +7,7 @@ import React from "react";
interface Props {
title: string;
description?: string;
+ descriptionTitle?: string;
icon?: React.ReactNode;
selected?: boolean;
onClick?: () => void;
@@ -16,6 +17,7 @@ interface Props {
export function SidebarItemCard({
title,
description,
+ descriptionTitle,
icon,
selected,
onClick,
@@ -38,7 +40,11 @@ export function SidebarItemCard({
>
{title}
</Text>
- <Text variant="body" className="leading-tight !text-zinc-500">
+ <Text
+ variant="body"
+ className="leading-tight !text-zinc-500"
+ title={descriptionTitle}
+ >
{description}
</Text>
</div>

View File

@@ -81,6 +81,9 @@ export function TaskListItem({
? formatDistanceToNow(run.started_at, { addSuffix: true })
: "—"
}
+ descriptionTitle={
+ run.started_at ? new Date(run.started_at).toString() : undefined
+ }
onClick={onClick}
selected={selected}
actions={

View File

@@ -1053,6 +1053,7 @@
"$ref": "#/components/schemas/ClarificationNeededResponse" "$ref": "#/components/schemas/ClarificationNeededResponse"
}, },
{ "$ref": "#/components/schemas/BlockListResponse" }, { "$ref": "#/components/schemas/BlockListResponse" },
{ "$ref": "#/components/schemas/BlockDetailsResponse" },
{ "$ref": "#/components/schemas/BlockOutputResponse" }, { "$ref": "#/components/schemas/BlockOutputResponse" },
{ "$ref": "#/components/schemas/DocSearchResultsResponse" }, { "$ref": "#/components/schemas/DocSearchResultsResponse" },
{ "$ref": "#/components/schemas/DocPageResponse" }, { "$ref": "#/components/schemas/DocPageResponse" },
@@ -6958,6 +6959,58 @@
"enum": ["run", "byte", "second"], "enum": ["run", "byte", "second"],
"title": "BlockCostType" "title": "BlockCostType"
}, },
"BlockDetails": {
"properties": {
"id": { "type": "string", "title": "Id" },
"name": { "type": "string", "title": "Name" },
"description": { "type": "string", "title": "Description" },
"inputs": {
"additionalProperties": true,
"type": "object",
"title": "Inputs",
"default": {}
},
"outputs": {
"additionalProperties": true,
"type": "object",
"title": "Outputs",
"default": {}
},
"credentials": {
"items": { "$ref": "#/components/schemas/CredentialsMetaInput" },
"type": "array",
"title": "Credentials",
"default": []
}
},
"type": "object",
"required": ["id", "name", "description"],
"title": "BlockDetails",
"description": "Detailed block information."
},
"BlockDetailsResponse": {
"properties": {
"type": {
"$ref": "#/components/schemas/ResponseType",
"default": "block_details"
},
"message": { "type": "string", "title": "Message" },
"session_id": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Session Id"
},
"block": { "$ref": "#/components/schemas/BlockDetails" },
"user_authenticated": {
"type": "boolean",
"title": "User Authenticated",
"default": false
}
},
"type": "object",
"required": ["message", "block"],
"title": "BlockDetailsResponse",
"description": "Response for block details (first run_block attempt)."
},
"BlockInfo": { "BlockInfo": {
"properties": { "properties": {
"id": { "type": "string", "title": "Id" }, "id": { "type": "string", "title": "Id" },
@@ -7022,29 +7075,24 @@
"input_schema": { "input_schema": {
"additionalProperties": true, "additionalProperties": true,
"type": "object", "type": "object",
"title": "Input Schema" "title": "Input Schema",
"description": "Full JSON schema for block inputs"
}, },
"output_schema": { "output_schema": {
"additionalProperties": true, "additionalProperties": true,
"type": "object", "type": "object",
"title": "Output Schema" "title": "Output Schema",
"description": "Full JSON schema for block outputs"
}, },
"required_inputs": { "required_inputs": {
"items": { "$ref": "#/components/schemas/BlockInputFieldInfo" }, "items": { "$ref": "#/components/schemas/BlockInputFieldInfo" },
"type": "array", "type": "array",
"title": "Required Inputs", "title": "Required Inputs",
"description": "List of required input fields for this block" "description": "List of input fields for this block"
} }
}, },
"type": "object", "type": "object",
"required": [ "required": ["id", "name", "description", "categories"],
"id",
"name",
"description",
"categories",
"input_schema",
"output_schema"
],
"title": "BlockInfoSummary", "title": "BlockInfoSummary",
"description": "Summary of a block for search results." "description": "Summary of a block for search results."
}, },
@@ -7090,7 +7138,7 @@
"usage_hint": { "usage_hint": {
"type": "string", "type": "string",
"title": "Usage Hint", "title": "Usage Hint",
"default": "To execute a block, call run_block with block_id set to the block's 'id' field and input_data containing the required fields from input_schema." "default": "To execute a block, call run_block with block_id set to the block's 'id' field and input_data containing the fields listed in required_inputs."
} }
}, },
"type": "object", "type": "object",
@@ -10484,6 +10532,7 @@
"agent_saved", "agent_saved",
"clarification_needed", "clarification_needed",
"block_list", "block_list",
"block_details",
"block_output", "block_output",
"doc_search_results", "doc_search_results",
"doc_page", "doc_page",
@@ -10495,7 +10544,10 @@
"operation_started", "operation_started",
"operation_pending", "operation_pending",
"operation_in_progress", "operation_in_progress",
"input_validation_error" "input_validation_error",
"web_fetch",
"bash_exec",
"operation_status"
], ],
"title": "ResponseType", "title": "ResponseType",
"description": "Types of tool responses." "description": "Types of tool responses."

View File

@@ -180,3 +180,14 @@ body[data-google-picker-open="true"] [data-dialog-content] {
z-index: 1 !important;
pointer-events: none !important;
}
/* CoPilot chat table styling — remove left/right borders, increase padding */
[data-streamdown="table-wrapper"] table {
border-left: none;
border-right: none;
}
[data-streamdown="table-wrapper"] th,
[data-streamdown="table-wrapper"] td {
padding: 0.875rem 1rem; /* py-3.5 px-4 */
}

View File

@@ -30,6 +30,7 @@ export function APIKeyCredentialsModal({
const {
form,
isLoading,
+ isSubmitting,
supportsApiKey,
providerName,
schemaDescription,
@@ -138,7 +139,12 @@ export function APIKeyCredentialsModal({
/>
)}
/>
- <Button type="submit" className="min-w-68">
+ <Button
+ type="submit"
+ className="min-w-68"
+ loading={isSubmitting}
+ disabled={isSubmitting}
+ >
Add API Key
</Button>
</form>

View File

@@ -4,6 +4,7 @@ import {
 CredentialsMetaInput,
 } from "@/lib/autogpt-server-api/types";
 import { zodResolver } from "@hookform/resolvers/zod";
+import { useState } from "react";
 import { useForm, type UseFormReturn } from "react-hook-form";
 import { z } from "zod";
@@ -26,6 +27,7 @@ export function useAPIKeyCredentialsModal({
 }: Args): {
 form: UseFormReturn<APIKeyFormValues>;
 isLoading: boolean;
+isSubmitting: boolean;
 supportsApiKey: boolean;
 provider?: string;
 providerName?: string;
@@ -33,6 +35,7 @@ export function useAPIKeyCredentialsModal({
 onSubmit: (values: APIKeyFormValues) => Promise<void>;
 } {
 const credentials = useCredentials(schema, siblingInputs);
+const [isSubmitting, setIsSubmitting] = useState(false);
 const formSchema = z.object({
 apiKey: z.string().min(1, "API Key is required"),
@@ -40,48 +43,42 @@ export function useAPIKeyCredentialsModal({
 expiresAt: z.string().optional(),
 });
-function getDefaultExpirationDate(): string {
-const tomorrow = new Date();
-tomorrow.setDate(tomorrow.getDate() + 1);
-tomorrow.setHours(0, 0, 0, 0);
-const year = tomorrow.getFullYear();
-const month = String(tomorrow.getMonth() + 1).padStart(2, "0");
-const day = String(tomorrow.getDate()).padStart(2, "0");
-const hours = String(tomorrow.getHours()).padStart(2, "0");
-const minutes = String(tomorrow.getMinutes()).padStart(2, "0");
-return `${year}-${month}-${day}T${hours}:${minutes}`;
-}
 const form = useForm<APIKeyFormValues>({
 resolver: zodResolver(formSchema),
 defaultValues: {
 apiKey: "",
 title: "",
-expiresAt: getDefaultExpirationDate(),
+expiresAt: "",
 },
 });
 async function onSubmit(values: APIKeyFormValues) {
 if (!credentials || credentials.isLoading) return;
+setIsSubmitting(true);
+try {
 const expiresAt = values.expiresAt
 ? new Date(values.expiresAt).getTime() / 1000
 : undefined;
 const newCredentials = await credentials.createAPIKeyCredentials({
 api_key: values.apiKey,
 title: values.title,
 expires_at: expiresAt,
 });
 onCredentialsCreate({
 provider: credentials.provider,
 id: newCredentials.id,
 type: "api_key",
 title: newCredentials.title,
 });
+} finally {
+setIsSubmitting(false);
+}
 }
 return {
 form,
 isLoading: !credentials || credentials.isLoading,
+isSubmitting,
 supportsApiKey: !!credentials?.supportsApiKey,
 provider: credentials?.provider,
 providerName:

View File

@@ -226,7 +226,7 @@ function renderMarkdown(
 table: ({ children, ...props }) => (
 <div className="my-4 overflow-x-auto">
 <table
-className="min-w-full divide-y divide-gray-200 rounded-lg border border-gray-200 dark:divide-gray-700 dark:border-gray-700"
+className="min-w-full divide-y divide-gray-200 border-y border-gray-200 dark:divide-gray-700 dark:border-gray-700"
 {...props}
 >
 {children}
@@ -235,7 +235,7 @@ function renderMarkdown(
 ),
 th: ({ children, ...props }) => (
 <th
-className="bg-gray-50 px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-gray-700 dark:bg-gray-800 dark:text-gray-300"
+className="bg-gray-50 px-4 py-3.5 text-left text-xs font-semibold uppercase tracking-wider text-gray-700 dark:bg-gray-800 dark:text-gray-300"
 {...props}
 >
 {children}
@@ -243,7 +243,7 @@ function renderMarkdown(
 ),
 td: ({ children, ...props }) => (
 <td
-className="border-t border-gray-200 px-4 py-3 text-sm text-gray-600 dark:border-gray-700 dark:text-gray-400"
+className="border-t border-gray-200 px-4 py-3.5 text-sm text-gray-600 dark:border-gray-700 dark:text-gray-400"
 {...props}
 >
 {children}

View File

@@ -563,7 +563,7 @@ The block supports conversation continuation through three mechanisms:
 |--------|-------------|------|
 | error | Error message if execution failed | str |
 | response | The output/response from Claude Code execution | str |
-| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', and 'content' fields. | List[FileOutput] |
+| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI if the file was stored to workspace. | List[SandboxFileOutput] |
 | conversation_history | Full conversation history including this turn. Pass this to conversation_history input to continue on a fresh sandbox if the previous sandbox timed out. | str |
 | session_id | Session ID for this conversation. Pass this back along with sandbox_id to continue the conversation. | str |
 | sandbox_id | ID of the sandbox instance. Pass this back along with session_id to continue the conversation. This is None if dispose_sandbox was True (sandbox was disposed). | str |
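
For context, a rough sketch of the continuation flow these outputs describe; `previous_output` stands in for the output of a prior run of this block, and all values are fabricated placeholders:

```python
# previous_output stands in for the output dict of the prior execution of this block
# (values here are fake placeholders, shaped like the table above).
previous_output = {
    "session_id": "sess-123",
    "sandbox_id": "sbx-456",  # would be None if dispose_sandbox was True
    "conversation_history": "<conversation history from the previous turn>",
}

# Follow-up turn reusing the same sandbox:
continue_same_sandbox = {
    "prompt": "Now add a /health endpoint",
    "session_id": previous_output["session_id"],
    "sandbox_id": previous_output["sandbox_id"],
}

# Follow-up turn on a fresh sandbox (e.g. the previous one timed out),
# replaying the conversation history instead:
continue_fresh_sandbox = {
    "prompt": "Now add a /health endpoint",
    "conversation_history": previous_output["conversation_history"],
}
```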

View File

@@ -215,6 +215,7 @@ The sandbox includes pip and npm pre-installed. Set timeout to limit execution t
 | response | Text output (if any) of the main execution result | str |
 | stdout_logs | Standard output logs from execution | str |
 | stderr_logs | Standard error logs from execution | str |
+| files | Files created or modified during execution. Each file has path, name, content, and workspace_ref (if stored). | List[SandboxFileOutput] |
 ### Possible use case
 <!-- MANUAL: use_case -->
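
As a rough sketch of how a consumer might handle the `files` row added above, assuming a fabricated `result` dict shaped like this block's outputs (paths and URIs are placeholders):

```python
# result stands in for this block's output dict; all values are fake placeholders.
result = {
    "files": [
        {"path": "/workspace/report.md", "name": "report.md",
         "content": "# Report", "workspace_ref": "workspace://report.md"},
        {"path": "/tmp/scratch.txt", "name": "scratch.txt",
         "content": "temp notes", "workspace_ref": None},
    ],
}

for f in result["files"]:
    if f.get("workspace_ref"):
        # Persisted to workspace storage; the workspace:// URI can be passed along.
        print(f"{f['name']} stored at {f['workspace_ref']}")
    else:
        # Not stored; only the inline text content is available.
        print(f"{f['name']}: {len(f['content'])} characters inline")
```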