mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
4 Commits
jps/custom
...
fix-sqlalc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
836cf322a7 | ||
|
|
306950f328 | ||
|
|
f25160ad94 | ||
|
|
b10f3b2cbb |
69
.github/workflows/clean-up.yml
vendored
Normal file
69
.github/workflows/clean-up.yml
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
# Workflow that cleans up outdated and old workflows to prevent out of disk issues
|
||||
name: Delete old workflow runs
|
||||
|
||||
# This workflow is currently only triggered manually
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
days:
|
||||
description: 'Days-worth of runs to keep for each workflow'
|
||||
required: true
|
||||
default: '30'
|
||||
minimum_runs:
|
||||
description: 'Minimum runs to keep for each workflow'
|
||||
required: true
|
||||
default: '10'
|
||||
delete_workflow_pattern:
|
||||
description: 'Name or filename of the workflow (if not set, all workflows are targeted)'
|
||||
required: false
|
||||
delete_workflow_by_state_pattern:
|
||||
description: 'Filter workflows by state: active, deleted, disabled_fork, disabled_inactivity, disabled_manually'
|
||||
required: true
|
||||
default: "ALL"
|
||||
type: choice
|
||||
options:
|
||||
- "ALL"
|
||||
- active
|
||||
- deleted
|
||||
- disabled_inactivity
|
||||
- disabled_manually
|
||||
delete_run_by_conclusion_pattern:
|
||||
description: 'Remove runs based on conclusion: action_required, cancelled, failure, skipped, success'
|
||||
required: true
|
||||
default: 'ALL'
|
||||
type: choice
|
||||
options:
|
||||
- 'ALL'
|
||||
- 'Unsuccessful: action_required,cancelled,failure,skipped'
|
||||
- action_required
|
||||
- cancelled
|
||||
- failure
|
||||
- skipped
|
||||
- success
|
||||
dry_run:
|
||||
description: 'Logs simulated changes, no deletions are performed'
|
||||
required: false
|
||||
|
||||
jobs:
|
||||
del_runs:
|
||||
runs-on: blacksmith-4vcpu-ubuntu-2204
|
||||
permissions:
|
||||
actions: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Delete workflow runs
|
||||
uses: Mattraks/delete-workflow-runs@v2
|
||||
with:
|
||||
token: ${{ github.token }}
|
||||
repository: ${{ github.repository }}
|
||||
retain_days: ${{ github.event.inputs.days }}
|
||||
keep_minimum_runs: ${{ github.event.inputs.minimum_runs }}
|
||||
delete_workflow_pattern: ${{ github.event.inputs.delete_workflow_pattern }}
|
||||
delete_workflow_by_state_pattern: ${{ github.event.inputs.delete_workflow_by_state_pattern }}
|
||||
delete_run_by_conclusion_pattern: >-
|
||||
${{
|
||||
startsWith(github.event.inputs.delete_run_by_conclusion_pattern, 'Unsuccessful:')
|
||||
&& 'action_required,cancelled,failure,skipped'
|
||||
|| github.event.inputs.delete_run_by_conclusion_pattern
|
||||
}}
|
||||
dry_run: ${{ github.event.inputs.dry_run }}
|
||||
23
.github/workflows/dispatch-to-docs.yml
vendored
Normal file
23
.github/workflows/dispatch-to-docs.yml
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
name: Dispatch to docs repo
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'docs/**'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
dispatch:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
repo: ["OpenHands/docs"]
|
||||
steps:
|
||||
- name: Push to docs repo
|
||||
uses: peter-evans/repository-dispatch@v3
|
||||
with:
|
||||
token: ${{ secrets.ALLHANDS_BOT_GITHUB_PAT }}
|
||||
repository: ${{ matrix.repo }}
|
||||
event-type: update
|
||||
client-payload: '{"ref": "${{ github.ref }}", "sha": "${{ github.sha }}", "module": "openhands", "branch": "main"}'
|
||||
70
.github/workflows/mdx-lint.yml
vendored
Normal file
70
.github/workflows/mdx-lint.yml
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
# Workflow that checks MDX format in docs/ folder
|
||||
name: MDX Lint
|
||||
|
||||
# Run on pushes to main and on pull requests that modify docs/ files
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'docs/**/*.mdx'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'docs/**/*.mdx'
|
||||
|
||||
# If triggered by a PR, it will be in the same group. However, each commit on main will be in its own unique group
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ (github.head_ref && github.ref) || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
mdx-lint:
|
||||
name: Lint MDX files
|
||||
runs-on: blacksmith-4vcpu-ubuntu-2204
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Node.js 22
|
||||
uses: useblacksmith/setup-node@v5
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Install MDX dependencies
|
||||
run: |
|
||||
npm install @mdx-js/mdx@3 glob@10
|
||||
|
||||
- name: Validate MDX files
|
||||
run: |
|
||||
node -e "
|
||||
const {compile} = require('@mdx-js/mdx');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const glob = require('glob');
|
||||
|
||||
// Compiles every MDX file under docs/ and exits with status 1 if any of them
// fails to parse. This script is passed to node -e inside a double-quoted
// shell string, so only single-quoted JS strings are used and newlines in
// messages are written with a doubled backslash.
async function validateMDXFiles() {
  const mdxPaths = glob.sync('docs/**/*.mdx');
  console.log('Found', mdxPaths.length, 'MDX files to validate');

  let failureCount = 0;

  for (const mdxPath of mdxPaths) {
    try {
      // Read and compile in one step; any read or parse error lands in catch.
      await compile(fs.readFileSync(mdxPath, 'utf8'));
      console.log('✅ MDX parsing successful for', mdxPath);
    } catch (parseError) {
      console.error('❌ MDX parsing failed for', mdxPath, ':', parseError.message);
      failureCount += 1;
    }
  }

  if (failureCount === 0) {
    console.log('\\n✅ All MDX files are valid!');
    return;
  }

  // Keep going through every file above so all failures are reported at once,
  // then fail the step.
  console.error('\\n❌ Some MDX files have parsing errors. Please fix them before merging.');
  process.exit(1);
}
|
||||
|
||||
validateMDXFiles();
|
||||
"
|
||||
135
.github/workflows/run-eval.yml
vendored
Normal file
135
.github/workflows/run-eval.yml
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
# Run evaluation on a PR, after releases, or manually
|
||||
name: Run Eval
|
||||
|
||||
# Runs when a PR is labeled with one of the "run-eval-" labels, after releases, or manually triggered
|
||||
on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
branch:
|
||||
description: 'Branch to evaluate'
|
||||
required: true
|
||||
default: 'main'
|
||||
eval_instances:
|
||||
description: 'Number of evaluation instances'
|
||||
required: true
|
||||
default: '50'
|
||||
type: choice
|
||||
options:
|
||||
- '1'
|
||||
- '2'
|
||||
- '50'
|
||||
- '100'
|
||||
reason:
|
||||
description: 'Reason for manual trigger'
|
||||
required: false
|
||||
default: ''
|
||||
|
||||
env:
|
||||
# Environment variable for the master GitHub issue number where all evaluation results will be commented
|
||||
# This should be set to the issue number where you want all evaluation results to be posted
|
||||
MASTER_EVAL_ISSUE_NUMBER: ${{ vars.MASTER_EVAL_ISSUE_NUMBER || '0' }}
|
||||
|
||||
jobs:
|
||||
trigger-job:
|
||||
name: Trigger remote eval job
|
||||
if: ${{ (github.event_name == 'pull_request' && (github.event.label.name == 'run-eval-1' || github.event.label.name == 'run-eval-2' || github.event.label.name == 'run-eval-50' || github.event.label.name == 'run-eval-100')) || github.event_name == 'release' || github.event_name == 'workflow_dispatch' }}
|
||||
runs-on: blacksmith-4vcpu-ubuntu-2204
|
||||
|
||||
steps:
|
||||
- name: Checkout branch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.head_ref || (github.event_name == 'workflow_dispatch' && github.event.inputs.branch) || github.ref }}
|
||||
|
||||
- name: Set evaluation parameters
|
||||
id: eval_params
|
||||
run: |
|
||||
REPO_URL="https://github.com/${{ github.repository }}"
|
||||
echo "Repository URL: $REPO_URL"
|
||||
|
||||
# Determine branch based on trigger type
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
EVAL_BRANCH="${{ github.head_ref }}"
|
||||
echo "PR Branch: $EVAL_BRANCH"
|
||||
elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
EVAL_BRANCH="${{ github.event.inputs.branch }}"
|
||||
echo "Manual Branch: $EVAL_BRANCH"
|
||||
else
|
||||
# For release events, use the tag name or main branch
|
||||
EVAL_BRANCH="${{ github.ref_name }}"
|
||||
echo "Release Branch/Tag: $EVAL_BRANCH"
|
||||
fi
|
||||
|
||||
# Determine evaluation instances based on trigger type
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
if [[ "${{ github.event.label.name }}" == "run-eval-1" ]]; then
|
||||
EVAL_INSTANCES="1"
|
||||
elif [[ "${{ github.event.label.name }}" == "run-eval-2" ]]; then
|
||||
EVAL_INSTANCES="2"
|
||||
elif [[ "${{ github.event.label.name }}" == "run-eval-50" ]]; then
|
||||
EVAL_INSTANCES="50"
|
||||
elif [[ "${{ github.event.label.name }}" == "run-eval-100" ]]; then
|
||||
EVAL_INSTANCES="100"
|
||||
fi
|
||||
elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
EVAL_INSTANCES="${{ github.event.inputs.eval_instances }}"
|
||||
else
|
||||
# For release events, default to 50 instances
|
||||
EVAL_INSTANCES="50"
|
||||
fi
|
||||
|
||||
echo "Evaluation instances: $EVAL_INSTANCES"
|
||||
echo "repo_url=$REPO_URL" >> $GITHUB_OUTPUT
|
||||
echo "eval_branch=$EVAL_BRANCH" >> $GITHUB_OUTPUT
|
||||
echo "eval_instances=$EVAL_INSTANCES" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Trigger remote job
|
||||
run: |
|
||||
# Determine PR number for the remote evaluation system
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
PR_NUMBER="${{ github.event.pull_request.number }}"
|
||||
else
|
||||
# For non-PR triggers, use the master issue number as PR number
|
||||
PR_NUMBER="${{ env.MASTER_EVAL_ISSUE_NUMBER }}"
|
||||
fi
|
||||
|
||||
curl -X POST \
|
||||
-H "Authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-d "{\"ref\": \"main\", \"inputs\": {\"github-repo\": \"${{ steps.eval_params.outputs.repo_url }}\", \"github-branch\": \"${{ steps.eval_params.outputs.eval_branch }}\", \"pr-number\": \"${PR_NUMBER}\", \"eval-instances\": \"${{ steps.eval_params.outputs.eval_instances }}\"}}" \
|
||||
https://api.github.com/repos/OpenHands/evaluation/actions/workflows/create-branch.yml/dispatches
|
||||
|
||||
# Send Slack message
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
TRIGGER_URL="https://github.com/${{ github.repository }}/pull/${{ github.event.pull_request.number }}"
|
||||
slack_text="PR $TRIGGER_URL has triggered evaluation on ${{ steps.eval_params.outputs.eval_instances }} instances..."
|
||||
elif [[ "${{ github.event_name }}" == "release" ]]; then
|
||||
TRIGGER_URL="https://github.com/${{ github.repository }}/releases/tag/${{ github.ref_name }}"
|
||||
slack_text="Release $TRIGGER_URL has triggered evaluation on ${{ steps.eval_params.outputs.eval_instances }} instances..."
|
||||
else
|
||||
TRIGGER_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
slack_text="Manual trigger (${{ github.event.inputs.reason || 'No reason provided' }}) has triggered evaluation on ${{ steps.eval_params.outputs.eval_instances }} instances for branch ${{ steps.eval_params.outputs.eval_branch }}..."
|
||||
fi
|
||||
|
||||
curl -X POST -H 'Content-type: application/json' --data '{"text":"'"$slack_text"'"}' \
|
||||
https://hooks.slack.com/services/${{ secrets.SLACK_TOKEN }}
|
||||
|
||||
- name: Comment on issue/PR
|
||||
uses: KeisukeYamashita/create-comment@v1
|
||||
with:
|
||||
# For PR triggers, comment on the PR. For other triggers, comment on the master issue
|
||||
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || env.MASTER_EVAL_ISSUE_NUMBER }}
|
||||
unique: false
|
||||
comment: |
|
||||
**Evaluation Triggered**
|
||||
|
||||
**Trigger:** ${{ github.event_name == 'pull_request' && format('Pull Request #{0}', github.event.pull_request.number) || (github.event_name == 'release' && 'Release') || format('Manual Trigger: {0}', github.event.inputs.reason || 'No reason provided') }}
|
||||
**Branch:** ${{ steps.eval_params.outputs.eval_branch }}
|
||||
**Instances:** ${{ steps.eval_params.outputs.eval_instances }}
|
||||
**Commit:** ${{ github.sha }}
|
||||
|
||||
Running evaluation on the specified branch. Once eval is done, the results will be posted here.
|
||||
11
README.md
11
README.md
@@ -1,7 +1,7 @@
|
||||
<a name="readme-top"></a>
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/OpenHands/docs/main/openhands/static/img/logo.png" alt="Logo" width="200">
|
||||
<img src="https://raw.githubusercontent.com/All-Hands-AI/docs/main/openhands/static/img/logo.png" alt="Logo" width="200">
|
||||
<h1 align="center" style="border-bottom: none">OpenHands: AI-Driven Development</h1>
|
||||
</div>
|
||||
|
||||
@@ -35,9 +35,9 @@ There are a few ways to work with OpenHands:
|
||||
### OpenHands Software Agent SDK
|
||||
The SDK is a composable Python library that contains all of our agentic tech. It's the engine that powers everything else below.
|
||||
|
||||
Define agents in code, then run them locally, or scale to 1000s of agents in the cloud.
|
||||
Define agents in code, then run them locally, or scale to 1000s of agents in the cloud
|
||||
|
||||
[Check out the docs](https://docs.openhands.dev/sdk) or [view the source](https://github.com/OpenHands/software-agent-sdk/)
|
||||
[Check out the docs](https://docs.openhands.dev/sdk) or [view the source](https://github.com/All-Hands-AI/agent-sdk/)
|
||||
|
||||
### OpenHands CLI
|
||||
The CLI is the easiest way to start using OpenHands. The experience will be familiar to anyone who has worked
|
||||
@@ -52,15 +52,18 @@ The experience will be familiar to anyone who has used Devin or Jules.
|
||||
[Check out the docs](https://docs.openhands.dev/openhands/usage/run-openhands/local-setup) or view the source in this repo.
|
||||
|
||||
### OpenHands Cloud
|
||||
This is a deployment of OpenHands GUI, running on hosted infrastructure.
|
||||
This is a commercial deployment of OpenHands GUI, running on hosted infrastructure.
|
||||
|
||||
You can try it with a free $10 credit by [signing in with your GitHub account](https://app.all-hands.dev).
|
||||
|
||||
OpenHands Cloud comes with source-available features and integrations:
|
||||
- Deeper integrations with GitHub, GitLab, and Bitbucket
|
||||
- Integrations with Slack, Jira, and Linear
|
||||
- Multi-user support
|
||||
- RBAC and permissions
|
||||
- Collaboration features (e.g., conversation sharing)
|
||||
- Usage reporting
|
||||
- Budgeting enforcement
|
||||
|
||||
### OpenHands Enterprise
|
||||
Large enterprises can work with us to self-host OpenHands Cloud in their own VPC, via Kubernetes.
|
||||
|
||||
@@ -1,836 +0,0 @@
|
||||
# Custom Agent Packages with Custom Runtime Images (Scenario 1)
|
||||
|
||||
## 1. Introduction
|
||||
|
||||
### 1.1 Problem Statement
|
||||
|
||||
OpenHands currently supports agent customization through the software-agent-sdk, but users who need custom system dependencies, specialized tools, or non-Python runtime environments cannot easily deploy their agents. The current V1 architecture uses a fixed agent server image (`ghcr.io/openhands/agent-server:5f62cee-python`) that may not contain the required dependencies for specialized agents.
|
||||
|
||||
Users building agents that require:
|
||||
- Custom system packages (e.g., specialized compilers, databases, ML frameworks)
|
||||
- Non-Python tools and runtimes (e.g., Node.js, Go, Rust toolchains)
|
||||
- Custom Docker base images with specific OS configurations
|
||||
- Proprietary or licensed software installations
|
||||
|
||||
Such users currently have no supported path to deploy their agents to OpenHands Enterprise.
|
||||
|
||||
### 1.2 Proposed Solution
|
||||
|
||||
We propose extending the existing **Sandbox Specification System** to support custom agent runtime images with proper permissions and security controls. This approach builds directly on OpenHands' current sandbox infrastructure rather than creating parallel systems.
|
||||
|
||||
Users will be able to:
|
||||
1. Create custom Docker images containing their agent code and dependencies
|
||||
2. Register these images as enhanced sandbox specifications with rich metadata
|
||||
3. Deploy conversations using their custom sandbox specs (with proper permissions)
|
||||
4. Maintain full compatibility with existing sandbox management and API infrastructure
|
||||
|
||||
The solution extends the current `SandboxSpecService` with:
|
||||
- **Permission-based access control** to limit custom specs to authorized users
|
||||
- **Enhanced sandbox specifications** that include agent-specific metadata and requirements
|
||||
- **Secure image management** with validation and approval workflows
|
||||
- **Integrated deployment** through existing conversation creation APIs
|
||||
|
||||
**Trade-offs**: This approach requires users to build and maintain Docker images, increasing complexity compared to simple Python package deployment. However, it provides the necessary isolation and dependency management for complex agent requirements while leveraging proven sandbox infrastructure.
|
||||
|
||||
## 2. User Interface
|
||||
|
||||
### 2.1 Custom Agent Image Creation
|
||||
|
||||
Users create a custom agent image by extending the base agent server image:
|
||||
|
||||
```dockerfile
|
||||
# Dockerfile for custom agent
|
||||
FROM ghcr.io/openhands/agent-server:5f62cee-python
|
||||
|
||||
# Install custom system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
nodejs \
|
||||
npm \
|
||||
golang-go \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install custom Python packages
|
||||
COPY requirements.txt /tmp/
|
||||
RUN pip install -r /tmp/requirements.txt
|
||||
|
||||
# Copy custom agent code
|
||||
COPY my_custom_agent/ /app/my_custom_agent/
|
||||
COPY agent_config.json /app/config/
|
||||
|
||||
# Set custom agent as default
|
||||
ENV CUSTOM_AGENT_MODULE=my_custom_agent
|
||||
ENV CUSTOM_AGENT_CLASS=MySpecializedAgent
|
||||
```
|
||||
|
||||
### 2.2 Enhanced Sandbox Spec Registration
|
||||
|
||||
Users register their custom agent image as an enhanced sandbox specification:
|
||||
|
||||
```yaml
|
||||
# enhanced-sandbox-spec.yaml
|
||||
apiVersion: openhands.ai/v1
|
||||
kind: SandboxSpec
|
||||
metadata:
|
||||
name: specialized-ml-agent
|
||||
version: "1.0.0"
|
||||
owner: user@company.com
|
||||
permissions:
|
||||
users: ["user@company.com", "team-lead@company.com"]
|
||||
groups: ["ml-team", "data-science"]
|
||||
spec:
|
||||
image: "myregistry/specialized-ml-agent:v1.0.0"
|
||||
description: "ML agent with TensorFlow and custom data processing tools"
|
||||
# Agent-specific metadata
|
||||
agent:
|
||||
capabilities:
|
||||
- machine_learning
|
||||
- data_analysis
|
||||
- custom_visualization
|
||||
type: "custom"
|
||||
module: "agents.specialized_ml_agent"
|
||||
class: "SpecializedMLAgent"
|
||||
requirements:
|
||||
memory: "4Gi"
|
||||
cpu: "2"
|
||||
environment:
|
||||
TENSORFLOW_VERSION: "2.15.0"
|
||||
CUSTOM_MODEL_PATH: "/app/models"
|
||||
# Agent server configuration
|
||||
CUSTOM_AGENT_MODULE: "agents.specialized_ml_agent"
|
||||
CUSTOM_AGENT_CLASS: "SpecializedMLAgent"
|
||||
ports:
|
||||
- name: agent-server
|
||||
port: 8000
|
||||
- name: tensorboard
|
||||
port: 6006
|
||||
```
|
||||
|
||||
### 2.3 Conversation Creation with Custom Sandbox Spec
|
||||
|
||||
Users create conversations using their custom sandbox specs through the existing API:
|
||||
|
||||
```bash
|
||||
# Create conversation with custom sandbox spec
|
||||
curl -X POST "https://api.openhands.ai/api/conversations" \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"sandbox_spec_id": "specialized-ml-agent:v1.0.0",
|
||||
"initial_message": "Analyze this dataset and create a predictive model",
|
||||
"workspace": {
|
||||
"type": "local",
|
||||
"working_dir": "/workspace/ml-project"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
### 2.4 Image Management Workflows
|
||||
|
||||
#### 2.4.1 Pre-built Image Approach
|
||||
|
||||
For organizations that want to manage custom agent images centrally:
|
||||
|
||||
```bash
|
||||
# Admin registers pre-built image as sandbox spec
|
||||
curl -X POST "https://api.openhands.ai/api/sandbox-specs" \
|
||||
-H "Authorization: Bearer $ADMIN_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "company-ml-agent",
|
||||
"version": "1.0.0",
|
||||
"image": "company-registry/ml-agent:v1.0.0",
|
||||
"permissions": {
|
||||
"groups": ["ml-team", "data-science"]
|
||||
},
|
||||
"agent": {
|
||||
"type": "custom",
|
||||
"capabilities": ["machine_learning", "data_analysis"]
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### 2.4.2 User Upload Approach
|
||||
|
||||
For users who want to upload their own custom images:
|
||||
|
||||
```bash
|
||||
# User uploads custom image (with security validation)
|
||||
curl -X POST "https://api.openhands.ai/api/sandbox-specs/upload" \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-F "dockerfile=@Dockerfile" \
|
||||
-F "context=@agent-context.tar.gz" \
|
||||
-F "spec=@sandbox-spec.yaml"
|
||||
```
|
||||
|
||||
## 3. Other Context
|
||||
|
||||
### 3.1 Current Sandbox Specification System
|
||||
|
||||
OpenHands V1 uses a sandbox specification system to manage container deployments:
|
||||
|
||||
- **Single Default Spec**: Currently only one sandbox spec exists, shared by all users
|
||||
- **SandboxSpecService**: Manages sandbox specifications and container creation
|
||||
- **SandboxSpecInfo**: Contains image, environment, and resource configuration
|
||||
- **No Permissions**: Current system lacks user-based access control
|
||||
|
||||
The existing system provides the foundation but needs enhancement for custom agents:
|
||||
- **Permission Layer**: Required to control access to custom specs
|
||||
- **Rich Metadata**: Need agent-specific information beyond basic container config
|
||||
- **Image Management**: Need secure workflows for custom image registration
|
||||
|
||||
### 3.2 Enhanced Sandbox Specification Architecture
|
||||
|
||||
Our proposal extends the existing system with:
|
||||
|
||||
#### 3.2.1 Permission-Based Access Control
|
||||
- **User Permissions**: Individual user access to specific sandbox specs
|
||||
- **Group Permissions**: Team-based access control for organizational specs
|
||||
- **Owner Management**: Spec ownership and delegation capabilities
|
||||
- **Admin Override**: Administrative access for spec management
|
||||
|
||||
#### 3.2.2 Agent-Specific Metadata
|
||||
- **Agent Configuration**: Module, class, and capability information
|
||||
- **Resource Requirements**: Memory, CPU, and storage specifications
|
||||
- **Environment Variables**: Agent-specific configuration and secrets
|
||||
- **Port Mappings**: Additional ports for agent services (e.g., TensorBoard)
|
||||
|
||||
#### 3.2.3 Image Management Integration
|
||||
- **Registry Support**: Integration with Docker registries for image storage
|
||||
- **Security Validation**: Image scanning and approval workflows
|
||||
- **Version Management**: Support for multiple versions of custom specs
|
||||
- **Build Integration**: Optional image building from Dockerfile uploads
|
||||
|
||||
### 3.3 Existing Container Orchestration Integration
|
||||
|
||||
The enhanced system leverages existing OpenHands infrastructure:
|
||||
|
||||
- **Sandbox Service**: Extended to support permission checks and enhanced specs
|
||||
- **Container Management**: Same lifecycle management with additional metadata
|
||||
- **Network Isolation**: Maintains existing security boundaries
|
||||
- **Resource Enforcement**: Enhanced with custom resource requirements
|
||||
- **Health Monitoring**: Extended to track custom agent-specific metrics
|
||||
|
||||
## 4. Technical Design
|
||||
|
||||
### 4.1 Enhanced Sandbox Specification Model
|
||||
|
||||
#### 4.1.1 Extended SandboxSpecInfo Structure
|
||||
|
||||
The existing `SandboxSpecInfo` model is enhanced to support custom agents:
|
||||
|
||||
```python
|
||||
# openhands/app_server/sandbox/sandbox_spec_models.py (enhanced)
|
||||
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field
|
||||
|
||||
class AgentMetadata(BaseModel):
    """Agent-specific metadata for custom agents.

    Every field carries a default so the model can be constructed with no
    arguments, which is what `Field(default_factory=AgentMetadata)` requires.
    """

    type: str = Field(default="default", description="Agent type (default|custom)")
    capabilities: List[str] = Field(default_factory=list, description="Agent capabilities")
    # An Optional[...] annotation alone does not make a pydantic v2 field
    # optional; without an explicit default the field is still required.
    module: Optional[str] = Field(default=None, description="Python module containing agent class")
    class_name: Optional[str] = Field(default=None, description="Agent class name")
|
||||
|
||||
class PermissionSpec(BaseModel):
    """Permission specification for sandbox spec access.

    Every field carries a default so the model can be constructed with no
    arguments, which is what `Field(default_factory=PermissionSpec)` requires.
    """

    users: List[str] = Field(default_factory=list, description="Authorized user emails")
    groups: List[str] = Field(default_factory=list, description="Authorized group names")
    # An explicit default of None is needed; in pydantic v2 an Optional[...]
    # field without a default is required.
    owner: Optional[str] = Field(default=None, description="Spec owner")
|
||||
|
||||
class EnhancedSandboxSpecInfo(BaseModel):
    """Enhanced sandbox specification with agent metadata and permissions.

    `id`, `name` and `version` are required; every other field has a default.
    """

    # Existing fields from SandboxSpecInfo
    id: str = Field(description="Docker image identifier")
    command: List[str] = Field(default_factory=lambda: ['--port', '8000'])
    initial_env: Dict[str, str] = Field(default_factory=dict)
    working_dir: str = Field(default="/workspace/project")

    # Enhanced fields
    name: str = Field(description="Human-readable spec name")
    version: str = Field(description="Spec version")
    # Optional fields need an explicit default of None; in pydantic v2 an
    # Optional[...] annotation without a default is still a required field.
    description: Optional[str] = Field(default=None, description="Spec description")

    # Agent-specific metadata
    agent: AgentMetadata = Field(default_factory=AgentMetadata)

    # Permission and access control
    permissions: PermissionSpec = Field(default_factory=PermissionSpec)

    # Resource requirements
    memory_limit: Optional[str] = Field(default=None, description="Memory limit (e.g., '4Gi')")
    cpu_limit: Optional[str] = Field(default=None, description="CPU limit (e.g., '2')")

    # Additional ports for custom services.
    # The value type is typing.Any (the builtin function `any` is not a type):
    # each entry mixes a string name with an integer port.
    ports: List[Dict[str, Any]] = Field(
        default_factory=lambda: [{"name": "agent-server", "port": 8000}]
    )
|
||||
```
|
||||
|
||||
#### 4.1.2 Custom Agent Image Structure
|
||||
|
||||
Custom agent images extend the base agent server with this structure:
|
||||
|
||||
```
|
||||
/app/
|
||||
├── config/
|
||||
│ ├── agent_config.json # Agent configuration
|
||||
│ └── tool_registry.json # Custom tool definitions (optional)
|
||||
├── agents/
|
||||
│ └── custom_agent.py # Agent implementation
|
||||
├── tools/ # Custom tools (optional)
|
||||
│ ├── __init__.py
|
||||
│ └── custom_tools.py
|
||||
└── startup/
|
||||
└── init_agent.py # Agent initialization script
|
||||
```
|
||||
|
||||
### 4.2 Agent Implementation Interface
|
||||
|
||||
#### 4.2.1 Custom Agent Base Class
|
||||
|
||||
```python
|
||||
# agents/custom_agent.py
|
||||
from openhands.sdk.agent.base import AgentBase
|
||||
from openhands.sdk.llm import LLM
|
||||
from openhands.sdk.tool import Tool
|
||||
from typing import List, Dict, Any
|
||||
|
||||
class SpecializedMLAgent(AgentBase):
|
||||
"""Custom ML agent with TensorFlow capabilities."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
llm: LLM,
|
||||
tools: List[Tool],
|
||||
config: Dict[str, Any] = None
|
||||
):
|
||||
super().__init__(llm=llm, tools=tools)
|
||||
self.config = config or {}
|
||||
self.model_cache = self.config.get('MODEL_CACHE_DIR', '/app/models')
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Initialize custom agent resources."""
|
||||
# Load pre-trained models
|
||||
await self._load_models()
|
||||
|
||||
# Initialize custom tools
|
||||
await self._setup_custom_tools()
|
||||
|
||||
async def _load_models(self) -> None:
|
||||
"""Load TensorFlow models from cache."""
|
||||
import tensorflow as tf
|
||||
# Custom model loading logic
|
||||
pass
|
||||
|
||||
async def _setup_custom_tools(self) -> None:
|
||||
"""Initialize custom tools with agent context."""
|
||||
# Custom tool setup logic
|
||||
pass
|
||||
```
|
||||
|
||||
#### 4.2.2 Custom Tool Implementation
|
||||
|
||||
```python
|
||||
# tools/custom_tools.py
|
||||
from openhands.sdk.tool import Tool, ToolExecutor, register_tool
|
||||
from openhands.sdk import Action, Observation
|
||||
from pydantic import Field
|
||||
import tensorflow as tf
|
||||
|
||||
class TensorFlowAnalysisAction(Action):
|
||||
dataset_path: str = Field(description="Path to dataset file")
|
||||
model_type: str = Field(description="Type of ML model to create")
|
||||
target_column: str = Field(description="Target column for prediction")
|
||||
|
||||
class TensorFlowAnalysisObservation(Observation):
    """Result fields reported back after a TensorFlow analysis action."""

    model_accuracy: float = Field(description="Model accuracy score")
    # Builtin generic `dict[...]` is used because this module does not import
    # `Dict` from typing; the original annotation raised NameError on import.
    feature_importance: dict[str, float] = Field(description="Feature importance scores")
    model_path: str = Field(description="Path to saved model")
|
||||
|
||||
class TensorFlowToolExecutor(ToolExecutor[TensorFlowAnalysisAction, TensorFlowAnalysisObservation]):
|
||||
def __call__(self, action: TensorFlowAnalysisAction, conversation=None) -> TensorFlowAnalysisObservation:
|
||||
# Custom TensorFlow analysis logic
|
||||
model = self._create_model(action.dataset_path, action.model_type, action.target_column)
|
||||
accuracy = self._evaluate_model(model)
|
||||
importance = self._get_feature_importance(model)
|
||||
model_path = self._save_model(model)
|
||||
|
||||
return TensorFlowAnalysisObservation(
|
||||
model_accuracy=accuracy,
|
||||
feature_importance=importance,
|
||||
model_path=model_path
|
||||
)
|
||||
|
||||
# Register the custom tool
|
||||
register_tool(
|
||||
Tool(
|
||||
name="TensorFlowTool",
|
||||
executor=TensorFlowToolExecutor(),
|
||||
definition=ToolDefinition(
|
||||
name="tensorflow_analysis",
|
||||
description="Perform machine learning analysis using TensorFlow",
|
||||
parameters=TensorFlowAnalysisAction.model_json_schema()
|
||||
)
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### 4.3 Runtime Integration
|
||||
|
||||
#### 4.3.1 Custom Agent Loader
|
||||
|
||||
```python
|
||||
# startup/init_agent.py
|
||||
import json
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
from openhands.sdk.agent.base import AgentBase
|
||||
from openhands.sdk.llm import LLM
|
||||
from openhands.sdk.tool import Tool, resolve_tool
|
||||
|
||||
class CustomAgentLoader:
    """Loads custom agents from configuration.

    The JSON configuration is read once when the loader is constructed.
    `create_agent` then imports the configured agent class and instantiates
    it with the tools listed in the same configuration.
    """

    def __init__(self, config_path: str = "/app/config/agent_config.json"):
        """Remember the config location and load it immediately.

        Args:
            config_path: Path to the agent configuration JSON file.
        """
        self.config_path = Path(config_path)
        self.config = self._load_config()

    def _load_config(self) -> dict:
        """Load agent configuration from JSON file."""
        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(self.config_path, encoding="utf-8") as f:
            return json.load(f)

    def create_agent(self, llm: LLM) -> AgentBase:
        """Create custom agent instance.

        Args:
            llm: The LLM handed to the agent constructor.

        Returns:
            An instance of the class named by the "agent" section of the config.

        Raises:
            KeyError: If the config lacks "agent", "module" or "class".
        """
        agent_config = self.config["agent"]

        # Import custom agent class
        module = importlib.import_module(agent_config["module"])
        agent_class = getattr(module, agent_config["class"])

        # Load custom tools
        tools = self._load_tools()

        # Create agent instance
        agent = agent_class(
            llm=llm,
            tools=tools,
            config=self.config.get("environment", {})
        )

        return agent

    # Builtin generic `list[...]` is used because this module does not import
    # `List` from typing; the original annotation raised NameError when the
    # class body was evaluated.
    def _load_tools(self) -> list[Tool]:
        """Load and resolve custom tools."""
        tools = []
        for tool_config in self.config.get("tools", []):
            if "module" in tool_config:
                # Import custom tool module to register it
                importlib.import_module(tool_config["module"])

            tool = resolve_tool(tool_config["name"])
            tools.append(tool)

        return tools
|
||||
```
|
||||
|
||||
#### 4.3.2 Agent Server Startup Integration
|
||||
|
||||
```python
|
||||
# Modified agent server startup in software-agent-sdk
|
||||
import os
|
||||
from openhands.agent_server.api import app
|
||||
from openhands.agent_server.conversation_service import ConversationService
|
||||
from startup.init_agent import CustomAgentLoader
|
||||
|
||||
@app.on_event("startup")
async def startup_event():
    """Initialize custom agent during server startup.

    Stores a one-argument factory (called with the LLM) on
    ``app.state.agent_factory``: the loader's ``create_agent`` when both
    ``CUSTOM_AGENT_MODULE`` and ``CUSTOM_AGENT_CLASS`` are set to non-empty
    values, otherwise a lambda that builds the default ``Agent``.
    """

    # Check for custom agent configuration
    custom_agent_module = os.getenv('CUSTOM_AGENT_MODULE')
    custom_agent_class = os.getenv('CUSTOM_AGENT_CLASS')

    if custom_agent_module and custom_agent_class:
        # Load custom agent
        # NOTE(review): the two environment variables only act as an on/off
        # switch here. The loader is constructed without arguments, so the
        # class it actually instantiates comes from its JSON config file and
        # may differ from the name printed below — confirm they are kept in sync.
        loader = CustomAgentLoader()
        app.state.agent_factory = loader.create_agent
        print(f"Loaded custom agent: {custom_agent_class}")
    else:
        # Use default agent
        from openhands.sdk.agent import Agent
        # NOTE(review): get_default_tools is not among the names imported in
        # this snippet — confirm where it is defined before relying on this branch.
        app.state.agent_factory = lambda llm: Agent(llm=llm, tools=get_default_tools())
        print("Using default OpenHands agent")
|
||||
```
|
||||
|
||||
### 4.4 Enhanced Sandbox Service Integration
|
||||
|
||||
#### 4.4.1 Permission-Aware Sandbox Service
|
||||
|
||||
```python
|
||||
# openhands/app_server/sandbox/enhanced_sandbox_spec_service.py
|
||||
from openhands.app_server.sandbox.sandbox_spec_service import SandboxSpecService
|
||||
from openhands.app_server.sandbox.sandbox_spec_models import SandboxSpecInfo, EnhancedSandboxSpecInfo
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
class EnhancedSandboxSpecService(SandboxSpecService):
    """Sandbox spec service that adds per-user access control and custom agent settings.

    Specs live in ``spec_registry``, a mapping from spec key to
    ``EnhancedSandboxSpecInfo``; the mapping passed to the constructor is
    stored and mutated in place by ``register_sandbox_spec``.
    """

    def __init__(self, spec_registry: Dict[str, EnhancedSandboxSpecInfo]):
        super().__init__()
        self.spec_registry = spec_registry

    def get_available_sandbox_specs(self, user_email: str, user_groups: List[str]) -> List[str]:
        """Return the keys of every registered spec the user is allowed to use."""
        return [
            key
            for key, candidate in self.spec_registry.items()
            if self._has_permission(candidate, user_email, user_groups)
        ]

    def get_sandbox_spec_by_id(
        self,
        spec_id: str,
        user_email: str,
        user_groups: List[str]
    ) -> SandboxSpecInfo:
        """Look up a spec by key, enforcing permissions on registered specs.

        An unknown ``spec_id`` is not an error: the first default spec of the
        base service is returned instead (backward compatibility).

        Raises:
            PermissionError: if the spec is registered but the user may not use it.
        """
        if spec_id in self.spec_registry:
            target = self.spec_registry[spec_id]
            if self._has_permission(target, user_email, user_groups):
                # Hand back the plain SandboxSpecInfo the existing infrastructure expects.
                return self._convert_to_sandbox_spec_info(target)
            raise PermissionError(f"User {user_email} does not have access to spec {spec_id}")

        return super().get_default_sandbox_specs()[0]

    def _has_permission(
        self,
        spec: EnhancedSandboxSpecInfo,
        user_email: str,
        user_groups: List[str]
    ) -> bool:
        """Tell whether the user is the owner, a listed user, or in a listed group."""
        # Owner first, then the explicit user list.
        if spec.permissions.owner == user_email or user_email in spec.permissions.users:
            return True

        # Finally, any shared group grants access.
        return any(group in spec.permissions.groups for group in user_groups)

    def _convert_to_sandbox_spec_info(self, enhanced_spec: EnhancedSandboxSpecInfo) -> SandboxSpecInfo:
        """Flatten an enhanced spec into the standard ``SandboxSpecInfo``.

        The environment is the fixed defaults below, overridden by the spec's
        own ``initial_env``, overridden in turn by the custom agent variables
        when the agent type is ``"custom"``.
        """
        base_env = {
            'OPENVSCODE_SERVER_ROOT': '/openhands/.openvscode-server',
            'OH_ENABLE_VNC': '0',
            'LOG_JSON': 'true',
            'OH_CONVERSATIONS_PATH': '/workspace/conversations',
            'OH_BASH_EVENTS_DIR': '/workspace/bash_events',
            'PYTHONUNBUFFERED': '1',
            'ENV_LOG_LEVEL': '20',
        }
        env_vars = {**base_env, **enhanced_spec.initial_env}

        if enhanced_spec.agent.type == "custom":
            # Tell the agent server which class to load.
            env_vars['CUSTOM_AGENT_MODULE'] = enhanced_spec.agent.module
            env_vars['CUSTOM_AGENT_CLASS'] = enhanced_spec.agent.class_name

        return SandboxSpecInfo(
            id=enhanced_spec.id,
            command=enhanced_spec.command,
            initial_env=env_vars,
            working_dir=enhanced_spec.working_dir,
        )

    def register_sandbox_spec(
        self,
        spec: EnhancedSandboxSpecInfo,
        admin_user: str
    ) -> str:
        """Validate ``spec`` and store it under ``"<name>:<version>"``.

        ``admin_user`` is accepted but not consulted here. An existing entry
        with the same key is replaced.

        Returns:
            The registry key the spec was stored under.

        Raises:
            ValueError: if the spec fails validation.
        """
        registry_key = f"{spec.name}:{spec.version}"
        self._validate_sandbox_spec(spec)
        self.spec_registry[registry_key] = spec
        return registry_key

    def _validate_sandbox_spec(self, spec: EnhancedSandboxSpecInfo) -> None:
        """Reject specs with a blank id, no owner, or an incomplete custom agent.

        Raises:
            ValueError: describing the first failed check.
        """
        # Image validation
        if not (spec.id and spec.id.strip()):
            raise ValueError("Image ID cannot be empty")

        # Permission validation
        if not spec.permissions.owner:
            raise ValueError("Sandbox spec must have an owner")

        # Agent validation for custom agents
        if spec.agent.type == "custom" and not (spec.agent.module and spec.agent.class_name):
            raise ValueError("Custom agents must specify module and class_name")
|
||||
```
|
||||
|
||||
### 4.5 Enhanced API Integration
|
||||
|
||||
#### 4.5.1 Enhanced Conversation Creation
|
||||
|
||||
```python
|
||||
# openhands/server/routes/conversation_routes.py (enhanced)
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Dict, Any, List
|
||||
from uuid import UUID
|
||||
|
||||
from openhands.app_server.sandbox.enhanced_sandbox_spec_service import EnhancedSandboxSpecService
|
||||
from openhands.server.session.agent_session import AgentSession
|
||||
from openhands.server.auth import get_current_user, get_user_groups
|
||||
|
||||
# Enhanced conversation creation request
|
||||
class CreateConversationRequest(BaseModel):
    """Request body for creating a conversation."""

    # First message of the new conversation (required).
    initial_message: str
    # Optional free-form workspace settings; omitted means None.
    workspace_config: Optional[Dict[str, Any]] = None
    # New field for custom sandbox spec
    sandbox_spec_id: Optional[str] = None
|
||||
|
||||
@router.post("/conversations")
async def create_conversation(
    request: CreateConversationRequest,
    current_user: str = Depends(get_current_user),
    user_groups: List[str] = Depends(get_user_groups),
    sandbox_service: EnhancedSandboxSpecService = Depends(get_enhanced_sandbox_service)
) -> ConversationResponse:
    """Create a conversation, optionally on a caller-selected sandbox spec.

    When ``sandbox_spec_id`` is empty or absent the first default spec is
    used; otherwise the spec is fetched with a permission check for the
    current user. ``PermissionError`` becomes HTTP 403 and ``ValueError``
    becomes HTTP 404.
    """
    try:
        requested_id = request.sandbox_spec_id

        if not requested_id:
            # No spec requested: fall back to the default.
            chosen_spec = sandbox_service.get_default_sandbox_specs()[0]
        else:
            # Permission-checked lookup of the requested spec.
            chosen_spec = sandbox_service.get_sandbox_spec_by_id(
                requested_id,
                current_user,
                user_groups
            )

        # Bring the sandbox up and wait for its agent server before creating
        # the conversation inside it.
        sandbox = await sandbox_service.create_sandbox(chosen_spec)
        await wait_for_agent_server_ready(sandbox)

        conversation = await create_conversation_with_sandbox(
            sandbox=sandbox,
            initial_message=request.initial_message,
            workspace_config=request.workspace_config
        )

        return ConversationResponse(
            conversation_id=conversation.id,
            status="created",
            sandbox_spec_id=request.sandbox_spec_id or "default"
        )
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
|
||||
```
|
||||
|
||||
#### 4.5.2 Sandbox Spec Management API
|
||||
|
||||
```python
|
||||
# openhands/server/routes/sandbox_spec_routes.py (new)
|
||||
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional
|
||||
import yaml
|
||||
|
||||
from openhands.app_server.sandbox.enhanced_sandbox_spec_service import EnhancedSandboxSpecService
|
||||
from openhands.app_server.sandbox.sandbox_spec_models import EnhancedSandboxSpecInfo
|
||||
from openhands.server.auth import get_current_user, get_user_groups, require_admin
|
||||
|
||||
router = APIRouter(prefix="/api/sandbox-specs", tags=["Sandbox Specs"])
|
||||
|
||||
@router.get("/")
async def list_available_sandbox_specs(
    current_user: str = Depends(get_current_user),
    user_groups: List[str] = Depends(get_user_groups),
    sandbox_service: EnhancedSandboxSpecService = Depends(get_enhanced_sandbox_service)
) -> List[str]:
    """Return the keys of the sandbox specs the calling user may use."""
    visible_specs = sandbox_service.get_available_sandbox_specs(current_user, user_groups)
    return visible_specs
|
||||
|
||||
@router.post("/")
async def register_sandbox_spec(
    spec_data: EnhancedSandboxSpecInfo,
    current_user: str = Depends(require_admin),
    sandbox_service: EnhancedSandboxSpecService = Depends(get_enhanced_sandbox_service)
) -> dict[str, str]:
    """Register a new sandbox spec (admin only).

    The request body is the spec itself; the caller is resolved through the
    ``require_admin`` dependency.

    Returns:
        ``{"spec_id": <registry key>, "status": "registered"}``.

    Raises:
        HTTPException: 400 when the service rejects the spec with ``ValueError``.
    """
    # The return annotation uses the built-in generic ``dict[...]`` because
    # this module imports only ``List`` and ``Optional`` from ``typing``; the
    # previous ``Dict[str, str]`` raised NameError when the module was imported.
    try:
        spec_key = sandbox_service.register_sandbox_spec(spec_data, current_user)
        return {"spec_id": spec_key, "status": "registered"}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
@router.post("/upload")
async def upload_custom_image(
    dockerfile: UploadFile = File(...),
    context: UploadFile = File(...),
    spec: UploadFile = File(...),
    current_user: str = Depends(get_current_user),
    sandbox_service: EnhancedSandboxSpecService = Depends(get_enhanced_sandbox_service)
) -> dict[str, str]:
    """Upload custom image with Dockerfile and context (with security validation).

    Steps, in order: authorize the caller, parse the YAML spec, validate the
    Dockerfile, validate the spec fields, build the image, then register the
    spec with the built image id and the caller as owner.

    Returns:
        ``{"spec_id": <registry key>, "image_id": <built image>, "status": "uploaded"}``.

    Raises:
        HTTPException: 403 when the caller may not create specs; 400 for any
            other failure (message prefixed with ``Upload failed:``).
    """
    # The return annotation uses the built-in generic ``dict[...]`` because
    # this module imports only ``List`` and ``Optional`` from ``typing``.
    try:
        # Authorize first, before reading or parsing any uploaded bytes.
        if not _can_user_create_specs(current_user):
            raise HTTPException(status_code=403, detail="User not authorized to create custom specs")

        # Parse spec file
        spec_content = await spec.read()
        spec_data = yaml.safe_load(spec_content)
        # safe_load returns None for an empty document and scalars/lists for
        # other inputs; only a mapping can be expanded into the model below.
        if not isinstance(spec_data, dict):
            raise ValueError("Spec file must contain a YAML mapping")

        # Security validation of Dockerfile
        dockerfile_content = await dockerfile.read()
        _validate_dockerfile_security(dockerfile_content)

        # Create enhanced spec before building, so a malformed spec is
        # rejected without paying for an image build.
        enhanced_spec = EnhancedSandboxSpecInfo(**spec_data)

        # Build image (implementation depends on build system)
        image_id = await _build_custom_image(dockerfile_content, context, current_user)

        # The uploaded spec cannot choose its own image or owner.
        enhanced_spec.id = image_id
        enhanced_spec.permissions.owner = current_user

        # Register the spec
        spec_key = sandbox_service.register_sandbox_spec(enhanced_spec, current_user)

        return {"spec_id": spec_key, "image_id": image_id, "status": "uploaded"}

    except HTTPException:
        # Keep deliberate HTTP errors (such as the 403 above) intact instead
        # of rewriting them as a generic 400.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Upload failed: {str(e)}") from e
|
||||
```
|
||||
|
||||
## 5. Implementation Plan
|
||||
|
||||
All implementation must pass existing lints and tests. New functionality requires comprehensive test coverage including unit tests, integration tests, and end-to-end scenarios.
|
||||
|
||||
### 5.1 Enhanced Sandbox Models and Permissions (M1)
|
||||
|
||||
#### 5.1.1 Enhanced Sandbox Specification Models
|
||||
|
||||
* `openhands/app_server/sandbox/sandbox_spec_models.py` (enhanced)
|
||||
* `tests/unit/app_server/sandbox/test_enhanced_sandbox_spec_models.py`
|
||||
|
||||
Extend existing `SandboxSpecInfo` with `EnhancedSandboxSpecInfo` including agent metadata, permissions, and resource requirements. This is the **core requirement** identified by the engineer.
|
||||
|
||||
#### 5.1.2 Permission System Foundation
|
||||
|
||||
* `openhands/server/auth/permissions.py`
|
||||
* `tests/unit/server/auth/test_permissions.py`
|
||||
|
||||
Implement user and group-based permission system for sandbox spec access control. This addresses the **security concerns** from V0 mentioned by the engineer.
|
||||
|
||||
**Demo**: Create enhanced sandbox specs with permission restrictions and verify access control works correctly.
|
||||
|
||||
### 5.2 Enhanced Sandbox Service (M2)
|
||||
|
||||
#### 5.2.1 Permission-Aware Sandbox Service
|
||||
|
||||
* `openhands/app_server/sandbox/enhanced_sandbox_spec_service.py`
|
||||
* `tests/unit/app_server/sandbox/test_enhanced_sandbox_spec_service.py`
|
||||
|
||||
Extend existing `SandboxSpecService` with permission checks and enhanced spec management. This **builds on existing infrastructure** as the engineer suggested.
|
||||
|
||||
#### 5.2.2 Agent Server Startup Integration
|
||||
|
||||
* `openhands-agent-server/openhands/agent_server/custom_agent_loader.py`
|
||||
* `tests/unit/agent_server/test_custom_agent_loader.py`
|
||||
|
||||
Implement custom agent loading mechanism in agent server startup process with configuration-driven agent instantiation.
|
||||
|
||||
**Demo**: Deploy custom agents using enhanced sandbox specs and verify permission-based access control works end-to-end.
|
||||
|
||||
### 5.3 Image Management and API Integration (M3)
|
||||
|
||||
#### 5.3.1 Secure Image Management
|
||||
|
||||
* `openhands/app_server/sandbox/image_builder.py`
|
||||
* `openhands/app_server/security/dockerfile_validator.py`
|
||||
* `tests/unit/app_server/sandbox/test_image_builder.py`
|
||||
* `tests/unit/app_server/security/test_dockerfile_validator.py`
|
||||
|
||||
Implement both **pre-built image registration** and **secure user upload** workflows as identified by the engineer. This addresses the security issues from V0.
|
||||
|
||||
#### 5.3.2 Enhanced Conversation API
|
||||
|
||||
* `openhands/server/routes/conversation_routes.py` (enhanced)
|
||||
* `openhands/server/routes/sandbox_spec_routes.py` (new)
|
||||
* `tests/unit/server/routes/test_enhanced_conversation_routes.py`
|
||||
* `tests/unit/server/routes/test_sandbox_spec_routes.py`
|
||||
|
||||
Enhance existing conversation creation API to support `sandbox_spec_id` parameter and add new sandbox spec management endpoints.
|
||||
|
||||
**Demo**: Create conversations with custom sandbox specs through existing API endpoints and demonstrate both pre-built and user-uploaded image workflows.
|
||||
|
||||
### 5.4 Advanced Security and Management (M4)
|
||||
|
||||
#### 5.4.1 Image Security Validation
|
||||
|
||||
* `openhands/app_server/security/image_scanner.py`
|
||||
* `openhands/app_server/security/security_policies.py`
|
||||
* `tests/unit/app_server/security/test_image_scanner.py`
|
||||
|
||||
Implement comprehensive security validation including image vulnerability scanning, Dockerfile analysis, and approval workflows.
|
||||
|
||||
#### 5.4.2 Spec Registry and Lifecycle Management
|
||||
|
||||
* `openhands/app_server/sandbox/spec_registry.py`
|
||||
* `openhands/app_server/sandbox/spec_lifecycle.py`
|
||||
* `tests/unit/app_server/sandbox/test_spec_registry.py`
|
||||
|
||||
Add persistent storage for enhanced sandbox specs, version management, and lifecycle policies (deprecation, cleanup).
|
||||
|
||||
**Demo**: Deploy multiple custom agents with different permission levels, demonstrate security validation workflows, and show proper spec lifecycle management.
|
||||
|
||||
---
|
||||
|
||||
## Key Alignment with Engineer's Approach
|
||||
|
||||
This revised implementation plan directly addresses the engineer's requirements:
|
||||
|
||||
1. **✅ Uses existing sandbox specs system** - Enhanced rather than replaced
|
||||
2. **✅ Permissions as core requirement** - Moved to M1 instead of M4
|
||||
3. **✅ Two image management approaches** - Pre-built registration and secure user uploads
|
||||
4. **✅ Security-first design** - Addresses V0 security issues with comprehensive validation
|
||||
5. **✅ Minimal infrastructure changes** - Builds on existing `SandboxSpecService` and conversation APIs
|
||||
@@ -1,934 +0,0 @@
|
||||
# Dynamic Custom Agent Package Loading (Scenario 2)
|
||||
|
||||
## 1. Introduction
|
||||
|
||||
### 1.1 Problem Statement
|
||||
|
||||
OpenHands V1 architecture uses a fixed agent server image (`ghcr.io/openhands/agent-server:5f62cee-python`) that contains the default agent implementation. Users who want to customize agent behavior with pure Python packages that don't require additional system dependencies currently have no supported mechanism to deploy their custom agents without building entirely new Docker images.
|
||||
|
||||
This creates unnecessary complexity for the common use case where users simply want to:
|
||||
- Customize agent prompts and reasoning logic
|
||||
- Add new Python-based tools and capabilities
|
||||
- Integrate with Python APIs and libraries already available in the base image
|
||||
- Deploy agents with different LLM configurations or specialized workflows
|
||||
|
||||
The current approach forces all customization through the heavyweight Scenario 1 path (custom Docker images), even when the base agent server image already contains all necessary dependencies.
|
||||
|
||||
### 1.2 Proposed Solution
|
||||
|
||||
We propose implementing **Dynamic Custom Agent Package Loading** within the existing V1 agent server container. This allows users to deploy custom agents by providing Python packages that are downloaded, installed, and instantiated at runtime without requiring custom Docker images.
|
||||
|
||||
Users will be able to:
|
||||
1. Package their custom agents as standard Python packages (pip-installable)
|
||||
2. Specify agent package URLs (Git repositories, PyPI packages, or ZIP archives) in conversation creation
|
||||
3. Have the agent server dynamically download and install the package at startup
|
||||
4. Instantiate their custom agent within the existing container environment
|
||||
5. Maintain full compatibility with the existing HTTP API (`/ask_agent` endpoint)
|
||||
|
||||
The solution leverages the existing V1 architecture's agent server container but extends the startup process to support dynamic agent loading based on environment configuration.
|
||||
|
||||
**Trade-offs**: This approach is limited to Python packages that can run within the existing agent server environment. Users needing custom system dependencies, non-Python tools, or different base images must use Scenario 1. However, this covers the majority of agent customization use cases with significantly reduced complexity.
|
||||
|
||||
## 2. User Interface
|
||||
|
||||
### 2.1 Custom Agent Package Structure
|
||||
|
||||
Users create a standard Python package with the required interface:
|
||||
|
||||
```text
|
||||
# my_custom_agent/
|
||||
├── setup.py
|
||||
├── requirements.txt # Optional additional dependencies
|
||||
├── my_custom_agent/
|
||||
│ ├── __init__.py
|
||||
│ ├── agent.py # Main agent implementation
|
||||
│ ├── tools.py # Custom tools (optional)
|
||||
│ └── config.py # Agent configuration
|
||||
```
|
||||
|
||||
### 2.2 Agent Implementation
|
||||
|
||||
```python
|
||||
# my_custom_agent/agent.py
|
||||
from openhands.sdk.agent.base import AgentBase
|
||||
from openhands.sdk.llm import LLM
|
||||
from openhands.sdk.tool import Tool
|
||||
from typing import List, Dict, Any
|
||||
|
||||
class MyCustomAgent(AgentBase):
    """Example agent that carries an extra configuration mapping."""

    def __init__(self, llm: LLM, tools: List[Tool], config: Dict[str, Any] = None):
        super().__init__(llm=llm, tools=tools)
        # A missing (or empty) config is stored as a fresh empty dict.
        self.config = config if config else {}

    async def initialize(self) -> None:
        """Hook for agent-specific setup; does nothing in this example."""
        # Custom initialization logic
        return None
|
||||
|
||||
# Factory function for agent creation
|
||||
def create_agent(llm: LLM, tools: List[Tool], config: Dict[str, Any] = None) -> AgentBase:
    """Build and return a ``MyCustomAgent`` from the given LLM, tools and config."""
    agent = MyCustomAgent(llm=llm, tools=tools, config=config)
    return agent
|
||||
```
|
||||
|
||||
### 2.3 Package Entry Point
|
||||
|
||||
```python
|
||||
# my_custom_agent/__init__.py
|
||||
from .agent import create_agent, MyCustomAgent
|
||||
|
||||
__all__ = ['create_agent', 'MyCustomAgent']
|
||||
```
|
||||
|
||||
### 2.4 Setup Configuration
|
||||
|
||||
```python
|
||||
# setup.py
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Distribution metadata for the example custom agent package.
setup(
    name="my-custom-agent",
    version="1.0.0",
    # Include every package that find_packages() discovers.
    packages=find_packages(),
    install_requires=[
        # Only additional dependencies beyond base image
        "requests>=2.25.0",
        "beautifulsoup4>=4.9.0",
    ],
    entry_points={
        # Advertises the create_agent factory under the 'openhands.agents'
        # entry-point group.
        # NOTE(review): nothing in this snippet shows the agent server
        # reading this group — confirm the loader actually uses it.
        'openhands.agents': [
            'my_custom_agent = my_custom_agent:create_agent',
        ],
    },
)
|
||||
```
|
||||
|
||||
### 2.5 Conversation Creation with Dynamic Agent Loading
|
||||
|
||||
Users create conversations by specifying the agent package URL:
|
||||
|
||||
```bash
|
||||
# Create conversation with custom agent package
|
||||
curl -X POST "https://api.openhands.ai/api/conversations" \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"agent_package_url": "git+https://github.com/user/my-custom-agent.git",
|
||||
"initial_message": "Help me analyze this codebase",
|
||||
"workspace": {
|
||||
"type": "local",
|
||||
"working_dir": "/workspace/project"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
Alternative package sources:
|
||||
```bash
|
||||
# PyPI package
|
||||
"agent_package_url": "my-custom-agent==1.0.0"
|
||||
|
||||
# ZIP archive
|
||||
"agent_package_url": "https://example.com/agents/my-custom-agent.zip"
|
||||
|
||||
# Private Git repository
|
||||
"agent_package_url": "git+https://token@github.com/private/agent.git"
|
||||
```
|
||||
|
||||
## 3. Other Context
|
||||
|
||||
### 3.1 Current V1 Architecture Overview
|
||||
|
||||
The OpenHands V1 architecture follows a distributed service model with clear separation between the main application server and agent execution environment. Understanding this architecture is crucial for implementing dynamic agent loading.
|
||||
|
||||
#### 3.1.1 Service Separation
|
||||
|
||||
The V1 system consists of three primary components:
|
||||
|
||||
1. **Main Server** (`openhands/app_server/`): Handles user requests, conversation management, and sandbox orchestration
|
||||
2. **Agent Server** (`software-agent-sdk/openhands-agent-server/`): Executes agent logic and manages conversation state
|
||||
3. **Action Execution Server**: Handles tool execution (bash commands, file operations) within sandboxed environments
|
||||
|
||||
#### 3.1.2 Communication Flow
|
||||
|
||||
The current communication pattern follows this sequence:
|
||||
|
||||
```
|
||||
User Request → Main Server → HTTP API → Agent Server → Agent Instance → Tools → Response
|
||||
```
|
||||
|
||||
This separation allows for:
|
||||
- **Isolation**: Agent execution is isolated from the main application
|
||||
- **Scalability**: Multiple agent servers can be spawned for different conversations
|
||||
- **Security**: Sandboxed execution prevents agent actions from affecting the host system
|
||||
- **Flexibility**: Different agent configurations can be deployed without affecting the main server
|
||||
|
||||
#### 3.1.3 Container Orchestration
|
||||
|
||||
The main server uses `DockerSandboxSpecService` to create and manage agent server containers:
|
||||
|
||||
- **Image Selection**: Currently hardcoded to `ghcr.io/openhands/agent-server:5f62cee-python`
|
||||
- **Environment Configuration**: Passed via `initial_env` in `SandboxSpecInfo`
|
||||
- **Network Isolation**: Each conversation gets its own container instance
|
||||
- **Resource Management**: Memory and CPU limits enforced at container level
|
||||
|
||||
### 3.2 Agent Server Internal Architecture
|
||||
|
||||
#### 3.2.1 FastAPI Application Structure
|
||||
|
||||
The agent server is built as a FastAPI application with these key components:
|
||||
|
||||
- **Conversation Router** (`conversation_router.py`): Handles HTTP endpoints for agent interaction
|
||||
- **Conversation Service** (`conversation_service.py`): Manages conversation lifecycle and state
|
||||
- **Event Service** (`event_service.py`): Processes agent actions and observations
|
||||
- **Dependencies** (`dependencies.py`): Provides dependency injection for services
|
||||
|
||||
#### 3.2.2 Agent Instantiation Pattern
|
||||
|
||||
Currently, agents are instantiated during server startup using a fixed pattern:
|
||||
|
||||
```python
|
||||
# Simplified current pattern
|
||||
agent = Agent(
|
||||
llm=LLM(model="default-model", api_key="..."),
|
||||
tools=[TerminalTool(), FileEditorTool(), ...]
|
||||
)
|
||||
```
|
||||
|
||||
This creates a single agent instance that serves all requests to that container.
|
||||
|
||||
#### 3.2.3 Request Processing Flow
|
||||
|
||||
When the `/ask_agent` endpoint receives a request:
|
||||
|
||||
1. **Request Validation**: `AskAgentRequest` is validated and parsed
|
||||
2. **Conversation Lookup**: Conversation state is retrieved or created
|
||||
3. **Agent Invocation**: The fixed agent instance processes the question
|
||||
4. **Response Formatting**: Result is wrapped in `AskAgentResponse`
|
||||
5. **HTTP Response**: JSON response sent back to main server
|
||||
|
||||
### 3.3 Software Agent SDK Integration Points
|
||||
|
||||
#### 3.3.1 Agent Interface Requirements
|
||||
|
||||
The `software-agent-sdk` defines the contract that all agents must follow:
|
||||
|
||||
- **AgentBase**: Abstract base class requiring `llm` and `tools` parameters
|
||||
- **Tool Integration**: Agents must work with the standardized tool system
|
||||
- **Event Handling**: Agents process events through the conversation framework
|
||||
- **State Management**: Agents maintain conversation context through event streams
|
||||
|
||||
#### 3.3.2 Tool System Architecture
|
||||
|
||||
The tool system provides the foundation for agent capabilities:
|
||||
|
||||
- **Tool Registration**: Tools are registered globally and resolved by name
|
||||
- **Execution Framework**: `ToolExecutor` classes handle action execution
|
||||
- **Built-in Tools**: Standard tools (Terminal, FileEditor, Browser) are always available
|
||||
- **Custom Tools**: Additional tools can be registered through the plugin system
|
||||
|
||||
#### 3.3.3 LLM Integration
|
||||
|
||||
Agents interact with language models through the SDK's LLM abstraction:
|
||||
|
||||
- **Provider Agnostic**: Supports multiple LLM providers through unified interface
|
||||
- **Configuration**: LLM settings (model, API keys, parameters) are configurable
|
||||
- **Response Processing**: Structured handling of LLM responses and tool calls
|
||||
|
||||
### 3.4 Dynamic Loading Technical Foundation
|
||||
|
||||
#### 3.4.1 Python Package Management
|
||||
|
||||
Our dynamic loading approach builds on Python's standard packaging and import machinery:
|
||||
|
||||
- **pip install**: Supports Git repositories, PyPI packages, and archive files
|
||||
- **importlib**: Enables runtime module importing and class instantiation
|
||||
- **entry_points**: Provides standardized plugin discovery mechanism
|
||||
- **sys.path**: Allows dynamic modification of Python module search paths
|
||||
|
||||
#### 3.4.2 Container Environment Considerations
|
||||
|
||||
The agent server container provides a controlled environment for dynamic loading:
|
||||
|
||||
- **Python Runtime**: Pre-installed Python 3.x with pip and common libraries
|
||||
- **Network Access**: Required for downloading packages from external sources
|
||||
- **File System**: Writable areas for package installation and caching
|
||||
- **Security Context**: Isolated from host system with appropriate permissions
|
||||
|
||||
#### 3.4.3 State Management Implications
|
||||
|
||||
Dynamic agent loading affects conversation state management:
|
||||
|
||||
- **Agent Persistence**: Custom agents must maintain state across requests
|
||||
- **Configuration Isolation**: Different conversations can use different agent configurations
|
||||
- **Resource Cleanup**: Proper cleanup of agent resources when conversations end
|
||||
- **Error Recovery**: Fallback mechanisms when custom agents fail to load or execute
|
||||
|
||||
## 4. Technical Design
|
||||
|
||||
### 4.1 Current V1 Agent Instantiation Flow
|
||||
|
||||
To understand how our proposal integrates with the existing system, it's important to first examine how agents are currently instantiated and executed in the V1 architecture.
|
||||
|
||||
#### 4.1.1 Current Agent Server Startup Process
|
||||
|
||||
In the current V1 flow, agent instantiation follows this sequence:
|
||||
|
||||
1. **Main Server Request**: When a user creates a conversation, the main server (`openhands/app_server`) creates a sandbox specification via `DockerSandboxSpecService.get_default_sandbox_specs()`
|
||||
2. **Container Launch**: The sandbox service launches the agent server container using the hardcoded image `ghcr.io/openhands/agent-server:5f62cee-python`
|
||||
3. **Agent Server Initialization**: The agent server container starts with the command `['--port', '8000']` and initializes a FastAPI application
|
||||
4. **Default Agent Creation**: During startup, the agent server creates a default agent instance (typically from the software-agent-sdk) with standard tools and configuration
|
||||
5. **HTTP API Ready**: The agent server exposes the `/api/conversations/{id}/ask_agent` endpoint, routing requests to the default agent instance
|
||||
|
||||
#### 4.1.2 Current Agent Execution Flow
|
||||
|
||||
When a user sends a message through the V1 API:
|
||||
|
||||
1. **HTTP Request**: Main server makes POST request to `http://agent-server:8000/api/conversations/{id}/ask_agent`
|
||||
2. **Agent Router**: `conversation_router.py` receives the request and extracts the `AskAgentRequest`
|
||||
3. **Conversation Service**: `ConversationService.ask_agent()` method is called with the user's question
|
||||
4. **Event Service**: The request is forwarded to `EventService.ask_agent()` which manages the conversation state
|
||||
5. **Agent Execution**: The default agent processes the question using its configured LLM and tools
|
||||
6. **Response Return**: The agent's response is returned through the same HTTP chain back to the main server
|
||||
|
||||
#### 4.1.3 Limitations of Current Approach
|
||||
|
||||
The current system has several limitations for custom agent deployment:
|
||||
|
||||
- **Fixed Agent Implementation**: The agent server container contains a single, hardcoded agent implementation
|
||||
- **Static Configuration**: Agent behavior cannot be modified without rebuilding the entire container
|
||||
- **No Runtime Customization**: Users cannot specify different agent types or configurations per conversation
|
||||
- **Deployment Complexity**: Any agent customization requires building and maintaining custom Docker images
|
||||
|
||||
### 4.2 Proposed Dynamic Agent Loading Architecture
|
||||
|
||||
Our proposal extends the current V1 flow by introducing dynamic agent loading capabilities while maintaining full backward compatibility with existing APIs and infrastructure.
|
||||
|
||||
#### 4.2.1 Enhanced Agent Server Startup Process
|
||||
|
||||
The modified startup process introduces agent selection based on environment configuration:
|
||||
|
||||
1. **Environment Detection**: During agent server startup, check for `CUSTOM_AGENT_PACKAGE_URL` environment variable
|
||||
2. **Conditional Loading**: If custom agent URL is present, download and install the package; otherwise use default agent
|
||||
3. **Agent Factory Creation**: Create an agent factory function that can instantiate either custom or default agents
|
||||
4. **HTTP API Registration**: Register the same `/ask_agent` endpoint, but route to the dynamically selected agent
|
||||
|
||||
#### 4.2.2 Dynamic Package Installation Process
|
||||
|
||||
When a custom agent package URL is detected, the system performs these steps:
|
||||
|
||||
1. **Package Download**: Use `pip install` to download the package from Git, PyPI, or ZIP sources
|
||||
2. **Dependency Resolution**: Install any additional Python dependencies specified in the package
|
||||
3. **Module Import**: Use `importlib` to dynamically import the custom agent module
|
||||
4. **Agent Instantiation**: Call the package's `create_agent()` factory function with LLM and tools
|
||||
5. **Initialization**: Execute any custom initialization logic defined by the agent
|
||||
6. **Caching**: Cache the agent instance for reuse across multiple requests
|
||||
|
||||
#### 4.2.3 Modified Execution Flow
|
||||
|
||||
The execution flow remains largely unchanged from the user's perspective, but internally:
|
||||
|
||||
1. **Same HTTP API**: The `/ask_agent` endpoint signature and behavior remain identical
|
||||
2. **Dynamic Routing**: Requests are routed to either custom or default agent based on startup configuration
|
||||
3. **Transparent Operation**: The main server is unaware of whether it's communicating with a custom or default agent
|
||||
4. **Consistent Response Format**: All agents return responses in the same `AskAgentResponse` format
|
||||
|
||||
### 4.3 Integration Points and Modifications
|
||||
|
||||
#### 4.3.1 Sandbox Service Modifications
|
||||
|
||||
The main server's sandbox service requires minimal changes to support dynamic agent loading:
|
||||
|
||||
```python
|
||||
# Current: Fixed environment for all conversations
|
||||
def get_default_sandbox_specs():
|
||||
return [SandboxSpecInfo(
|
||||
id=AGENT_SERVER_IMAGE,
|
||||
command=['--port', '8000'],
|
||||
initial_env={...} # Standard environment
|
||||
)]
|
||||
|
||||
# Enhanced: Dynamic environment based on conversation requirements
|
||||
def create_dynamic_agent_sandbox_spec(agent_package_url: str):
|
||||
return SandboxSpecInfo(
|
||||
id=AGENT_SERVER_IMAGE, # Same base image
|
||||
command=['--port', '8000'],
|
||||
initial_env={
|
||||
**standard_env,
|
||||
'CUSTOM_AGENT_PACKAGE_URL': agent_package_url # New variable
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
#### 4.3.2 Agent Server Startup Modifications
|
||||
|
||||
The agent server startup process is enhanced to detect and load custom agents:
|
||||
|
||||
```python
|
||||
# Current: Fixed agent creation
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
app.state.agent = DefaultAgent(llm=default_llm, tools=default_tools)
|
||||
|
||||
# Enhanced: Dynamic agent creation
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
custom_agent_url = os.getenv('CUSTOM_AGENT_PACKAGE_URL')
|
||||
if custom_agent_url:
|
||||
loader = DynamicAgentLoader()
|
||||
app.state.agent = await loader.load_agent_from_url(custom_agent_url, ...)
|
||||
else:
|
||||
app.state.agent = DefaultAgent(llm=default_llm, tools=default_tools)
|
||||
```
|
||||
|
||||
#### 4.3.3 Conversation Service Integration
|
||||
|
||||
The conversation service routing logic is updated to use the dynamically loaded agent:
|
||||
|
||||
```python
|
||||
# Current: Direct agent usage
|
||||
async def ask_agent(self, conversation_id: UUID, question: str) -> str:
|
||||
event_service = self.event_services[conversation_id]
|
||||
return await event_service.ask_agent(question)
|
||||
|
||||
# Enhanced: Dynamic agent resolution
|
||||
async def ask_agent(self, conversation_id: UUID, question: str) -> str:
|
||||
event_service = self.event_services[conversation_id]
|
||||
# Agent is now dynamically determined at startup
|
||||
return await event_service.ask_agent(question)
|
||||
```
|
||||
|
||||
### 4.4 Dynamic Agent Loading Implementation
|
||||
|
||||
#### 4.4.1 Agent Package Loader
|
||||
|
||||
```python
|
||||
# openhands/agent_server/dynamic_agent_loader.py
|
||||
import subprocess
|
||||
import importlib
|
||||
import tempfile
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Any, Optional
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
|
||||
from openhands.sdk.agent.base import AgentBase
|
||||
from openhands.sdk.llm import LLM
|
||||
from openhands.sdk.tool import Tool
|
||||
from openhands.sdk.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
class DynamicAgentLoader:
|
||||
"""Loads custom agents from package URLs at runtime."""
|
||||
|
||||
def __init__(self):
|
||||
self.installed_packages: Dict[str, str] = {}
|
||||
|
||||
async def load_agent_from_url(
|
||||
self,
|
||||
package_url: str,
|
||||
llm: LLM,
|
||||
tools: list[Tool],
|
||||
config: Optional[Dict[str, Any]] = None
|
||||
) -> AgentBase:
|
||||
"""Load and instantiate agent from package URL."""
|
||||
|
||||
# Check if already installed
|
||||
if package_url in self.installed_packages:
|
||||
package_name = self.installed_packages[package_url]
|
||||
return await self._create_agent_instance(package_name, llm, tools, config)
|
||||
|
||||
# Install the package
|
||||
package_name = await self._install_package(package_url)
|
||||
self.installed_packages[package_url] = package_name
|
||||
|
||||
# Create agent instance
|
||||
return await self._create_agent_instance(package_name, llm, tools, config)
|
||||
|
||||
async def _install_package(self, package_url: str) -> str:
|
||||
"""Install package from URL and return package name."""
|
||||
|
||||
logger.info(f"Installing custom agent package: {package_url}")
|
||||
|
||||
try:
|
||||
# Install package using pip
|
||||
result = subprocess.run([
|
||||
sys.executable, "-m", "pip", "install", package_url
|
||||
], capture_output=True, text=True, check=True)
|
||||
|
||||
logger.info(f"Package installation successful: {result.stdout}")
|
||||
|
||||
# Extract package name from URL
|
||||
package_name = self._extract_package_name(package_url)
|
||||
return package_name
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Failed to install package {package_url}: {e.stderr}")
|
||||
raise RuntimeError(f"Package installation failed: {e.stderr}")
|
||||
|
||||
def _extract_package_name(self, package_url: str) -> str:
|
||||
"""Extract package name from various URL formats."""
|
||||
|
||||
if package_url.startswith('git+'):
|
||||
# Git URL: extract repo name
|
||||
url = package_url.replace('git+', '')
|
||||
return Path(urlparse(url).path).stem
|
||||
elif '==' in package_url:
|
||||
# PyPI with version: extract package name
|
||||
return package_url.split('==')[0]
|
||||
elif package_url.endswith('.zip'):
|
||||
# ZIP file: extract filename
|
||||
return Path(urlparse(package_url).path).stem
|
||||
else:
|
||||
# Assume it's a simple package name
|
||||
return package_url
|
||||
|
||||
async def _create_agent_instance(
|
||||
self,
|
||||
package_name: str,
|
||||
llm: LLM,
|
||||
tools: list[Tool],
|
||||
config: Optional[Dict[str, Any]] = None
|
||||
) -> AgentBase:
|
||||
"""Create agent instance from installed package."""
|
||||
|
||||
try:
|
||||
# Import the package
|
||||
module = importlib.import_module(package_name)
|
||||
|
||||
# Look for create_agent function
|
||||
if hasattr(module, 'create_agent'):
|
||||
create_agent_func = getattr(module, 'create_agent')
|
||||
agent = create_agent_func(llm=llm, tools=tools, config=config)
|
||||
else:
|
||||
# Fallback: look for agent class
|
||||
agent_classes = [
|
||||
attr for attr in dir(module)
|
||||
if (isinstance(getattr(module, attr), type) and
|
||||
issubclass(getattr(module, attr), AgentBase) and
|
||||
getattr(module, attr) != AgentBase)
|
||||
]
|
||||
|
||||
if not agent_classes:
|
||||
raise RuntimeError(f"No agent class found in package {package_name}")
|
||||
|
||||
agent_class = getattr(module, agent_classes[0])
|
||||
agent = agent_class(llm=llm, tools=tools, config=config)
|
||||
|
||||
# Initialize the agent
|
||||
if hasattr(agent, 'initialize'):
|
||||
await agent.initialize()
|
||||
|
||||
logger.info(f"Successfully created agent from package: {package_name}")
|
||||
return agent
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create agent from package {package_name}: {e}")
|
||||
raise RuntimeError(f"Agent instantiation failed: {e}")
|
||||
```
|
||||
|
||||
#### 4.4.2 Agent Server Integration
|
||||
|
||||
```python
|
||||
# Modified openhands/agent_server/conversation_service.py
|
||||
import os
|
||||
from typing import Optional
|
||||
from openhands.agent_server.dynamic_agent_loader import DynamicAgentLoader
|
||||
from openhands.sdk.agent.base import AgentBase
|
||||
from openhands.sdk.agent import Agent # Default agent
|
||||
|
||||
class ConversationService:
|
||||
"""Enhanced conversation service with dynamic agent loading."""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
self.config = config
|
||||
self.agent_loader = DynamicAgentLoader()
|
||||
self._default_agent_factory = None
|
||||
self._custom_agent_cache: Dict[str, AgentBase] = {}
|
||||
|
||||
async def _get_or_create_agent(
|
||||
self,
|
||||
conversation_id: UUID,
|
||||
llm: LLM,
|
||||
tools: list[Tool]
|
||||
) -> AgentBase:
|
||||
"""Get or create agent for conversation."""
|
||||
|
||||
# Check for custom agent package URL in environment
|
||||
custom_agent_url = os.getenv('CUSTOM_AGENT_PACKAGE_URL')
|
||||
|
||||
if custom_agent_url:
|
||||
# Use custom agent
|
||||
if custom_agent_url not in self._custom_agent_cache:
|
||||
agent = await self.agent_loader.load_agent_from_url(
|
||||
package_url=custom_agent_url,
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
config=self._get_agent_config()
|
||||
)
|
||||
self._custom_agent_cache[custom_agent_url] = agent
|
||||
|
||||
return self._custom_agent_cache[custom_agent_url]
|
||||
else:
|
||||
# Use default agent
|
||||
if not self._default_agent_factory:
|
||||
self._default_agent_factory = Agent(llm=llm, tools=tools)
|
||||
|
||||
return self._default_agent_factory
|
||||
|
||||
def _get_agent_config(self) -> Dict[str, Any]:
|
||||
"""Extract agent configuration from environment."""
|
||||
config = {}
|
||||
|
||||
# Parse JSON config if provided
|
||||
config_json = os.getenv('CUSTOM_AGENT_CONFIG')
|
||||
if config_json:
|
||||
import json
|
||||
config.update(json.loads(config_json))
|
||||
|
||||
return config
|
||||
```
|
||||
|
||||
### 4.2 Sandbox Service Integration
|
||||
|
||||
#### 4.2.1 Enhanced Sandbox Specification
|
||||
|
||||
```python
|
||||
# openhands/app_server/sandbox/docker_sandbox_spec_service.py
|
||||
from typing import Optional
|
||||
|
||||
class DockerSandboxSpecService(SandboxSpecService):
|
||||
"""Enhanced sandbox service supporting dynamic agent loading."""
|
||||
|
||||
def create_dynamic_agent_sandbox_spec(
|
||||
self,
|
||||
agent_package_url: str,
|
||||
agent_config: Optional[Dict[str, Any]] = None
|
||||
) -> SandboxSpecInfo:
|
||||
"""Create sandbox spec with dynamic agent loading configuration."""
|
||||
|
||||
# Base environment from existing implementation
|
||||
base_env = {
|
||||
'OPENVSCODE_SERVER_ROOT': '/openhands/.openvscode-server',
|
||||
'OH_ENABLE_VNC': '0',
|
||||
'LOG_JSON': 'true',
|
||||
'OH_CONVERSATIONS_PATH': '/workspace/conversations',
|
||||
'OH_BASH_EVENTS_DIR': '/workspace/bash_events',
|
||||
'PYTHONUNBUFFERED': '1',
|
||||
'ENV_LOG_LEVEL': '20',
|
||||
}
|
||||
|
||||
# Add dynamic agent configuration
|
||||
dynamic_env = {
|
||||
**base_env,
|
||||
'CUSTOM_AGENT_PACKAGE_URL': agent_package_url,
|
||||
}
|
||||
|
||||
# Add agent configuration as JSON if provided
|
||||
if agent_config:
|
||||
import json
|
||||
dynamic_env['CUSTOM_AGENT_CONFIG'] = json.dumps(agent_config)
|
||||
|
||||
return SandboxSpecInfo(
|
||||
id=AGENT_SERVER_IMAGE, # Same base image
|
||||
command=['--port', '8000'],
|
||||
initial_env=dynamic_env,
|
||||
working_dir='/workspace/project',
|
||||
)
|
||||
```
|
||||
|
||||
#### 4.2.2 Conversation Creation API Enhancement
|
||||
|
||||
```python
|
||||
# openhands/server/routes/conversation_routes.py
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
class CreateConversationRequest(BaseModel):
|
||||
"""Enhanced conversation creation request."""
|
||||
initial_message: str
|
||||
workspace_config: Optional[Dict[str, Any]] = None
|
||||
# New field for dynamic agent loading
|
||||
agent_package_url: Optional[str] = None
|
||||
agent_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
@router.post("/conversations")
|
||||
async def create_conversation(
|
||||
request: CreateConversationRequest,
|
||||
sandbox_service: DockerSandboxSpecService = Depends(get_sandbox_service)
|
||||
) -> ConversationResponse:
|
||||
"""Create conversation with optional dynamic agent loading."""
|
||||
|
||||
if request.agent_package_url:
|
||||
# Create sandbox with dynamic agent loading
|
||||
sandbox_spec = sandbox_service.create_dynamic_agent_sandbox_spec(
|
||||
agent_package_url=request.agent_package_url,
|
||||
agent_config=request.agent_config
|
||||
)
|
||||
else:
|
||||
# Use default sandbox specification
|
||||
sandbox_spec = sandbox_service.get_default_sandbox_specs()[0]
|
||||
|
||||
# Create sandbox and conversation
|
||||
sandbox = await sandbox_service.create_sandbox(sandbox_spec)
|
||||
await wait_for_agent_server_ready(sandbox)
|
||||
|
||||
conversation = await create_conversation_with_sandbox(
|
||||
sandbox=sandbox,
|
||||
initial_message=request.initial_message,
|
||||
workspace_config=request.workspace_config
|
||||
)
|
||||
|
||||
return ConversationResponse(
|
||||
conversation_id=conversation.id,
|
||||
status="created",
|
||||
agent_type="custom" if request.agent_package_url else "default"
|
||||
)
|
||||
```
|
||||
|
||||
### 4.3 Agent Server Startup Process
|
||||
|
||||
#### 4.3.1 Enhanced Agent Server Initialization
|
||||
|
||||
```python
|
||||
# openhands/agent_server/api.py startup modification
|
||||
import os
|
||||
from openhands.agent_server.dynamic_agent_loader import DynamicAgentLoader
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
"""Enhanced startup with dynamic agent loading support."""
|
||||
|
||||
# Initialize dynamic agent loader
|
||||
app.state.agent_loader = DynamicAgentLoader()
|
||||
|
||||
# Check for custom agent package URL
|
||||
custom_agent_url = os.getenv('CUSTOM_AGENT_PACKAGE_URL')
|
||||
|
||||
if custom_agent_url:
|
||||
logger.info(f"Dynamic agent loading enabled: {custom_agent_url}")
|
||||
# Pre-install the custom agent package at startup (optional)
|
||||
try:
|
||||
await app.state.agent_loader._install_package(custom_agent_url)
|
||||
logger.info("Custom agent package pre-installed successfully")
|
||||
except Exception as e:
|
||||
logger.warning(f"Custom agent pre-installation failed: {e}")
|
||||
# Continue with startup - will retry on first conversation
|
||||
else:
|
||||
logger.info("Using default agent configuration")
|
||||
```
|
||||
|
||||
### 4.4 Error Handling and Fallback
|
||||
|
||||
#### 4.4.1 Robust Error Handling
|
||||
|
||||
```python
|
||||
# openhands/agent_server/dynamic_agent_loader.py (enhanced)
|
||||
class DynamicAgentLoader:
|
||||
"""Enhanced loader with comprehensive error handling."""
|
||||
|
||||
async def load_agent_with_fallback(
|
||||
self,
|
||||
package_url: str,
|
||||
llm: LLM,
|
||||
tools: list[Tool],
|
||||
config: Optional[Dict[str, Any]] = None
|
||||
) -> AgentBase:
|
||||
"""Load custom agent with fallback to default agent."""
|
||||
|
||||
try:
|
||||
return await self.load_agent_from_url(package_url, llm, tools, config)
|
||||
except Exception as e:
|
||||
logger.error(f"Custom agent loading failed: {e}")
|
||||
logger.info("Falling back to default agent")
|
||||
|
||||
# Import default agent
|
||||
from openhands.sdk.agent import Agent
|
||||
return Agent(llm=llm, tools=tools)
|
||||
|
||||
async def _validate_package_url(self, package_url: str) -> bool:
|
||||
"""Validate package URL accessibility."""
|
||||
|
||||
try:
|
||||
if package_url.startswith('git+'):
|
||||
# Validate Git repository access
|
||||
import subprocess
|
||||
result = subprocess.run([
|
||||
'git', 'ls-remote', package_url.replace('git+', '')
|
||||
], capture_output=True, timeout=30)
|
||||
return result.returncode == 0
|
||||
elif package_url.startswith('http'):
|
||||
# Validate HTTP URL accessibility
|
||||
import httpx
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.head(package_url, timeout=30)
|
||||
return response.status_code == 200
|
||||
else:
|
||||
# Assume PyPI package - always return True
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
```
|
||||
|
||||
### 4.5 Security and Isolation
|
||||
|
||||
#### 4.5.1 Package Security Validation
|
||||
|
||||
```python
|
||||
# openhands/agent_server/security/package_validator.py
|
||||
import re
|
||||
from typing import List, Set
|
||||
from urllib.parse import urlparse
|
||||
|
||||
class PackageSecurityValidator:
|
||||
"""Validates custom agent packages for security compliance."""
|
||||
|
||||
ALLOWED_DOMAINS: Set[str] = {
|
||||
'github.com',
|
||||
'gitlab.com',
|
||||
'bitbucket.org',
|
||||
'pypi.org',
|
||||
'files.pythonhosted.org'
|
||||
}
|
||||
|
||||
BLOCKED_PACKAGES: Set[str] = {
|
||||
# Add known malicious packages
|
||||
}
|
||||
|
||||
def validate_package_url(self, package_url: str) -> bool:
|
||||
"""Validate package URL against security policies."""
|
||||
|
||||
# Check blocked packages
|
||||
if self._is_blocked_package(package_url):
|
||||
return False
|
||||
|
||||
# Validate domain for HTTP/Git URLs
|
||||
if package_url.startswith(('http', 'git+')):
|
||||
parsed = urlparse(package_url.replace('git+', ''))
|
||||
if parsed.hostname not in self.ALLOWED_DOMAINS:
|
||||
return False
|
||||
|
||||
# Additional security checks
|
||||
return self._validate_package_name(package_url)
|
||||
|
||||
def _is_blocked_package(self, package_url: str) -> bool:
|
||||
"""Check if package is in blocklist."""
|
||||
for blocked in self.BLOCKED_PACKAGES:
|
||||
if blocked in package_url.lower():
|
||||
return True
|
||||
return False
|
||||
|
||||
def _validate_package_name(self, package_url: str) -> bool:
|
||||
"""Validate package name format."""
|
||||
# Basic validation for malicious patterns
|
||||
malicious_patterns = [
|
||||
r'\.\./', # Path traversal
|
||||
r'[;&|`$]', # Command injection
|
||||
r'<script', # XSS attempts
|
||||
]
|
||||
|
||||
for pattern in malicious_patterns:
|
||||
if re.search(pattern, package_url, re.IGNORECASE):
|
||||
return False
|
||||
|
||||
return True
|
||||
```
|
||||
|
||||
## 5. Implementation Plan
|
||||
|
||||
All implementation must pass existing lints and tests. New functionality requires comprehensive test coverage including unit tests, integration tests, and end-to-end scenarios.
|
||||
|
||||
### 5.1 Dynamic Agent Loading Foundation (M1)
|
||||
|
||||
#### 5.1.1 Dynamic Agent Loader Implementation
|
||||
|
||||
* `openhands/agent_server/dynamic_agent_loader.py`
|
||||
* `tests/unit/agent_server/test_dynamic_agent_loader.py`
|
||||
|
||||
Implement core dynamic agent loading functionality with package installation, module importing, and agent instantiation.
|
||||
|
||||
#### 5.1.2 Package Security Validation
|
||||
|
||||
* `openhands/agent_server/security/package_validator.py`
|
||||
* `tests/unit/agent_server/security/test_package_validator.py`
|
||||
|
||||
Add security validation for custom agent packages including domain allowlists and malicious pattern detection.
|
||||
|
||||
**Demo**: Load a simple custom agent from a Git repository and verify it responds to basic queries through the existing `/ask_agent` HTTP API.
|
||||
|
||||
### 5.2 Sandbox Service Integration (M2)
|
||||
|
||||
#### 5.2.1 Enhanced Sandbox Specification
|
||||
|
||||
* `openhands/app_server/sandbox/docker_sandbox_spec_service.py` (modifications)
|
||||
* `tests/unit/app_server/sandbox/test_docker_sandbox_spec_service.py` (enhancements)
|
||||
|
||||
Extend existing sandbox service to support dynamic agent loading configuration through environment variables.
|
||||
|
||||
#### 5.2.2 Agent Server Startup Integration
|
||||
|
||||
* `openhands/agent_server/conversation_service.py` (modifications)
|
||||
* `openhands/agent_server/api.py` (startup enhancements)
|
||||
* `tests/unit/agent_server/test_conversation_service.py` (enhancements)
|
||||
|
||||
Integrate dynamic agent loading into agent server startup and conversation management processes.
|
||||
|
||||
**Demo**: Create conversations with custom agents specified via environment variables and demonstrate proper agent instantiation and tool execution.
|
||||
|
||||
### 5.3 API Integration (M3)
|
||||
|
||||
#### 5.3.1 Enhanced Conversation Creation API
|
||||
|
||||
* `openhands/server/routes/conversation_routes.py` (modifications)
|
||||
* `tests/unit/server/routes/test_conversation_routes.py` (enhancements)
|
||||
|
||||
Extend conversation creation API to accept agent package URLs and configuration parameters.
|
||||
|
||||
#### 5.3.2 Error Handling and Fallback
|
||||
|
||||
* `openhands/agent_server/dynamic_agent_loader.py` (enhancements)
|
||||
* `tests/unit/agent_server/test_dynamic_agent_fallback.py`
|
||||
|
||||
Implement comprehensive error handling with fallback to default agents when custom agent loading fails.
|
||||
|
||||
**Demo**: Create conversations through API endpoints with various package URL formats (Git, PyPI, ZIP) and demonstrate proper error handling and fallback behavior.
|
||||
|
||||
### 5.4 Advanced Features and Optimization (M4)
|
||||
|
||||
#### 5.4.1 Agent Caching and Performance
|
||||
|
||||
* `openhands/agent_server/agent_cache.py`
|
||||
* `tests/unit/agent_server/test_agent_cache.py`
|
||||
|
||||
Implement agent instance caching to avoid repeated package installation and improve performance for multiple conversations with the same custom agent.
|
||||
|
||||
#### 5.4.2 Package Management and Cleanup
|
||||
|
||||
* `openhands/agent_server/package_manager.py`
|
||||
* `tests/unit/agent_server/test_package_manager.py`
|
||||
|
||||
Add package lifecycle management including cleanup of unused packages and version management for package updates.
|
||||
|
||||
**Demo**: Deploy multiple conversations with different custom agents simultaneously and demonstrate proper resource management, caching, and cleanup behavior.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
This design document is based on analysis of the following source materials:
|
||||
|
||||
1. **OpenHands V1 Architecture**: Analysis of `openhands/app_server/sandbox/docker_sandbox_spec_service.py` and `openhands/app_server/event_callback/github_v1_callback_processor.py` for understanding the V1 flow and agent server integration.
|
||||
|
||||
2. **Software Agent SDK**: Analysis of the `software-agent-sdk` repository, specifically:
|
||||
- `openhands-agent-server/openhands/agent_server/conversation_router.py` for HTTP API patterns
|
||||
- `openhands-sdk/openhands/sdk/agent/base.py` for agent interface requirements
|
||||
- `examples/01_standalone_sdk/02_custom_tools.py` for custom agent implementation patterns
|
||||
|
||||
3. **Agent Server Models**: Analysis of `openhands.agent_server.models` imports in the main OpenHands codebase for understanding the API contract between main server and agent server.
|
||||
|
||||
4. **Container Architecture**: Analysis of `AGENT_SERVER_IMAGE` constant usage in `openhands/app_server/sandbox/sandbox_spec_service.py` for understanding the current container deployment model.
|
||||
|
||||
All technical specifications and implementation details are derived from examination of the existing codebase and established patterns within the OpenHands ecosystem.
|
||||
@@ -292,26 +292,18 @@ class GithubManager(Manager):
|
||||
f'[GitHub] Created conversation {conversation_id} for user {user_info.username}'
|
||||
)
|
||||
|
||||
from openhands.server.shared import ConversationStoreImpl, config
|
||||
|
||||
conversation_store = await ConversationStoreImpl.get_instance(
|
||||
config, github_view.user_info.keycloak_user_id
|
||||
# Create a GithubCallbackProcessor
|
||||
processor = GithubCallbackProcessor(
|
||||
github_view=github_view,
|
||||
send_summary_instruction=True,
|
||||
)
|
||||
metadata = await conversation_store.get_metadata(conversation_id)
|
||||
|
||||
if metadata.conversation_version != 'v1':
|
||||
# Create a GithubCallbackProcessor
|
||||
processor = GithubCallbackProcessor(
|
||||
github_view=github_view,
|
||||
send_summary_instruction=True,
|
||||
)
|
||||
# Register the callback processor
|
||||
register_callback_processor(conversation_id, processor)
|
||||
|
||||
# Register the callback processor
|
||||
register_callback_processor(conversation_id, processor)
|
||||
|
||||
logger.info(
|
||||
f'[Github] Registered callback processor for conversation {conversation_id}'
|
||||
)
|
||||
logger.info(
|
||||
f'[Github] Registered callback processor for conversation {conversation_id}'
|
||||
)
|
||||
|
||||
# Send message with conversation link
|
||||
conversation_link = CONVERSATION_URL.format(conversation_id)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from uuid import UUID, uuid4
|
||||
from uuid import uuid4
|
||||
|
||||
from github import Github, GithubIntegration
|
||||
from github.Issue import Issue
|
||||
@@ -26,22 +26,10 @@ from storage.proactive_conversation_store import ProactiveConversationStore
|
||||
from storage.saas_secrets_store import SaasSecretsStore
|
||||
from storage.saas_settings_store import SaasSettingsStore
|
||||
|
||||
from openhands.agent_server.models import SendMessageRequest
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
AppConversationStartRequest,
|
||||
AppConversationStartTaskStatus,
|
||||
)
|
||||
from openhands.app_server.config import get_app_conversation_service
|
||||
from openhands.app_server.services.injector import InjectorState
|
||||
from openhands.app_server.user.specifiy_user_context import USER_CONTEXT_ATTR
|
||||
from openhands.app_server.user.user_context import UserContext
|
||||
from openhands.app_server.user.user_models import UserInfo
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.integrations.github.github_service import GithubServiceImpl
|
||||
from openhands.integrations.provider import PROVIDER_TOKEN_TYPE, ProviderType
|
||||
from openhands.integrations.service_types import Comment
|
||||
from openhands.sdk import TextContent
|
||||
from openhands.sdk.conversation.secret_source import SecretSource
|
||||
from openhands.server.services.conversation_service import (
|
||||
initialize_conversation,
|
||||
start_conversation,
|
||||
@@ -55,49 +43,6 @@ from openhands.utils.async_utils import call_sync_from_async
|
||||
OH_LABEL, INLINE_OH_LABEL = get_oh_labels(HOST)
|
||||
|
||||
|
||||
class GithubUserContext(UserContext):
|
||||
"""User context for GitHub integration that provides user info without web request."""
|
||||
|
||||
def __init__(self, keycloak_user_id: str, git_provider_tokens: PROVIDER_TOKEN_TYPE):
|
||||
self.keycloak_user_id = keycloak_user_id
|
||||
self.git_provider_tokens = git_provider_tokens
|
||||
self.settings_store = SaasSettingsStore(
|
||||
user_id=self.keycloak_user_id,
|
||||
session_maker=session_maker,
|
||||
config=get_config(),
|
||||
)
|
||||
|
||||
self.secrets_store = SaasSecretsStore(
|
||||
self.keycloak_user_id, session_maker, get_config()
|
||||
)
|
||||
|
||||
async def get_user_id(self) -> str | None:
|
||||
return self.keycloak_user_id
|
||||
|
||||
async def get_user_info(self) -> UserInfo:
|
||||
user_settings = await self.settings_store.load()
|
||||
return UserInfo(
|
||||
id=self.keycloak_user_id,
|
||||
**user_settings.model_dump(context={'expose_secrets': True}),
|
||||
)
|
||||
|
||||
async def get_authenticated_git_url(self, repository: str) -> str:
|
||||
# This would need to be implemented based on the git provider tokens
|
||||
# For now, return a basic HTTPS URL
|
||||
return f'https://github.com/{repository}.git'
|
||||
|
||||
async def get_latest_token(self, provider_type: ProviderType) -> str | None:
|
||||
# Return the appropriate token from git_provider_tokens
|
||||
if provider_type == ProviderType.GITHUB and self.git_provider_tokens:
|
||||
return self.git_provider_tokens.get(ProviderType.GITHUB)
|
||||
return None
|
||||
|
||||
async def get_secrets(self) -> dict[str, SecretSource]:
|
||||
# Return empty dict for now - GitHub integration handles secrets separately
|
||||
user_secrets = await self.secrets_store.load()
|
||||
return dict(user_secrets.custom_secrets) if user_secrets else {}
|
||||
|
||||
|
||||
async def get_user_proactive_conversation_setting(user_id: str | None) -> bool:
|
||||
"""Get the user's proactive conversation setting.
|
||||
|
||||
@@ -131,35 +76,6 @@ async def get_user_proactive_conversation_setting(user_id: str | None) -> bool:
|
||||
return settings.enable_proactive_conversation_starters
|
||||
|
||||
|
||||
async def get_user_v1_enabled_setting(user_id: str | None) -> bool:
|
||||
"""Get the user's V1 conversation API setting.
|
||||
|
||||
Args:
|
||||
user_id: The keycloak user ID
|
||||
|
||||
Returns:
|
||||
True if V1 conversations are enabled for this user, False otherwise
|
||||
"""
|
||||
|
||||
# If no user ID is provided, we can't check user settings
|
||||
if not user_id:
|
||||
return False
|
||||
|
||||
config = get_config()
|
||||
settings_store = SaasSettingsStore(
|
||||
user_id=user_id, session_maker=session_maker, config=config
|
||||
)
|
||||
|
||||
settings = await call_sync_from_async(
|
||||
settings_store.get_user_settings_by_keycloak_id, user_id
|
||||
)
|
||||
|
||||
if not settings or settings.v1_enabled is None:
|
||||
return False
|
||||
|
||||
return settings.v1_enabled
|
||||
|
||||
|
||||
# =================================================
|
||||
# SECTION: Github view types
|
||||
# =================================================
|
||||
@@ -243,31 +159,6 @@ class GithubIssue(ResolverViewInterface):
|
||||
git_provider_tokens: PROVIDER_TOKEN_TYPE,
|
||||
conversation_metadata: ConversationMetadata,
|
||||
):
|
||||
v1_enabled = await get_user_v1_enabled_setting(self.user_info.keycloak_user_id)
|
||||
|
||||
if v1_enabled:
|
||||
try:
|
||||
# Use V1 app conversation service
|
||||
await self._create_v1_conversation(
|
||||
jinja_env, git_provider_tokens, conversation_metadata
|
||||
)
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f'Error checking V1 settings, falling back to V0: {e}')
|
||||
|
||||
# Use existing V0 conversation service
|
||||
await self._create_v0_conversation(
|
||||
jinja_env, git_provider_tokens, conversation_metadata
|
||||
)
|
||||
|
||||
async def _create_v0_conversation(
|
||||
self,
|
||||
jinja_env: Environment,
|
||||
git_provider_tokens: PROVIDER_TOKEN_TYPE,
|
||||
conversation_metadata: ConversationMetadata,
|
||||
):
|
||||
"""Create conversation using the legacy V0 system."""
|
||||
custom_secrets = await self._get_user_secrets()
|
||||
|
||||
user_instructions, conversation_instructions = await self._get_instructions(
|
||||
@@ -286,77 +177,6 @@ class GithubIssue(ResolverViewInterface):
|
||||
conversation_instructions=conversation_instructions,
|
||||
)
|
||||
|
||||
async def _create_v1_conversation(
|
||||
self,
|
||||
jinja_env: Environment,
|
||||
git_provider_tokens: PROVIDER_TOKEN_TYPE,
|
||||
conversation_metadata: ConversationMetadata,
|
||||
):
|
||||
"""Create conversation using the new V1 app conversation system."""
|
||||
user_instructions, conversation_instructions = await self._get_instructions(
|
||||
jinja_env
|
||||
)
|
||||
|
||||
# Create the initial message request
|
||||
initial_message = SendMessageRequest(
|
||||
role='user', content=[TextContent(text=user_instructions)]
|
||||
)
|
||||
|
||||
# Create the GitHub V1 callback processor
|
||||
github_callback_processor = self._create_github_v1_callback_processor()
|
||||
|
||||
# Get the app conversation service and start the conversation
|
||||
injector_state = InjectorState()
|
||||
|
||||
# Create the V1 conversation start request with the callback processor
|
||||
start_request = AppConversationStartRequest(
|
||||
conversation_id=UUID(conversation_metadata.conversation_id),
|
||||
system_message_suffix=conversation_instructions,
|
||||
initial_message=initial_message,
|
||||
selected_repository=self.full_repo_name,
|
||||
git_provider=ProviderType.GITHUB,
|
||||
title=f'GitHub Issue #{self.issue_number}: {self.title}',
|
||||
trigger=ConversationTrigger.RESOLVER,
|
||||
processors=[
|
||||
github_callback_processor
|
||||
], # Pass the callback processor directly
|
||||
)
|
||||
|
||||
# Set up the GitHub user context for the V1 system
|
||||
github_user_context = GithubUserContext(
|
||||
keycloak_user_id=self.user_info.keycloak_user_id,
|
||||
git_provider_tokens=git_provider_tokens,
|
||||
)
|
||||
setattr(injector_state, USER_CONTEXT_ATTR, github_user_context)
|
||||
|
||||
async with get_app_conversation_service(
|
||||
injector_state
|
||||
) as app_conversation_service:
|
||||
async for task in app_conversation_service.start_app_conversation(
|
||||
start_request
|
||||
):
|
||||
if task.status == AppConversationStartTaskStatus.ERROR:
|
||||
logger.error(f'Failed to start V1 conversation: {task.detail}')
|
||||
raise RuntimeError(
|
||||
f'Failed to start V1 conversation: {task.detail}'
|
||||
)
|
||||
|
||||
def _create_github_v1_callback_processor(self):
|
||||
"""Create a V1 callback processor for GitHub integration."""
|
||||
from openhands.app_server.event_callback.github_v1_callback_processor import (
|
||||
GithubV1CallbackProcessor,
|
||||
)
|
||||
|
||||
# Create and return the GitHub V1 callback processor
|
||||
return GithubV1CallbackProcessor(
|
||||
github_view_data={
|
||||
'issue_number': self.issue_number,
|
||||
'full_repo_name': self.full_repo_name,
|
||||
'installation_id': self.installation_id,
|
||||
},
|
||||
send_summary_instruction=self.send_summary_instruction,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GithubIssueComment(GithubIssue):
|
||||
@@ -472,24 +292,6 @@ class GithubInlinePRComment(GithubPRComment):
|
||||
|
||||
return user_instructions, conversation_instructions
|
||||
|
||||
def _create_github_v1_callback_processor(self):
|
||||
"""Create a V1 callback processor for GitHub integration."""
|
||||
from openhands.app_server.event_callback.github_v1_callback_processor import (
|
||||
GithubV1CallbackProcessor,
|
||||
)
|
||||
|
||||
# Create and return the GitHub V1 callback processor
|
||||
return GithubV1CallbackProcessor(
|
||||
github_view_data={
|
||||
'issue_number': self.issue_number,
|
||||
'full_repo_name': self.full_repo_name,
|
||||
'installation_id': self.installation_id,
|
||||
'comment_id': self.comment_id,
|
||||
},
|
||||
inline_pr_comment=True,
|
||||
send_summary_instruction=self.send_summary_instruction,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GithubFailingAction:
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
"""Add v1_enabled column to user_settings
|
||||
|
||||
Revision ID: 083
|
||||
Revises: 082
|
||||
Create Date: 2025-11-18 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '083'
|
||||
down_revision: Union[str, None] = '082'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable boolean ``v1_enabled`` column to ``user_settings``."""
    v1_enabled_column = sa.Column('v1_enabled', sa.Boolean(), nullable=True)
    op.add_column('user_settings', v1_enabled_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``v1_enabled`` column from ``user_settings``."""
    table_name, column_name = 'user_settings', 'v1_enabled'
    op.drop_column(table_name, column_name)
|
||||
50
enterprise/poetry.lock
generated
50
enterprise/poetry.lock
generated
@@ -5820,15 +5820,16 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
|
||||
|
||||
[[package]]
|
||||
name = "openhands-agent-server"
|
||||
version = "1.3.0"
|
||||
version = "1.1.0"
|
||||
description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openhands_agent_server-1.3.0-py3-none-any.whl", hash = "sha256:2f87f790c740dc3fb81821c5f9fa375af875fbb937ebca3baa6dc5c035035b3c"},
|
||||
{file = "openhands_agent_server-1.3.0.tar.gz", hash = "sha256:0a83ae77373f5c41d0ba0e22d8f0f6144d54d55784183a50b7c098c96cd5135c"},
|
||||
{file = "openhands_agent_server-1.1.0-py3-none-any.whl", hash = "sha256:59a856883df23488c0723e47655ef21649a321fcd4709a25a4690866eff6ac88"},
|
||||
{file = "openhands_agent_server-1.1.0.tar.gz", hash = "sha256:e39bebd39afd45cfcfd765005e7c4e5409e46678bd7612ae20bae79f7057b935"},
|
||||
]
|
||||
develop = false
|
||||
|
||||
[package.dependencies]
|
||||
aiosqlite = ">=0.19"
|
||||
@@ -5841,9 +5842,16 @@ uvicorn = ">=0.31.1"
|
||||
websockets = ">=12"
|
||||
wsproto = ">=1.2.0"
|
||||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/OpenHands/agent-sdk.git"
|
||||
reference = "15f565b8ac38876e40dc05c08e2b04ccaae4a66d"
|
||||
resolved_reference = "15f565b8ac38876e40dc05c08e2b04ccaae4a66d"
|
||||
subdirectory = "openhands-agent-server"
|
||||
|
||||
[[package]]
|
||||
name = "openhands-ai"
|
||||
version = "0.62.0"
|
||||
version = "0.0.0-post.5576+ed2ac6040"
|
||||
description = "OpenHands: Code Less, Make More"
|
||||
optional = false
|
||||
python-versions = "^3.12,<3.14"
|
||||
@@ -5885,9 +5893,9 @@ memory-profiler = "^0.61.0"
|
||||
numpy = "*"
|
||||
openai = "1.99.9"
|
||||
openhands-aci = "0.3.2"
|
||||
openhands-agent-server = "1.3.0"
|
||||
openhands-sdk = "1.3.0"
|
||||
openhands-tools = "1.3.0"
|
||||
openhands-agent-server = "1.1.0"
|
||||
openhands-sdk = "1.1.0"
|
||||
openhands-tools = "1.1.0"
|
||||
opentelemetry-api = "^1.33.1"
|
||||
opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
|
||||
pathspec = "^0.12.1"
|
||||
@@ -5943,15 +5951,16 @@ url = ".."
|
||||
|
||||
[[package]]
|
||||
name = "openhands-sdk"
|
||||
version = "1.3.0"
|
||||
version = "1.1.0"
|
||||
description = "OpenHands SDK - Core functionality for building AI agents"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openhands_sdk-1.3.0-py3-none-any.whl", hash = "sha256:feee838346f8e60ea3e4d3391de7cb854314eb8b3c9e3dbbb56f98a784aadc56"},
|
||||
{file = "openhands_sdk-1.3.0.tar.gz", hash = "sha256:2d060803a78de462121b56dea717a66356922deb02276f37b29fae8af66343fb"},
|
||||
{file = "openhands_sdk-1.1.0-py3-none-any.whl", hash = "sha256:4a984ce1687a48cf99a67fdf3d37b116f8b2840743d4807810b5024af6a1d57e"},
|
||||
{file = "openhands_sdk-1.1.0.tar.gz", hash = "sha256:855e0d8f3657205e4119e50520c17e65b3358b1a923f7a051a82512a54bf426c"},
|
||||
]
|
||||
develop = false
|
||||
|
||||
[package.dependencies]
|
||||
deprecation = ">=2.1.0"
|
||||
@@ -5968,17 +5977,25 @@ websockets = ">=12"
|
||||
[package.extras]
|
||||
boto3 = ["boto3 (>=1.35.0)"]
|
||||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/OpenHands/agent-sdk.git"
|
||||
reference = "15f565b8ac38876e40dc05c08e2b04ccaae4a66d"
|
||||
resolved_reference = "15f565b8ac38876e40dc05c08e2b04ccaae4a66d"
|
||||
subdirectory = "openhands-sdk"
|
||||
|
||||
[[package]]
|
||||
name = "openhands-tools"
|
||||
version = "1.3.0"
|
||||
version = "1.1.0"
|
||||
description = "OpenHands Tools - Runtime tools for AI agents"
|
||||
optional = false
|
||||
python-versions = ">=3.12"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openhands_tools-1.3.0-py3-none-any.whl", hash = "sha256:f31056d87c3058ac92709f9161c7c602daeee3ed0cb4439097b43cda105ed03e"},
|
||||
{file = "openhands_tools-1.3.0.tar.gz", hash = "sha256:3da46f09e28593677d3e17252ce18584fcc13caab1a73213e66bd7edca2cebe0"},
|
||||
{file = "openhands_tools-1.1.0-py3-none-any.whl", hash = "sha256:767d6746f05edade49263aa24450a037485a3dc23379f56917ef19aad22033f9"},
|
||||
{file = "openhands_tools-1.1.0.tar.gz", hash = "sha256:c2fadaa4f4e16e9a3df5781ea847565dcae7171584f09ef7c0e1d97c8dfc83f6"},
|
||||
]
|
||||
develop = false
|
||||
|
||||
[package.dependencies]
|
||||
bashlex = ">=0.18"
|
||||
@@ -5990,6 +6007,13 @@ libtmux = ">=0.46.2"
|
||||
openhands-sdk = "*"
|
||||
pydantic = ">=2.11.7"
|
||||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/OpenHands/agent-sdk.git"
|
||||
reference = "15f565b8ac38876e40dc05c08e2b04ccaae4a66d"
|
||||
resolved_reference = "15f565b8ac38876e40dc05c08e2b04ccaae4a66d"
|
||||
subdirectory = "openhands-tools"
|
||||
|
||||
[[package]]
|
||||
name = "openpyxl"
|
||||
version = "3.1.5"
|
||||
|
||||
@@ -30,11 +30,3 @@ JIRA_DC_CLIENT_SECRET = os.getenv('JIRA_DC_CLIENT_SECRET', '').strip()
|
||||
JIRA_DC_BASE_URL = os.getenv('JIRA_DC_BASE_URL', '').strip()
|
||||
JIRA_DC_ENABLE_OAUTH = os.getenv('JIRA_DC_ENABLE_OAUTH', '1') in ('1', 'true')
|
||||
AUTH_URL = os.getenv('AUTH_URL', '').rstrip('/')
|
||||
ROLE_CHECK_ENABLED = os.getenv('ROLE_CHECK_ENABLED', 'false').lower() in (
|
||||
'1',
|
||||
'true',
|
||||
't',
|
||||
'yes',
|
||||
'y',
|
||||
'on',
|
||||
)
|
||||
|
||||
@@ -12,7 +12,6 @@ from server.auth.constants import (
|
||||
KEYCLOAK_CLIENT_ID,
|
||||
KEYCLOAK_REALM_NAME,
|
||||
KEYCLOAK_SERVER_URL_EXT,
|
||||
ROLE_CHECK_ENABLED,
|
||||
)
|
||||
from server.auth.gitlab_sync import schedule_gitlab_repo_sync
|
||||
from server.auth.saas_user_auth import SaasUserAuth
|
||||
@@ -31,6 +30,7 @@ from openhands.server.services.conversation_service import create_provider_token
|
||||
from openhands.server.shared import config
|
||||
from openhands.server.user_auth import get_access_token
|
||||
from openhands.server.user_auth.user_auth import get_user_auth
|
||||
from openhands.utils.posthog_tracker import track_user_signup_completed
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('ignore')
|
||||
@@ -133,12 +133,6 @@ async def keycloak_callback(
|
||||
|
||||
user_info = await token_manager.get_user_info(keycloak_access_token)
|
||||
logger.debug(f'user_info: {user_info}')
|
||||
if ROLE_CHECK_ENABLED and 'roles' not in user_info:
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
content={'error': 'Missing required role'},
|
||||
)
|
||||
|
||||
if 'sub' not in user_info or 'preferred_username' not in user_info:
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
@@ -369,6 +363,12 @@ async def accept_tos(request: Request):
|
||||
|
||||
logger.info(f'User {user_id} accepted TOS')
|
||||
|
||||
# Track user signup completion in PostHog
|
||||
track_user_signup_completed(
|
||||
user_id=user_id,
|
||||
signup_timestamp=user_settings.accepted_tos.isoformat(),
|
||||
)
|
||||
|
||||
response = JSONResponse(
|
||||
status_code=status.HTTP_200_OK, content={'redirect_url': redirect_url}
|
||||
)
|
||||
|
||||
@@ -28,6 +28,7 @@ from storage.subscription_access import SubscriptionAccess
|
||||
|
||||
from openhands.server.user_auth import get_user_id
|
||||
from openhands.utils.http_session import httpx_verify_option
|
||||
from openhands.utils.posthog_tracker import track_credits_purchased
|
||||
|
||||
stripe.api_key = STRIPE_API_KEY
|
||||
billing_router = APIRouter(prefix='/api/billing')
|
||||
@@ -457,6 +458,20 @@ async def success_callback(session_id: str, request: Request):
|
||||
)
|
||||
session.commit()
|
||||
|
||||
# Track credits purchased in PostHog
|
||||
try:
|
||||
track_credits_purchased(
|
||||
user_id=billing_session.user_id,
|
||||
amount_usd=amount_subtotal / 100, # Convert cents to dollars
|
||||
credits_added=add_credits,
|
||||
stripe_session_id=session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f'Failed to track credits purchase: {e}',
|
||||
extra={'user_id': billing_session.user_id, 'error': str(e)},
|
||||
)
|
||||
|
||||
return RedirectResponse(
|
||||
f'{request.base_url}settings/billing?checkout=success', status_code=302
|
||||
)
|
||||
|
||||
@@ -97,10 +97,6 @@ class SaasSettingsStore(SettingsStore):
|
||||
return settings
|
||||
|
||||
async def store(self, item: Settings):
|
||||
# Check if provider is OpenHands and generate API key if needed
|
||||
if item and self._is_openhands_provider(item):
|
||||
await self._ensure_openhands_api_key(item)
|
||||
|
||||
with self.session_maker() as session:
|
||||
existing = None
|
||||
kwargs = {}
|
||||
@@ -372,30 +368,6 @@ class SaasSettingsStore(SettingsStore):
|
||||
def _should_encrypt(self, key: str) -> bool:
|
||||
return key in ('llm_api_key', 'llm_api_key_for_byor', 'search_api_key')
|
||||
|
||||
def _is_openhands_provider(self, item: Settings) -> bool:
|
||||
"""Check if the settings use the OpenHands provider."""
|
||||
return bool(item.llm_model and item.llm_model.startswith('openhands/'))
|
||||
|
||||
async def _ensure_openhands_api_key(self, item: Settings) -> None:
    """Generate an OpenHands API key and store it on ``item.llm_api_key``.

    A new key is requested through ``_generate_openhands_key`` on every call.
    When generation fails (a falsy result) ``item`` is left untouched and a
    warning is logged.
    """
    generated_key = await self._generate_openhands_key()

    if not generated_key:
        logger.warning(
            'saas_settings_store:store:failed_to_generate_openhands_key',
            extra={'user_id': self.user_id},
        )
        return

    item.llm_api_key = SecretStr(generated_key)
    logger.info(
        'saas_settings_store:store:generated_openhands_key',
        extra={'user_id': self.user_id},
    )
|
||||
|
||||
async def _create_user_in_lite_llm(
|
||||
self, client: httpx.AsyncClient, email: str | None, max_budget: int, spend: int
|
||||
):
|
||||
@@ -418,55 +390,3 @@ class SaasSettingsStore(SettingsStore):
|
||||
},
|
||||
)
|
||||
return response
|
||||
|
||||
async def _generate_openhands_key(self) -> str | None:
    """Request a new OpenHands provider key for this user from LiteLLM.

    POSTs to ``{LITE_LLM_API_URL}/key/generate`` with the user id and
    ``{'type': 'openhands'}`` metadata, then reads the ``key`` field of the
    JSON response.

    Returns:
        The generated key, or None when the LiteLLM URL / API key is not
        configured, the response carries no key, or any exception is raised
        (exceptions are logged here, not propagated).
    """
    if not LITE_LLM_API_KEY or not LITE_LLM_API_URL:
        logger.warning(
            'saas_settings_store:_generate_openhands_key:litellm_config_not_found',
            extra={'user_id': self.user_id},
        )
        return None

    try:
        auth_headers = {'x-goog-api-key': LITE_LLM_API_KEY}
        async with httpx.AsyncClient(
            verify=httpx_verify_option(), headers=auth_headers
        ) as client:
            payload = {
                'user_id': self.user_id,
                'metadata': {'type': 'openhands'},
            }
            response = await client.post(
                f'{LITE_LLM_API_URL}/key/generate', json=payload
            )
            response.raise_for_status()
            response_json = response.json()
            key = response_json.get('key')

            if not key:
                logger.error(
                    'saas_settings_store:_generate_openhands_key:no_key_in_response',
                    extra={'user_id': self.user_id, 'response_json': response_json},
                )
                return None

            # key is known to be truthy from here on.
            # NOTE(review): the first 10 characters of the key are written to
            # the log record — confirm this is acceptable for a secret.
            key_prefix = key[:10] + '...' if len(key) > 10 else key
            logger.info(
                'saas_settings_store:_generate_openhands_key:success',
                extra={
                    'user_id': self.user_id,
                    'key_length': len(key),
                    'key_prefix': key_prefix,
                },
            )
            return key
    except Exception as e:
        logger.exception(
            'saas_settings_store:_generate_openhands_key:error',
            extra={'user_id': self.user_id, 'error': str(e)},
        )
        return None
|
||||
|
||||
@@ -38,4 +38,3 @@ class UserSettings(Base): # type: ignore
|
||||
email_verified = Column(Boolean, nullable=True)
|
||||
git_user_name = Column(String, nullable=True)
|
||||
git_user_email = Column(String, nullable=True)
|
||||
v1_enabled = Column(Boolean, nullable=True)
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
from unittest import IsolatedAsyncioTestCase, TestCase, mock
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from integrations.github.github_view import GithubFactory, GithubIssue, get_oh_labels
|
||||
from integrations.github.github_view import GithubFactory, get_oh_labels
|
||||
from integrations.models import Message, SourceType
|
||||
from integrations.types import UserData
|
||||
|
||||
|
||||
class TestGithubLabels(TestCase):
|
||||
@@ -77,128 +75,3 @@ class TestGithubCommentCaseInsensitivity(TestCase):
|
||||
self.assertTrue(GithubFactory.is_issue_comment(message_lower))
|
||||
self.assertTrue(GithubFactory.is_issue_comment(message_upper))
|
||||
self.assertTrue(GithubFactory.is_issue_comment(message_mixed))
|
||||
|
||||
|
||||
class TestGithubV1ConversationRouting(IsolatedAsyncioTestCase):
    """Test V1 conversation routing logic in GitHub integration.

    The tests are coroutines, so the class derives from
    ``IsolatedAsyncioTestCase``: a plain ``TestCase`` would call each
    ``async def`` test, receive an un-awaited coroutine and report success
    without executing any assertion.
    """

    def setUp(self):
        """Set up test fixtures."""
        # Create a proper UserData instance instead of MagicMock
        user_data = UserData(
            user_id=123, username='testuser', keycloak_user_id='test-keycloak-id'
        )

        # Create a mock raw_payload
        raw_payload = Message(
            source=SourceType.GITHUB,
            message={
                'payload': {
                    'action': 'opened',
                    'issue': {'number': 123},
                }
            },
        )

        self.github_issue = GithubIssue(
            user_info=user_data,
            full_repo_name='test/repo',
            issue_number=123,
            installation_id=456,
            conversation_id='test-conversation-id',
            should_extract=True,
            send_summary_instruction=False,
            is_public_repo=True,
            raw_payload=raw_payload,
            uuid='test-uuid',
            title='Test Issue',
            description='Test issue description',
            previous_comments=[],
        )

    @patch('integrations.github.github_view.get_user_v1_enabled_setting')
    @patch.object(GithubIssue, '_create_v0_conversation')
    @patch.object(GithubIssue, '_create_v1_conversation')
    async def test_create_new_conversation_routes_to_v0_when_disabled(
        self, mock_create_v1, mock_create_v0, mock_get_v1_setting
    ):
        """Test that conversation creation routes to V0 when v1_enabled is False."""
        # Mock v1_enabled as False
        mock_get_v1_setting.return_value = False
        mock_create_v0.return_value = None
        mock_create_v1.return_value = None

        # Mock parameters
        jinja_env = MagicMock()
        git_provider_tokens = MagicMock()
        conversation_metadata = MagicMock()

        # Call the method
        await self.github_issue.create_new_conversation(
            jinja_env, git_provider_tokens, conversation_metadata
        )

        # Verify V0 was called and V1 was not
        mock_create_v0.assert_called_once_with(
            jinja_env, git_provider_tokens, conversation_metadata
        )
        mock_create_v1.assert_not_called()

    @patch('integrations.github.github_view.get_user_v1_enabled_setting')
    @patch.object(GithubIssue, '_create_v0_conversation')
    @patch.object(GithubIssue, '_create_v1_conversation')
    async def test_create_new_conversation_routes_to_v1_when_enabled(
        self, mock_create_v1, mock_create_v0, mock_get_v1_setting
    ):
        """Test that conversation creation routes to V1 when v1_enabled is True."""
        # Mock v1_enabled as True
        mock_get_v1_setting.return_value = True
        mock_create_v0.return_value = None
        mock_create_v1.return_value = None

        # Mock parameters
        jinja_env = MagicMock()
        git_provider_tokens = MagicMock()
        conversation_metadata = MagicMock()

        # Call the method
        await self.github_issue.create_new_conversation(
            jinja_env, git_provider_tokens, conversation_metadata
        )

        # Verify V1 was called and V0 was not
        mock_create_v1.assert_called_once_with(
            jinja_env, git_provider_tokens, conversation_metadata
        )
        mock_create_v0.assert_not_called()

    @patch('integrations.github.github_view.get_user_v1_enabled_setting')
    @patch.object(GithubIssue, '_create_v0_conversation')
    @patch.object(GithubIssue, '_create_v1_conversation')
    async def test_create_new_conversation_fallback_on_v1_setting_error(
        self, mock_create_v1, mock_create_v0, mock_get_v1_setting
    ):
        """Test that conversation creation falls back to V0 when _create_v1_conversation fails."""
        # Mock v1_enabled as True so V1 is attempted
        mock_get_v1_setting.return_value = True
        # Mock _create_v1_conversation to raise an exception
        mock_create_v1.side_effect = Exception('V1 conversation creation failed')
        mock_create_v0.return_value = None

        # Mock parameters
        jinja_env = MagicMock()
        git_provider_tokens = MagicMock()
        conversation_metadata = MagicMock()

        # Call the method
        await self.github_issue.create_new_conversation(
            jinja_env, git_provider_tokens, conversation_metadata
        )

        # Verify V1 was attempted first, then V0 was called as fallback
        mock_create_v1.assert_called_once_with(
            jinja_env, git_provider_tokens, conversation_metadata
        )
        mock_create_v0.assert_called_once_with(
            jinja_env, git_provider_tokens, conversation_metadata
        )
|
||||
|
||||
@@ -1,65 +0,0 @@
|
||||
# SWE-fficiency Evaluation
|
||||
|
||||
This folder contains the OpenHands inference generation of the [SWE-fficiency benchmark](https://swefficiency.com/) ([paper](https://arxiv.org/pdf/2507.12415v1)).
|
||||
|
||||
The evaluation consists of three steps:
|
||||
|
||||
1. Environment setup: [install python environment](../../README.md#development-environment) and [configure LLM config](../../README.md#configure-openhands-and-your-llm).
|
||||
2. [Run inference](#running-inference-locally-with-docker): Generate an edit patch for each GitHub issue
|
||||
3. [Evaluate patches](#evaluate-generated-patches)
|
||||
|
||||
## Setup Environment and LLM Configuration
|
||||
|
||||
Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
|
||||
|
||||
## Running inference Locally with Docker
|
||||
|
||||
Make sure your Docker daemon is running and that you have ample disk space (at least 200-500GB, depending on the SWE-fficiency set you are running on) for the instance-level Docker images.
|
||||
|
||||
When the `run_infer.sh` script is started, it will automatically pull the relevant SWE-Perf images.
|
||||
For example, for instance ID `scikit-learn_scikit-learn-11674`, it will try to pull our pre-built Docker image `betty1202/sweb.eval.x86_64.scikit-learn_s_scikit-learn-11674` from Docker Hub.
|
||||
This image will be used to create an OpenHands runtime image in which the agent will operate.
|
||||
|
||||
```bash
|
||||
./evaluation/benchmarks/swefficiency/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split] [n_runs] [mode]
|
||||
|
||||
# Example
|
||||
./evaluation/benchmarks/swefficiency/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 500 100 1 swefficiency/swefficiency test
|
||||
```
|
||||
|
||||
where `model_config` is mandatory, and the rest are optional.
|
||||
|
||||
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
|
||||
LLM settings, as defined in your `config.toml`.
|
||||
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
|
||||
like to evaluate. It could also be a release tag like `0.6.2`.
|
||||
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting
|
||||
to `CodeActAgent`.
|
||||
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
|
||||
default, the script evaluates the entire SWE-Perf test set (140 issues). Note:
|
||||
in order to use `eval_limit`, you must also set `agent`.
|
||||
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
|
||||
default, it is set to 100.
|
||||
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
|
||||
default, it is set to 1.
|
||||
- `dataset`, a huggingface dataset name. e.g. `SWE-Perf/SWE-Perf`, specifies which dataset to evaluate on.
|
||||
- `dataset_split`, split for the huggingface dataset. e.g., `test`, `dev`. Default to `test`.
|
||||
|
||||
- `n_runs`, e.g. `3`, is the number of times to run the evaluation. Default is 1.
|
||||
- `mode`, e.g. `swt`, `swt-ci`, or `swe`, specifies the evaluation mode. Default is `swe`.
|
||||
|
||||
> [!CAUTION]
|
||||
> Setting `num_workers` larger than 1 is not officially tested, YMMV.
|
||||
|
||||
|
||||
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
|
||||
|
||||
then your command would be:
|
||||
|
||||
```bash
|
||||
./evaluation/benchmarks/swefficiency/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 10
|
||||
```
|
||||
|
||||
### 2. Run the SWE-fficiency benchmark official evaluation
|
||||
|
||||
Once the output is converted, use the [official SWE-fficiency benchmark evaluation](https://github.com/swefficiency/swefficiency) to evaluate it.
|
||||
@@ -1,52 +0,0 @@
|
||||
"""
|
||||
Utilities for handling binary files and patch generation in SWE-bench evaluation.
|
||||
"""
|
||||
|
||||
|
||||
def remove_binary_diffs(patch_text):
    """
    Remove binary file diffs from a git patch.

    The patch is split into per-file sections at every ``diff --git`` header.
    A section is dropped when it contains git's binary marker line
    (``Binary files <a> and <b> differ``); every other section, and any text
    preceding the first header, is kept unchanged.

    Args:
        patch_text (str): The git patch text

    Returns:
        str: The cleaned patch text with binary diffs removed. Lines are
        re-joined with a single newline, so a trailing newline in the input is
        not preserved.
    """
    cleaned_lines = []
    block = []
    is_binary_block = False

    for line in patch_text.splitlines():
        if line.startswith('diff --git '):
            # A new file section begins: flush the previous one unless binary.
            if not is_binary_block:
                cleaned_lines.extend(block)
            block = [line]
            is_binary_block = False
            continue

        # Match the marker only at the start of the line. Hunk content lines
        # are always prefixed with '+', '-' or ' ', so a text file that merely
        # mentions "Binary files" is no longer mistaken for a binary diff.
        if line.startswith('Binary files '):
            is_binary_block = True
        block.append(line)

    # Flush the final section.
    if not is_binary_block:
        cleaned_lines.extend(block)
    return '\n'.join(cleaned_lines)
|
||||
|
||||
|
||||
def remove_binary_files_from_git():
    """
    Build the bash snippet that removes binary files from the git working tree.

    The snippet loops over paths reported by ``git status --porcelain`` as
    modified, added or untracked, and removes each regular file that ``file``
    reports as "executable" or that git attributes mark as binary.

    Returns:
        str: The bash command, with surrounding whitespace stripped
    """
    # NOTE(review): the indentation of the shell lines inside the literal is
    # not visible in this view; they are reproduced flush-left — confirm.
    script = """
for file in $(git status --porcelain | grep -E "^(M| M|\\?\\?|A| A)" | cut -c4-); do
if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
git rm -f "$file" 2>/dev/null || rm -f "$file"
echo "Removed: $file"
fi
done
"""
    return script.strip()
|
||||
@@ -1,960 +0,0 @@
|
||||
import asyncio
|
||||
import copy
|
||||
import functools
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Any, Literal
|
||||
|
||||
import pandas as pd
|
||||
import toml
|
||||
from datasets import load_dataset
|
||||
|
||||
import openhands.agenthub
|
||||
from evaluation.benchmarks.swe_bench.binary_patch_utils import (
|
||||
remove_binary_diffs,
|
||||
remove_binary_files_from_git,
|
||||
)
|
||||
from evaluation.utils.shared import (
|
||||
EvalException,
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_llm_config_for_completions_logging,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
OpenHandsConfig,
|
||||
get_evaluation_parser,
|
||||
get_llm_config_arg,
|
||||
)
|
||||
from openhands.core.config.condenser_config import NoOpCondenserConfig
|
||||
from openhands.core.config.utils import get_condenser_config_arg
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.critic import AgentFinishedCritic
|
||||
from openhands.events.action import CmdRunAction, FileReadAction, MessageAction
|
||||
from openhands.events.observation import (
|
||||
CmdOutputObservation,
|
||||
ErrorObservation,
|
||||
FileReadObservation,
|
||||
)
|
||||
from openhands.events.serialization.event import event_from_dict, event_to_dict
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
from openhands.utils.shutdown_listener import sleep_if_should_continue
|
||||
|
||||
def _env_flag(name: str) -> bool:
    """Read environment variable ``name`` as a flag: True only for 'true' (any case)."""
    return os.environ.get(name, 'false').lower() == 'true'


# Feature switches read once from the environment at import time.
USE_HINT_TEXT = _env_flag('USE_HINT_TEXT')
RUN_WITH_BROWSING = _env_flag('RUN_WITH_BROWSING')

# Names of the supported benchmark modes.
BenchMode = Literal['swe', 'swt', 'swt-ci']


# Maps an agent class name to the function that fakes the user's reply.
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
}
|
||||
|
||||
|
||||
def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
|
||||
return f'{instance.repo}__{instance.version}'.replace('/', '__')
|
||||
|
||||
|
||||
def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
    """Build the initial task prompt for one benchmark instance.

    The prompt names the workspace directory, embeds the instance's
    performance workload, and spells out the optimisation guidelines,
    interpolating the instance's ``test_cmd`` and ``rebuild_cmd``.

    Args:
        instance: Row that provides ``repo``, ``version``, ``workload``,
            ``test_cmd`` and ``rebuild_cmd``.
        metadata: Evaluation metadata; not used by this function.

    Returns:
        A ``MessageAction`` whose content is the prompt. When
        ``RUN_WITH_BROWSING`` is set, a notice forbidding web browsing is
        appended.
    """
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)

    # TODO: Change to testbed?
    # The directory sentence interpolates {workspace_dir_name}; it previously
    # contained the bare placeholder text "workspace_dir_name".
    instruction = f"""
<uploaded_files>
/workspace/{workspace_dir_name}
</uploaded_files>

I’ve uploaded a python code repository in the directory {workspace_dir_name}. Consider the following performance workload and `workload()` function showing an specific usage of the repository:
<performance_workload>
{instance.workload}
</performance_workload>

Can you help me implement the necessary changes to the repository so that the runtime of the `workload()` function is faster? Basic guidelines:
1. Your task is to make changes to non-test files in the /workspace directory to improve the performance of the code running in `workload()`. Please do not directly change the implementation of the `workload()` function to optimize things: I want you to focus on making the workload AS IS run faster by only editing the repository containing code that the `workload()` function calls.
2. Make changes while ensuring the repository is functionally equivalent to the original: your changes should not introduce new bugs or cause already-passing tests to begin failing after your changes. However, you do not need to worry about tests that already fail without any changes made. For relevant test files you find in the repository, you can run them via the bash command `{instance.test_cmd} <test_file>` to check for correctness. Note that running all the tests may take a long time, so you need to determine which tests are relevant to your changes.
3. Make sure the `workload()` function improves in performance after you make changes to the repository. The workload can potentially take some time to run, so please allow it to finish and be generous with setting your timeout parameter (a timeout value of 3600 or larger here is encouraged): for faster iteration, you should adjust the workload script to use fewer iterations. Before you complete your task, please make sure to check that the **original performance workload** and `workload()` function runs successfully and the performance is improved.
4. You may need to reinstall/rebuild the repo for your changes to take effect before testing if you made non-Python changes. Reinstalling may take a long time to run (a timeout value of 3600 or larger here is encouraged), so please be patient with running it and allow it to complete if possible. You can reinstall the repository by running the bash command `{instance.rebuild_cmd}` in the workspace directory.
5. All the dependencies required to run the `workload()` function are already installed in the environment. You should not install or upgrade any dependencies.

Follow these steps to improve performance:
1. As a first step, explore the repository structure.
2. Create a Python script to reproduce the performance workload, execute it with python <workload_file>, and examine the printed output metrics.
3. Edit the source code of the repository to improve performance. Please do not change the contents of the `workload()` function itself, but focus on optimizing the code in the repository that the original `workload()` function uses.
4. If non-Python changes were made, rebuild the repo to make sure the changes take effect.
5. Rerun your script to confirm that performance has improved.
6. If necessary, identify any relevant test files in the repository related to your changes and verify that test statuses did not change after your modifications.
7. After each attempted change, please reflect on the changes attempted and the performance impact observed. If the performance did not improve, consider alternative approaches or optimizations.
8. Once you are satisfied, please use the finish command to complete your task.

Please remember that you should not change the implementation of the `workload()` function. The performance improvement should solely come from editing the source files in the code repository.
"""

    if RUN_WITH_BROWSING:
        instruction += (
            '<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
        )

    return MessageAction(content=instruction)
|
||||
|
||||
|
||||
def get_instance_docker_image(
    instance_id: str,
) -> str:
    """Return the container image reference for one benchmark instance.

    The image lives in the ``swefficiency-images`` repository on ghcr.io and
    is tagged with the instance id verbatim.
    """
    image_repository = 'ghcr.io/swefficiency/swefficiency-images'
    return '{}:{}'.format(image_repository, instance_id)
|
||||
|
||||
|
||||
def get_config(
    instance: pd.Series,
    metadata: EvalMetadata,
    cpu_group: list[int] | None = None,
) -> OpenHandsConfig:
    """Build the OpenHands configuration used to evaluate a single instance.

    Args:
        instance: Dataset row; only ``instance['instance_id']`` is read here.
        metadata: Evaluation metadata providing the agent class, iteration
            limit, LLM config, output directory and condenser config.
        cpu_group: Optional list of CPU ids. When given, the Docker container
            is pinned to exactly these CPUs, capped to that many vCPUs and
            limited to 16g of memory. When ``None`` no Docker runtime kwargs
            are set by this function.

    Returns:
        A fully populated ``OpenHandsConfig`` with sandbox, LLM and agent
        settings applied.
    """
    # Each instance runs in its own pre-built container image.
    base_container_image = get_instance_docker_image(
        instance['instance_id'],
    )
    logger.info(
        f'Using instance container image: {base_container_image}. '
        f'Please make sure this image exists. '
        f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
    )

    sandbox_config = get_default_sandbox_config_for_eval()
    sandbox_config.base_container_image = base_container_image
    sandbox_config.enable_auto_lint = True
    sandbox_config.use_host_network = False
    sandbox_config.timeout = 3600

    # Container cleanup is hard-coded to True here.
    # NOTE(review): earlier comments in this function claimed this was
    # controlled by an environment variable and defaulted to False for
    # multiprocessing stability; the code never did either. Confirm that True
    # is safe when several workers run concurrently before changing it.
    sandbox_config.rm_all_containers = True

    sandbox_config.platform = 'linux/amd64'
    sandbox_config.remote_runtime_resource_factor = 4.0
    sandbox_config.runtime_startup_env_vars.update(
        {
            'NO_CHANGE_TIMEOUT_SECONDS': '900',  # 15 minutes
        }
    )

    if cpu_group is not None:
        logger.info(f'Configuring Docker runtime with CPU group: {cpu_group}')
        sandbox_config.docker_runtime_kwargs = {
            # Pin the container to the CPUs leased to this worker.
            'cpuset_cpus': ','.join(map(str, cpu_group)),
            # Hard cap on CPU time, matching the number of pinned CPUs.
            'nano_cpus': int(1e9 * len(cpu_group)),
            'mem_limit': '16g',
        }

    config = OpenHandsConfig(
        default_agent=metadata.agent_class,
        run_as_openhands=False,
        max_iterations=metadata.max_iterations,
        runtime=os.environ.get('RUNTIME', 'docker'),
        sandbox=sandbox_config,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
    )
    config.set_llm_config(
        update_llm_config_for_completions_logging(
            metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
        )
    )
    agent_config = AgentConfig(
        enable_jupyter=False,
        enable_browsing=RUN_WITH_BROWSING,
        enable_llm_editor=False,
        enable_mcp=False,
        condenser=metadata.condenser_config,
        enable_prompt_extensions=False,
    )
    config.set_agent_config(agent_config)
    return config
|
||||
|
||||
|
||||
def _run_init_command(
    runtime: Runtime, command: str, error_prefix: str
) -> CmdOutputObservation:
    """Run one setup command with a 600s hard timeout and fail loudly on error.

    The action and its observation are both logged. Anything other than a
    ``CmdOutputObservation`` with exit code 0 is reported through
    ``assert_and_raise`` using ``'<error_prefix>: <observation>'``.
    """
    action = CmdRunAction(command=command)
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    # The isinstance check keeps an ErrorObservation (which has no exit_code)
    # from surfacing as an unrelated AttributeError.
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'{error_prefix}: {str(obs)}',
    )
    return obs


def initialize_runtime(
    runtime: Runtime,
    instance: pd.Series,  # this argument is not required
    metadata: EvalMetadata,
):
    """Initialize the runtime for the agent.

    This function is called before the runtime is used to run the agent.
    It configures the shell and git, copies the instance description and the
    entry script into ``/swe_util``, sources the entry script, moves into the
    instance workspace, resets the repository, removes all git remotes and
    finally checks that ``which python`` resolves to a path containing
    ``testbed``.

    Args:
        runtime: Connected runtime in which the commands are executed.
        instance: Dataset row (or plain dict) describing the instance.
        metadata: Evaluation metadata; not read by this function.
    """
    logger.info('-' * 30)
    logger.info('BEGIN Runtime Initialization Fn')
    logger.info('-' * 30)
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)

    # Set instance id and git configuration
    _run_init_command(
        runtime,
        f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc && git config --global core.pager "" && git config --global diff.binary false""",
        'Failed to export SWE_INSTANCE_ID and configure git',
    )

    _run_init_command(
        runtime,
        """export USER=$(whoami); echo USER=${USER} """,
        'Failed to export USER',
    )

    # Directory of this script; the entry script is shipped next to it.
    script_dir = os.path.dirname(__file__)

    # inject the instance info
    _run_init_command(
        runtime,
        'mkdir -p /swe_util/eval_data/instances',
        'Failed to create /swe_util/eval_data/instances',
    )

    swe_instance_json_name = 'swe-bench-instance.json'
    with tempfile.TemporaryDirectory() as temp_dir:
        # The file must carry this exact name inside the sandbox, so write it
        # under that name in a throwaway directory before copying.
        temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
        with open(temp_file_path, 'w') as f:
            if not isinstance(instance, dict):
                json.dump([instance.to_dict()], f)
            else:
                json.dump([instance], f)

        # Copy while the temporary directory still exists.
        runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')

        # inject the instance swe entry
        runtime.copy_to(
            str(os.path.join(script_dir, 'scripts/setup/instance_swe_entry.sh')),
            '/swe_util/',
        )

    _run_init_command(runtime, 'cat ~/.bashrc', 'Failed to cat ~/.bashrc')
    _run_init_command(runtime, 'source ~/.bashrc', 'Failed to source ~/.bashrc')
    _run_init_command(
        runtime,
        'source /swe_util/instance_swe_entry.sh',
        'Failed to source /swe_util/instance_swe_entry.sh',
    )
    _run_init_command(
        runtime,
        f'cd /workspace/{workspace_dir_name}',
        f'Failed to cd to /workspace/{workspace_dir_name}',
    )
    _run_init_command(runtime, 'git reset --hard', 'Failed to git reset --hard')
    _run_init_command(
        runtime,
        'for remote_name in $(git remote); do git remote remove "${remote_name}"; done',
        'Failed to remove git remotes',
    )

    # The interpreter on PATH must be the one from the testbed environment.
    python_check_error = 'Expected to find python interpreter from testbed, but got'
    obs = _run_init_command(runtime, 'which python', python_check_error)
    assert_and_raise(
        'testbed' in obs.content,
        f'{python_check_error}: {str(obs)}',
    )

    logger.info('-' * 30)
    logger.info('END Runtime Initialization Fn')
    logger.info('-' * 30)
|
||||
|
||||
|
||||
def _run_completion_command(
    runtime: Runtime, command: str, error_prefix: str
) -> CmdOutputObservation:
    """Run one post-run command with a 600s hard timeout and fail on error.

    The action and its observation are both logged. Anything other than a
    ``CmdOutputObservation`` with exit code 0 is reported through
    ``assert_and_raise`` using ``'<error_prefix>: <observation>'``.
    """
    action = CmdRunAction(command=command)
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'{error_prefix}: {str(obs)}',
    )
    return obs


def complete_runtime(
    runtime: Runtime,
    instance: pd.Series,  # this argument is not required, but it is used to get the workspace_dir_name
) -> dict[str, Any]:
    """Complete the runtime for the agent.

    This function is called after the agent has run, while the runtime is
    still alive. If you need to do something in the sandbox to get the
    correctness metric after the agent has run, modify this function.

    It moves into the instance workspace (interrupting a still-running
    command if needed), removes nested ``.git`` directories, stages all
    changes, unstages binary files and collects the diff against
    ``instance['base_commit']``.

    Args:
        runtime: Runtime the agent was executed in.
        instance: Dataset row; ``base_commit`` and the workspace name are read.

    Returns:
        ``{'git_patch': <diff text with binary diffs removed>}``.
    """
    logger.info('-' * 30)
    logger.info('BEGIN Runtime Completion Fn')
    logger.info('-' * 30)
    obs: CmdOutputObservation
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
    cd_command = f'cd /workspace/{workspace_dir_name}'

    def _cd_to_workspace():
        action = CmdRunAction(command=cd_command)
        action.set_hard_timeout(600)
        logger.info(action, extra={'msg_type': 'ACTION'})
        result = runtime.run_action(action)
        logger.info(result, extra={'msg_type': 'OBSERVATION'})
        return result

    obs = _cd_to_workspace()

    # An exit code of -1 means the previous command is still running. Try to
    # interrupt it first (C-c), then suspend it (C-z), retrying the cd after
    # each attempt.
    for control_key, how in (('C-c', 'kill'), ('C-z', 'ctrl+z')):
        if getattr(obs, 'exit_code', None) != -1:
            break
        logger.info(f'The previous command is still running, trying to {how} it...')
        action = CmdRunAction(command=control_key)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Then run the command again
        obs = _cd_to_workspace()

    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
    )

    _run_completion_command(
        runtime,
        'git config --global core.pager ""',
        'Failed to git config --global core.pager ""',
    )

    # First check for any git repositories in subdirectories
    obs = _run_completion_command(
        runtime,
        'find . -type d -name .git -not -path "./.git"',
        'Failed to find git repositories',
    )

    # Remove all .git directories in subdirectories so `git add -A` stages
    # their files as regular content.
    git_dirs = [p for p in obs.content.strip().split('\n') if p]
    for git_dir in git_dirs:
        _run_completion_command(
            runtime,
            f'rm -rf "{git_dir}"',
            f'Failed to remove git directory {git_dir}',
        )

    # add all files
    _run_completion_command(runtime, 'git add -A', 'Failed to git add -A')

    # Remove binary files from git staging
    _run_completion_command(
        runtime, remove_binary_files_from_git(), 'Failed to remove binary files'
    )

    n_retries = 0
    git_patch = None
    while n_retries < 5:
        action = CmdRunAction(
            command=f'git diff --no-color --cached {instance["base_commit"]} > patch.diff'
        )
        action.set_hard_timeout(max(300 + 100 * n_retries, 600))
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        n_retries += 1
        if isinstance(obs, CmdOutputObservation):
            if obs.exit_code == 0:
                # Read the patch file
                action = FileReadAction(path='patch.diff')
                action.set_hard_timeout(max(300 + 100 * n_retries, 600))
                logger.info(action, extra={'msg_type': 'ACTION'})
                obs = runtime.run_action(action)
                logger.info(obs, extra={'msg_type': 'OBSERVATION'})
                if isinstance(obs, FileReadObservation):
                    git_patch = obs.content
                    break
                elif isinstance(obs, ErrorObservation):
                    # Fall back to cat "patch.diff" to get the patch
                    assert 'File could not be decoded as utf-8' in obs.content
                    action = CmdRunAction(command='cat patch.diff')
                    action.set_hard_timeout(max(300 + 100 * n_retries, 600))
                    logger.info(action, extra={'msg_type': 'ACTION'})
                    obs = runtime.run_action(action)
                    assert isinstance(obs, CmdOutputObservation) and obs.exit_code == 0
                    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
                    git_patch = obs.content
                    break
                else:
                    assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
            else:
                logger.info('Failed to get git diff, retrying...')
                sleep_if_should_continue(10)
        elif isinstance(obs, ErrorObservation):
            logger.error(f'Error occurred: {obs.content}. Retrying...')
            sleep_if_should_continue(10)
        else:
            assert_and_raise(False, f'Unexpected observation type: {str(obs)}')

    assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')

    # Remove binary diffs from the patch
    git_patch = remove_binary_diffs(git_patch)

    logger.info('-' * 30)
    logger.info('END Runtime Completion Fn')
    logger.info('-' * 30)
    return {'git_patch': git_patch}
|
||||
|
||||
|
||||
class CPUGroupManager:
    """Context manager that leases one CPU group from a shared queue.

    On entry a group is taken from ``cpu_groups_queue`` (blocking until one
    is available) and returned; on exit the same group is put back so another
    worker can use it. When no queue is supplied the manager is a no-op and
    yields ``None``.
    """

    def __init__(self, cpu_groups_queue: 'multiprocessing.Queue | None'):
        self.cpu_groups_queue = cpu_groups_queue
        # Always defined, so the attribute can be read even when no queue was
        # supplied or the manager has not been entered yet.
        self.cpu_group = None

    def __enter__(self):
        # Get the current CPU group for this worker
        if self.cpu_groups_queue is None:
            return None
        self.cpu_group = self.cpu_groups_queue.get()
        logger.info(f'Worker started with CPU group: {self.cpu_group}')
        return self.cpu_group

    def __exit__(self, exc_type, exc_value, traceback):
        # Put the CPU group back into the queue for other workers to use
        if self.cpu_groups_queue is not None:
            self.cpu_groups_queue.put(self.cpu_group)
            logger.info(f'Worker finished with CPU group: {self.cpu_group}')
            # The lease is over; do not keep a reference to a group that may
            # now belong to another worker.
            self.cpu_group = None
|
||||
|
||||
|
||||
def cleanup_docker_resources_for_worker():
    """Best-effort removal of stale port locks left by crashed processes.

    Only lock files are touched here; containers are deliberately left alone
    so that other workers are not disturbed. Container cleanup is handled by
    the DockerRuntime.close() method based on configuration.

    Any failure (including the helper module being unavailable) is logged at
    debug level and otherwise ignored.
    """
    stale_after_seconds = 300  # locks older than 5 minutes count as stale
    try:
        from openhands.runtime.utils.port_lock import cleanup_stale_locks

        cleanup_stale_locks(max_age_seconds=stale_after_seconds)
    except Exception as exc:
        logger.debug(f'Error cleaning up stale port locks: {exc}')
|
||||
|
||||
|
||||
def process_instance(
    instance: pd.Series,
    metadata: EvalMetadata,
    reset_logger: bool = True,
    runtime_failure_count: int = 0,
    cpu_groups_queue: multiprocessing.Queue = None,
) -> EvalOutput:
    """Run the agent on one instance and package the result.

    Args:
        instance: Dataset row for the instance to evaluate.
        metadata: Evaluation metadata; a deep copy is annotated with the
            runtime failure count and resource factor before use.
        reset_logger: When True, redirect logging to a per-instance file under
            ``<eval_output_dir>/infer_logs``.
        runtime_failure_count: Number of earlier runtime failures for this
            instance; recorded in the output metadata.
        cpu_groups_queue: Optional queue of CPU groups. When given, one group
            is leased for the duration of the run and used to pin the
            container.

    Returns:
        An ``EvalOutput`` holding the instruction, history, metrics and the
        collected git patch.

    Raises:
        ValueError: If the controller returns no state.
        EvalException: If the final state carries a fatal evaluation error.
        Exception: Any other error from runtime setup or the agent run is
            logged and re-raised.
    """
    # Clean up any Docker resources from previous failed runs
    cleanup_docker_resources_for_worker()

    # Lease a CPU group for this worker (no-op when no queue is given).
    with CPUGroupManager(cpu_groups_queue) as cpu_group:
        config = get_config(instance, metadata, cpu_group=cpu_group)

        # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
        if reset_logger:
            log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
            reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
        else:
            logger.info(f'Starting evaluation for instance {instance.instance_id}.')

        metadata = copy.deepcopy(metadata)
        metadata.details['runtime_failure_count'] = runtime_failure_count
        metadata.details['remote_runtime_resource_factor'] = (
            config.sandbox.remote_runtime_resource_factor
        )

        runtime = create_runtime(config, sid=None)
        call_async_from_sync(runtime.connect)

        try:
            initialize_runtime(runtime, instance, metadata)

            message_action = get_instruction(instance, metadata)

            # Here's how you can run the agent (similar to the `main` function) and get the final task state
            state: State | None = asyncio.run(
                run_controller(
                    config=config,
                    initial_user_action=message_action,
                    runtime=runtime,
                    fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                        metadata.agent_class
                    ],
                )
            )

            # Check for a missing state before dereferencing it below.
            if state is None:
                raise ValueError('State should not be None.')

            # if fatal error, throw EvalError to trigger re-run
            if is_fatal_evaluation_error(state.last_error):
                raise EvalException('Fatal error detected: ' + state.last_error)

            # ======= THIS IS SWE-Bench specific =======
            # Get git patch
            return_val = complete_runtime(runtime, instance)
            git_patch = return_val['git_patch']
            logger.info(
                f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
            )
        except Exception as e:
            # Log the error but don't let it crash other workers
            logger.error(
                f'Error in worker processing instance {instance.instance_id}: {str(e)}'
            )
            raise
        finally:
            # Ensure runtime is properly closed to prevent cascade failures
            try:
                runtime.close()
            except Exception as e:
                logger.warning(
                    f'Error closing runtime for {instance.instance_id}: {str(e)}'
                )
                # Don't re-raise - we want to continue cleanup

        # ==========================================

        # ======= Attempt to evaluate the agent's edits =======
        # we use eval_infer.sh to evaluate the agent's edits, not here
        # because the agent may alter the environment / testcases
        test_result = {
            'git_patch': git_patch,
        }

        # If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
        # You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.

        # NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
        histories = [event_to_dict(event) for event in state.history]
        metrics = get_metrics(state)

        # Save the output
        instruction = message_action.content
        if message_action.image_urls:
            instruction += (
                '\n\n<image_urls>'
                + '\n'.join(message_action.image_urls)
                + '</image_urls>'
            )
        output = EvalOutput(
            instance_id=instance.instance_id,
            instruction=instruction,
            instance=instance.to_dict(),  # SWE Bench specific
            test_result=test_result,
            metadata=metadata,
            history=histories,
            metrics=metrics,
            error=state.last_error if state.last_error else None,
        )
        return output
|
||||
|
||||
|
||||
def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
    """Restrict the dataset according to ``config.toml`` or ``SKIP_IDS``.

    Precedence:
      1. ``selected_ids`` in a ``config.toml`` next to this file keeps only
         rows whose ``filter_column`` value is listed.
      2. Otherwise ``selected_repos`` in that file keeps only rows whose
         ``repo`` value is listed (a single string is accepted as well).
      3. Otherwise the comma-separated ``SKIP_IDS`` environment variable
         removes rows whose ``filter_column`` value is listed.
      4. Otherwise the dataset is returned unchanged.

    Args:
        dataset: Full benchmark dataset.
        filter_column: Column holding the instance identifier.

    Returns:
        The filtered (or original) DataFrame.
    """
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
    if os.path.exists(file_path):
        with open(file_path, 'r') as file:
            data = toml.load(file)
            if 'selected_ids' in data:
                selected_ids = data['selected_ids']
                logger.info(
                    f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
                )
                subset = dataset[dataset[filter_column].isin(selected_ids)]
                logger.info(f'Retained {subset.shape[0]} tasks after filtering')
                return subset
            if 'selected_repos' in data:
                # repos for the swe-bench instances:
                # ['astropy/astropy', 'django/django', 'matplotlib/matplotlib', 'mwaskom/seaborn', 'pallets/flask', 'psf/requests', 'pydata/xarray', 'pylint-dev/pylint', 'pytest-dev/pytest', 'scikit-learn/scikit-learn', 'sphinx-doc/sphinx', 'sympy/sympy']
                selected_repos = data['selected_repos']
                if isinstance(selected_repos, str):
                    selected_repos = [selected_repos]
                assert isinstance(selected_repos, list)
                logger.info(
                    f'Filtering {selected_repos} tasks from "selected_repos"...'
                )
                subset = dataset[dataset['repo'].isin(selected_repos)]
                logger.info(f'Retained {subset.shape[0]} tasks after filtering')
                return subset

    # ''.split(',') yields [''], so empty entries must be dropped explicitly;
    # otherwise an unset SKIP_IDS would still "filter" by the empty id.
    skip_ids = [
        skip_id.strip()
        for skip_id in os.environ.get('SKIP_IDS', '').split(',')
        if skip_id.strip()
    ]
    if skip_ids:
        logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
        return dataset[~dataset[filter_column].isin(skip_ids)]
    return dataset
|
||||
|
||||
|
||||
def divide_cpus_among_workers(num_workers, num_cpus_per_worker=4, num_to_skip=0):
    """Split the CPUs available to this process into per-worker groups.

    Args:
        num_workers: Number of groups to create; must be greater than 0.
        num_cpus_per_worker: CPUs in each group; must be greater than 0.
        num_to_skip: Number of lowest-numbered CPUs to leave unassigned;
            must not be negative.

    Returns:
        A list of ``num_workers`` lists of CPU ids, each of length
        ``num_cpus_per_worker``, with no CPU appearing twice.

    Raises:
        ValueError: If an argument is out of range or there are not enough
            CPUs for the requested layout.
    """
    if num_workers <= 0:
        raise ValueError('Number of workers must be greater than 0')
    if num_cpus_per_worker <= 0:
        raise ValueError('Number of CPUs per worker must be greater than 0')
    if num_to_skip < 0:
        # A negative value would make the slice below count from the end.
        raise ValueError('Number of CPUs to skip must not be negative')

    try:
        # The affinity mask is a set; sort it so groups are deterministic and
        # "skip the first N" really means the lowest-numbered CPUs.
        current_cpus = sorted(os.sched_getaffinity(0))
    except AttributeError:
        # os.sched_getaffinity not available on all platforms
        import multiprocessing

        current_cpus = list(range(multiprocessing.cpu_count()))

    num_cpus = len(current_cpus)

    # Check that num_workers and num_cpus_per_worker fit into available CPUs
    total_cpus_needed = num_workers * num_cpus_per_worker + num_to_skip
    if total_cpus_needed > num_cpus:
        raise ValueError(
            f'Not enough CPUs available. Requested {total_cpus_needed} '
            f'CPUs (num_workers={num_workers}, num_cpus_per_worker={num_cpus_per_worker}, '
            f'num_to_skip={num_to_skip}), but only {num_cpus} CPUs are available.'
        )

    # Divide this into groups, skipping the first `num_to_skip` CPUs.
    available_cpus = current_cpus[num_to_skip:]
    cpu_groups = [
        available_cpus[i * num_cpus_per_worker : (i + 1) * num_cpus_per_worker]
        for i in range(num_workers)
    ]
    print(
        f'Divided {num_cpus} CPUs into {num_workers} groups, each with {num_cpus_per_worker} CPUs.'
    )
    print(f'CPU groups: {cpu_groups}')

    return cpu_groups
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: parse arguments, load and filter the dataset, build the
    # evaluation metadata, lease CPU groups to workers and run the evaluation
    # either once or in iterative (critic-driven retry) mode.
    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
        default=None,
        help='data set to evaluate on, for now use local.',
    )
    parser.add_argument(
        '--split',
        type=str,
        default='test',
        help='split to evaluate on',
    )
    parser.add_argument(
        '--mode',
        type=str,
        default='swe',
        help='mode to evaluate on',
    )

    args, _ = parser.parse_known_args()

    # Fail early with a clear message instead of an opaque error further down
    # when the dataset name is dereferenced.
    if not args.dataset:
        raise ValueError('No dataset specified: please pass --dataset')

    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
    # so we don't need to manage file uploading to OpenHands's repo
    dataset = load_dataset(args.dataset, split=args.split)

    # Convert dataset to pandas DataFrame if it is not already.
    if not isinstance(dataset, pd.DataFrame):
        dataset = dataset.to_pandas()

    dataset['version'] = dataset['version'].astype(str)

    # Convert created_at column to string.
    dataset['created_at'] = dataset['created_at'].astype(str)

    swe_bench_tests = filter_dataset(dataset, 'instance_id')

    logger.info(
        f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_bench_tests)} tasks'
    )

    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
        llm_config.log_completions = True
        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
        llm_config.modify_params = False

    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

    # Get condenser config from environment variable
    condenser_name = os.environ.get('EVAL_CONDENSER')
    if condenser_name:
        condenser_config = get_condenser_config_arg(condenser_name)
        if condenser_config is None:
            raise ValueError(
                f'Could not find Condenser config: EVAL_CONDENSER={condenser_name}'
            )
    else:
        # If no specific condenser config is provided via env var, default to NoOpCondenser
        condenser_config = NoOpCondenserConfig()
        logger.debug(
            'No Condenser config provided via EVAL_CONDENSER, using NoOpCondenser.'
        )

    details = {'mode': args.mode}
    # Result is unused; the lookup is kept as in the original script.
    # presumably it validates that the agent class is registered — TODO confirm
    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)

    dataset_description = (
        args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
    )
    metadata = make_metadata(
        llm_config,
        dataset_description,
        args.agent_cls,
        args.max_iterations,
        args.eval_note,
        args.eval_output_dir,
        details=details,
        condenser_config=condenser_config,
    )

    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
    print(f'### OUTPUT FILE: {output_file} ###')

    # Run evaluation in iterative mode:
    # If a rollout fails to output AgentFinishAction, we will try again until it succeeds OR total 3 attempts have been made.
    ITERATIVE_EVAL_MODE = (
        os.environ.get('ITERATIVE_EVAL_MODE', 'false').lower() == 'true'
    )
    ITERATIVE_EVAL_MODE_MAX_ATTEMPTS = int(
        os.environ.get('ITERATIVE_EVAL_MODE_MAX_ATTEMPTS', '3')
    )

    # Split the available CPUs into one group per worker and publish them on
    # a manager-backed queue that worker processes can lease from.
    cpu_groups_list = divide_cpus_among_workers(args.eval_num_workers, num_to_skip=8)
    cpu_groups_queue = multiprocessing.Manager().Queue()
    for cpu_group in cpu_groups_list:
        cpu_groups_queue.put(cpu_group)

    # Used in BOTH modes so every run is pinned to a leased CPU group.
    process_instance_with_cpu_groups = functools.partial(
        process_instance,
        cpu_groups_queue=cpu_groups_queue,
    )

    if not ITERATIVE_EVAL_MODE:
        # load the dataset
        instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

        run_evaluation(
            instances,
            metadata,
            output_file,
            args.eval_num_workers,
            process_instance_with_cpu_groups,
            timeout_seconds=8
            * 60
            * 60,  # 8 hour PER instance should be more than enough
            max_retries=3,
        )
    else:
        critic = AgentFinishedCritic()

        def get_cur_output_file_path(attempt: int) -> str:
            return (
                f'{output_file.removesuffix(".jsonl")}.critic_attempt_{attempt}.jsonl'
            )

        eval_ids = None
        for attempt in range(1, ITERATIVE_EVAL_MODE_MAX_ATTEMPTS + 1):
            cur_output_file = get_cur_output_file_path(attempt)
            logger.info(
                f'Running evaluation with critic {critic.__class__.__name__} for attempt {attempt} of {ITERATIVE_EVAL_MODE_MAX_ATTEMPTS}.'
            )

            # For deterministic eval, we set temperature to 0.1 for (>1) attempt
            # so hopefully we get slightly different results
            if attempt > 1 and metadata.llm_config.temperature == 0:
                logger.info(
                    f'Detected temperature is 0 for (>1) attempt {attempt}. Setting temperature to 0.1...'
                )
                metadata.llm_config.temperature = 0.1

            # Load instances - at first attempt, we evaluate all instances
            # On subsequent attempts, we only evaluate the instances that failed the previous attempt determined by critic
            instances = prepare_dataset(
                swe_bench_tests, cur_output_file, args.eval_n_limit, eval_ids=eval_ids
            )
            if len(instances) > 0 and not isinstance(
                instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
            ):
                for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
                    instances[col] = instances[col].apply(lambda x: str(x))

            # Run evaluation - but save them to cur_output_file
            logger.info(
                f'Evaluating {len(instances)} instances for attempt {attempt}...'
            )
            run_evaluation(
                instances,
                metadata,
                cur_output_file,
                args.eval_num_workers,
                process_instance_with_cpu_groups,
                timeout_seconds=8
                * 60
                * 60,  # 8 hour PER instance should be more than enough
                max_retries=1,
            )

            # When eval is done, we update eval_ids to the instances that failed the current attempt
            instances_failed = []
            logger.info(
                f'Use critic {critic.__class__.__name__} to check {len(instances)} instances for attempt {attempt}...'
            )
            with open(cur_output_file, 'r') as f:
                for line in f:
                    instance = json.loads(line)
                    try:
                        history = [
                            event_from_dict(event) for event in instance['history']
                        ]
                        critic_result = critic.evaluate(
                            history, instance['test_result'].get('git_patch', '')
                        )
                        if not critic_result.success:
                            instances_failed.append(instance['instance_id'])
                    except Exception as e:
                        logger.error(
                            f'Error loading history for instance {instance["instance_id"]}: {e}'
                        )
                        instances_failed.append(instance['instance_id'])
            logger.info(
                f'{len(instances_failed)} instances failed the current attempt {attempt}: {instances_failed}'
            )
            eval_ids = instances_failed

            # If no instances failed, we break
            if len(instances_failed) == 0:
                break

        # Then we should aggregate the results from all attempts into the original output file
        # and remove the intermediate files
        logger.info(
            'Aggregating results from all attempts into the original output file...'
        )
        added_instance_ids = set()
        # The context manager guarantees the file is flushed and closed even
        # if reading one of the intermediate files fails.
        with open(output_file, 'w') as fout:
            # Walk attempts newest-first so the latest non-empty patch wins.
            for attempt in reversed(range(1, ITERATIVE_EVAL_MODE_MAX_ATTEMPTS + 1)):
                cur_output_file = get_cur_output_file_path(attempt)
                if not os.path.exists(cur_output_file):
                    logger.warning(
                        f'Intermediate output file {cur_output_file} does not exist. Skipping...'
                    )
                    continue

                with open(cur_output_file, 'r') as f:
                    for line in f:
                        instance = json.loads(line)
                        # Also make sure git_patch is not empty - otherwise we fall back to previous attempt (empty patch is worse than anything else)
                        if (
                            instance['instance_id'] not in added_instance_ids
                            and instance['test_result'].get('git_patch', '').strip()
                        ):
                            fout.write(line)
                            added_instance_ids.add(instance['instance_id'])
                logger.info(
                    f'Aggregated instances from {cur_output_file}. Total instances added so far: {len(added_instance_ids)}'
                )
        logger.info(
            f'Done! Total {len(added_instance_ids)} instances added to {output_file}'
        )
|
||||
@@ -1,148 +0,0 @@
|
||||
#!/usr/bin/env bash
# Driver for the swefficiency inference benchmark.
#
# Positional arguments (all optional except MODEL_CONFIG, which has no default):
#   $1 MODEL_CONFIG  $2 COMMIT_HASH  $3 AGENT  $4 EVAL_LIMIT  $5 MAX_ITER
#   $6 NUM_WORKERS   $7 DATASET      $8 SPLIT  $9 N_RUNS      $10 MODE
#
# Environment variables read: RUN_WITH_BROWSING, EVAL_CONDENSER, USE_HINT_TEXT,
# EXP_NAME, SKIP_RUNS (comma separated list of run numbers to skip).
set -eo pipefail

# Presumably provides checkout_eval_branch, get_openhands_version and
# checkout_original_branch, which are called below -- confirm in that file.
source "evaluation/utils/version_control.sh"

MODEL_CONFIG=$1
COMMIT_HASH=$2
AGENT=$3
EVAL_LIMIT=$4
MAX_ITER=$5
NUM_WORKERS=$6
DATASET=$7
SPLIT=$8
N_RUNS=$9
MODE=${10}


if [ -z "$NUM_WORKERS" ]; then
  NUM_WORKERS=1
  echo "Number of workers not specified, use default $NUM_WORKERS"
fi
checkout_eval_branch

if [ -z "$AGENT" ]; then
  echo "Agent not specified, use default CodeActAgent"
  AGENT="CodeActAgent"
fi

if [ -z "$MAX_ITER" ]; then
  echo "MAX_ITER not specified, use default 100"
  MAX_ITER=100
fi

if [ -z "$RUN_WITH_BROWSING" ]; then
  echo "RUN_WITH_BROWSING not specified, use default false"
  RUN_WITH_BROWSING=false
fi


if [ -z "$DATASET" ]; then
  # Assign first and print the variable so the message can never drift from
  # the value that is actually used (the old message named a different dataset).
  DATASET="swefficiency/swefficiency"
  echo "DATASET not specified, use default $DATASET"
fi

if [ -z "$SPLIT" ]; then
  echo "SPLIT not specified, use default test"
  SPLIT="test"
fi

if [ -z "$MODE" ]; then
  MODE="swe"
  echo "MODE not specified, use default $MODE"
fi

if [ -n "$EVAL_CONDENSER" ]; then
  echo "Using Condenser Config: $EVAL_CONDENSER"
else
  echo "No Condenser Config provided via EVAL_CONDENSER, use default (NoOpCondenser)."
fi

export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"

# Presumably sets OPENHANDS_VERSION, which is read below -- confirm.
get_openhands_version

echo "AGENT: $AGENT"
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"
echo "MAX_ITER: $MAX_ITER"
echo "NUM_WORKERS: $NUM_WORKERS"
echo "COMMIT_HASH: $COMMIT_HASH"
echo "MODE: $MODE"
echo "EVAL_CONDENSER: $EVAL_CONDENSER"

# Default to NOT use Hint
if [ -z "$USE_HINT_TEXT" ]; then
  export USE_HINT_TEXT=false
fi
echo "USE_HINT_TEXT: $USE_HINT_TEXT"

# EVAL_NOTE is built up from the version plus one suffix per active option.
EVAL_NOTE="$OPENHANDS_VERSION"
# if not using Hint, add -no-hint to the eval note
if [ "$USE_HINT_TEXT" = false ]; then
  EVAL_NOTE="$EVAL_NOTE-no-hint"
fi

if [ "$RUN_WITH_BROWSING" = true ]; then
  EVAL_NOTE="$EVAL_NOTE-with-browsing"
fi

if [ -n "$EXP_NAME" ]; then
  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
# if mode != swe, add mode to the eval note
if [ "$MODE" != "swe" ]; then
  EVAL_NOTE="${EVAL_NOTE}-${MODE}"
fi
# Add condenser config to eval note if provided
if [ -n "$EVAL_CONDENSER" ]; then
  EVAL_NOTE="${EVAL_NOTE}-${EVAL_CONDENSER}"
fi

# export RUNTIME="remote"
# export SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev"
export NO_CHANGE_TIMEOUT_SECONDS=900 # 15 minutes

# Run one inference pass.
#   $1 - eval note passed through as --eval-note
# Reads AGENT, MODEL_CONFIG, MAX_ITER, NUM_WORKERS, DATASET, SPLIT, MODE and
# EVAL_LIMIT from the enclosing script.
function run_eval() {
  local eval_note="${1}"
  COMMAND="poetry run python evaluation/benchmarks/swefficiency/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
    --eval-num-workers $NUM_WORKERS \
    --eval-note $eval_note \
    --dataset $DATASET \
    --split $SPLIT \
    --mode $MODE"

  if [ -n "$EVAL_LIMIT" ]; then
    echo "EVAL_LIMIT: $EVAL_LIMIT"
    COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
  fi

  # Run the command
  # NOTE(review): the string is re-parsed by the shell, so values containing
  # spaces or shell metacharacters are split/interpreted here.
  eval $COMMAND
}

unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
if [ -z "$N_RUNS" ]; then
  N_RUNS=1
  echo "N_RUNS not specified, use default $N_RUNS"
fi

# Skip runs if the run number is in the SKIP_RUNS list
# read from env variable SKIP_RUNS as a comma separated list of run numbers
SKIP_RUNS=(${SKIP_RUNS//,/ })
for i in $(seq 1 $N_RUNS); do
  # Pad with spaces on both sides so "1" does not match "11".
  if [[ " ${SKIP_RUNS[@]} " =~ " $i " ]]; then
    echo "Skipping run $i"
    continue
  fi
  current_eval_note="$EVAL_NOTE-run_$i"
  echo "EVAL_NOTE: $current_eval_note"
  run_eval $current_eval_note
done

checkout_original_branch
|
||||
@@ -1,43 +0,0 @@
|
||||
#!/usr/bin/env bash
# Prepare /workspace for a single benchmark instance:
#   1. look the instance up by SWE_INSTANCE_ID in swe-bench-instance.json,
#   2. wipe /workspace and copy /testbed into /workspace/<repo>__<version>,
#   3. activate the `testbed` conda environment when /opt/miniconda3 exists.
# Requires: jq, and SWE_INSTANCE_ID set in the environment.

source ~/.bashrc
SWEUTIL_DIR=/swe_util

# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
    echo "Error: SWE_INSTANCE_ID is not set." >&2
    exit 1
fi

# Read swe-bench-instance.json and extract the entry whose instance_id matches
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' "$SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json")

if [[ -z "$item" ]]; then
    echo "No item found for the provided instance ID."
    exit 1
fi


# "<repo>__<version>" with every "/" replaced by "__"; tostring guards against
# non-string repo/version values in the JSON.
WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')

echo "WORKSPACE_NAME: $WORKSPACE_NAME"

# Clear the workspace
if [ -d /workspace ]; then
    rm -rf /workspace/*
else
    mkdir /workspace
fi
# Copy repo to workspace
# The glob above does not match dot-entries, so keep the explicit removal;
# otherwise cp -r would nest /testbed inside an existing directory.
if [ -d "/workspace/$WORKSPACE_NAME" ]; then
    rm -rf "/workspace/$WORKSPACE_NAME"
fi
mkdir -p /workspace
cp -r /testbed "/workspace/$WORKSPACE_NAME"

# Activate instance-specific environment
if [ -d /opt/miniconda3 ]; then
    . /opt/miniconda3/etc/profile.d/conda.sh
    conda activate testbed
fi
|
||||
@@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env bash
# Clone the OpenHands fork of SWE-bench into the eval workspace and generate
# its eval_data directory by running prepare_data.sh inside the eval image.
# Requires: git, docker, and network access to GitHub / ghcr.io.

set -e
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
mkdir -p "$EVAL_WORKSPACE"

# 1. Prepare REPO
echo "==== Prepare SWE-bench repo ===="
OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git"
OH_SWE_BENCH_REPO_BRANCH="eval"
git clone -b "$OH_SWE_BENCH_REPO_BRANCH" "$OH_SWE_BENCH_REPO_PATH" "$EVAL_WORKSPACE/OH-SWE-bench"

# 2. Prepare DATA
echo "==== Prepare SWE-bench data ===="
EVAL_IMAGE=ghcr.io/all-hands-ai/eval-swe-bench:builder_with_conda
# From here on the path is absolute, so it inherits any spaces in the
# checkout location -- every expansion below is quoted for that reason.
EVAL_WORKSPACE=$(realpath "$EVAL_WORKSPACE")
chmod +x "$EVAL_WORKSPACE/OH-SWE-bench/swebench/harness/prepare_data.sh"
# Drop data left over from a previous run; the container's final `mv` below
# places a fresh eval_data here.
if [ -d "$EVAL_WORKSPACE/eval_data" ]; then
    rm -r "$EVAL_WORKSPACE/eval_data"
fi
# Run as the invoking user so generated files are not owned by root.
docker run \
    -v "$EVAL_WORKSPACE:/workspace" \
    -w /workspace \
    -u "$(id -u):$(id -g)" \
    -e HF_DATASETS_CACHE="/tmp" \
    --rm -it "$EVAL_IMAGE" \
    bash -c "cd OH-SWE-bench/swebench/harness && /swe_util/miniforge3/bin/conda run -n swe-bench-eval ./prepare_data.sh && mv eval_data /workspace/"
|
||||
@@ -1,96 +0,0 @@
|
||||
#!/usr/bin/env bash
# Set up the sandbox for one SWE-bench instance (must run as root):
#   1. look the instance up by SWE_INSTANCE_ID,
#   2. dump its test patch, gold patch and metadata into $SWE_TASK_DIR,
#   3. replace /workspace with the instance's pre-built testbed,
#   4. reset the testbed via reset_swe_env.py and persist REPO_PATH / TEST_CMD
#      to ~/.bashrc,
#   5. activate the instance-specific conda environment.
# Requires: jq, conda under $SWEUTIL_DIR/miniforge3, SWE_INSTANCE_ID set.

set -e

# assert user name is `root`
# USER is not guaranteed to be exported (e.g. in non-login shells), so fall
# back to asking the system for the effective user name.
CURRENT_USER="${USER:-$(id -un)}"
if [ "$CURRENT_USER" != "root" ]; then
    echo "Error: This script is intended to be run by the 'root' user only." >&2
    exit 1
fi

source ~/.bashrc

SWEUTIL_DIR=/swe_util

# Create logs directory
LOG_DIR=/openhands/logs
mkdir -p "$LOG_DIR" && chmod 777 "$LOG_DIR"

# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
    echo "Error: SWE_INSTANCE_ID is not set." >&2
    exit 1
fi

# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' "$SWEUTIL_DIR/eval_data/instances/swe-bench-test-lite.json")

if [[ -z "$item" ]]; then
    echo "No item found for the provided instance ID."
    exit 1
fi

# "<repo>__<version>" with every "/" replaced by "__"
CONDA_ENV_NAME=$(echo "$item" | jq -r '.repo + "__" + .version | gsub("/"; "__")')

echo "CONDA_ENV_NAME: $CONDA_ENV_NAME"

SWE_TASK_DIR=/openhands/swe_tasks
mkdir -p "$SWE_TASK_DIR"
# Dump test_patch to $SWE_TASK_DIR/test.patch
echo "$item" | jq -r '.test_patch' > "$SWE_TASK_DIR/test.patch"
# Dump patch to $SWE_TASK_DIR/gold.patch
echo "$item" | jq -r '.patch' > "$SWE_TASK_DIR/gold.patch"
# Dump the item to $SWE_TASK_DIR/instance.json except for the "test_patch" and "patch" fields
echo "$item" | jq 'del(.test_patch, .patch)' > "$SWE_TASK_DIR/instance.json"

# Clear the workspace
rm -rf /workspace/*
# Copy repo to workspace
if [ -d "/workspace/$CONDA_ENV_NAME" ]; then
    rm -rf "/workspace/$CONDA_ENV_NAME"
fi
cp -r "$SWEUTIL_DIR/eval_data/testbeds/$CONDA_ENV_NAME" /workspace

# Reset swe-bench testbed and install the repo
. "$SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh"
conda config --set changeps1 False
conda config --append channels conda-forge
conda activate swe-bench-eval

mkdir -p "$SWE_TASK_DIR/reset_testbed_temp"
mkdir -p "$SWE_TASK_DIR/reset_testbed_log_dir"
SWE_BENCH_DIR=/swe_util/OH-SWE-bench
# Runs in a subshell, so the cd and PYTHONPATH export do not leak out.
# NOTE(review): the lookup above reads swe-bench-test-lite.json while this
# call passes swe-bench-test.json -- confirm that is intended.
output=$(
    export PYTHONPATH="$SWE_BENCH_DIR" && \
    cd "$SWE_BENCH_DIR" && \
    python swebench/harness/reset_swe_env.py \
        --swe_bench_tasks "$SWEUTIL_DIR/eval_data/instances/swe-bench-test.json" \
        --temp_dir "$SWE_TASK_DIR/reset_testbed_temp" \
        --testbed /workspace \
        --conda_path "$SWEUTIL_DIR/miniforge3" \
        --instance_id "$SWE_INSTANCE_ID" \
        --log_dir "$SWE_TASK_DIR/reset_testbed_log_dir" \
        --timeout 900 \
        --verbose
)

# Pull the values out of the "repo_path: ..." / "test_cmd: ..." output lines.
REPO_PATH=$(echo "$output" | awk -F': ' '/repo_path:/ {print $2}')
TEST_CMD=$(echo "$output" | awk -F': ' '/test_cmd:/ {print $2}')
echo "Repo Path: $REPO_PATH"
echo "Test Command: $TEST_CMD"

# Persist for later shells that source ~/.bashrc.
echo "export SWE_BENCH_DIR=\"$SWE_BENCH_DIR\"" >> ~/.bashrc
echo "export REPO_PATH=\"$REPO_PATH\"" >> ~/.bashrc
echo "export TEST_CMD=\"$TEST_CMD\"" >> ~/.bashrc

if [[ "$REPO_PATH" == "None" ]]; then
    echo "Error: Failed to retrieve repository path. Tests may not have passed or output was not as expected." >&2
    exit 1
fi

# Activate instance-specific environment
. "$SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh"
conda activate "$CONDA_ENV_NAME"

# Turn errexit back off before handing control back.
set +e
|
||||
@@ -71,7 +71,6 @@ beforeEach(() => {
|
||||
provider_tokens_set: {
|
||||
github: "some-token",
|
||||
gitlab: null,
|
||||
azure_devops: null,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -23,7 +23,6 @@ const MOCK_RESPOSITORIES: GitRepository[] = [
|
||||
{ id: "2", full_name: "repo2", git_provider: "github", is_public: true },
|
||||
{ id: "3", full_name: "repo3", git_provider: "gitlab", is_public: true },
|
||||
{ id: "4", full_name: "repo4", git_provider: "gitlab", is_public: true },
|
||||
{ id: "5", full_name: "repo5", git_provider: "azure_devops", is_public: true },
|
||||
];
|
||||
|
||||
const renderTaskCard = (task = MOCK_TASK_1) => {
|
||||
|
||||
@@ -124,9 +124,6 @@ describe("Content", () => {
|
||||
await screen.findByTestId("bitbucket-token-input");
|
||||
await screen.findByTestId("bitbucket-token-help-anchor");
|
||||
|
||||
await screen.findByTestId("azure-devops-token-input");
|
||||
await screen.findByTestId("azure-devops-token-help-anchor");
|
||||
|
||||
getConfigSpy.mockResolvedValue(VALID_SAAS_CONFIG);
|
||||
queryClient.invalidateQueries();
|
||||
rerender();
|
||||
@@ -152,13 +149,6 @@ describe("Content", () => {
|
||||
expect(
|
||||
screen.queryByTestId("bitbucket-token-help-anchor"),
|
||||
).not.toBeInTheDocument();
|
||||
|
||||
expect(
|
||||
screen.queryByTestId("azure-devops-token-input"),
|
||||
).not.toBeInTheDocument();
|
||||
expect(
|
||||
screen.queryByTestId("azure-devops-token-help-anchor"),
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -297,7 +287,6 @@ describe("Form submission", () => {
|
||||
github: { token: "test-token", host: "" },
|
||||
gitlab: { token: "", host: "" },
|
||||
bitbucket: { token: "", host: "" },
|
||||
azure_devops: { token: "", host: "" },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -319,7 +308,6 @@ describe("Form submission", () => {
|
||||
github: { token: "", host: "" },
|
||||
gitlab: { token: "test-token", host: "" },
|
||||
bitbucket: { token: "", host: "" },
|
||||
azure_devops: { token: "", host: "" },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -341,29 +329,6 @@ describe("Form submission", () => {
|
||||
github: { token: "", host: "" },
|
||||
gitlab: { token: "", host: "" },
|
||||
bitbucket: { token: "test-token", host: "" },
|
||||
azure_devops: { token: "", host: "" },
|
||||
});
|
||||
});
|
||||
|
||||
it("should save the Azure DevOps token", async () => {
  // Stub the save call and serve VALID_OSS_CONFIG from the config endpoint.
  const addGitProviderSpy = vi.spyOn(SecretsService, "addGitProvider");
  addGitProviderSpy.mockImplementation(() => Promise.resolve(true));
  vi.spyOn(OptionService, "getConfig").mockResolvedValue(VALID_OSS_CONFIG);

  renderGitSettingsScreen();

  const tokenField = await screen.findByTestId("azure-devops-token-input");
  const submitButton = await screen.findByTestId("submit-button");

  // Fill in only the Azure DevOps token, then submit the form.
  await userEvent.type(tokenField, "test-token");
  await userEvent.click(submitButton);

  // Every other provider must be submitted with an empty token and host.
  const expectedPayload = {
    github: { token: "", host: "" },
    gitlab: { token: "", host: "" },
    bitbucket: { token: "", host: "" },
    azure_devops: { token: "test-token", host: "" },
  };
  expect(addGitProviderSpy).toHaveBeenCalledWith(expectedPayload);
});
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ describe("convertRawProvidersToList", () => {
|
||||
const example1: Partial<Record<Provider, string | null>> | undefined = {
|
||||
github: "test-token",
|
||||
gitlab: "test-token",
|
||||
azure_devops: "test-token",
|
||||
};
|
||||
const example2: Partial<Record<Provider, string | null>> | undefined = {
|
||||
github: "",
|
||||
@@ -15,13 +14,9 @@ describe("convertRawProvidersToList", () => {
|
||||
const example3: Partial<Record<Provider, string | null>> | undefined = {
|
||||
gitlab: null,
|
||||
};
|
||||
const example4: Partial<Record<Provider, string | null>> | undefined = {
|
||||
azure_devops: "test-token",
|
||||
};
|
||||
|
||||
expect(convertRawProvidersToList(example1)).toEqual(["github", "gitlab", "azure_devops"]);
|
||||
expect(convertRawProvidersToList(example1)).toEqual(["github", "gitlab"]);
|
||||
expect(convertRawProvidersToList(example2)).toEqual(["github"]);
|
||||
expect(convertRawProvidersToList(example3)).toEqual(["gitlab"]);
|
||||
expect(convertRawProvidersToList(example4)).toEqual(["azure_devops"]);
|
||||
});
|
||||
});
|
||||
|
||||
16
frontend/package-lock.json
generated
16
frontend/package-lock.json
generated
@@ -39,7 +39,7 @@
|
||||
"jose": "^6.1.0",
|
||||
"lucide-react": "^0.544.0",
|
||||
"monaco-editor": "^0.53.0",
|
||||
"posthog-js": "^1.298.1",
|
||||
"posthog-js": "^1.290.0",
|
||||
"react": "^19.1.1",
|
||||
"react-dom": "^19.1.1",
|
||||
"react-highlight": "^0.15.0",
|
||||
@@ -3910,9 +3910,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@posthog/core": {
|
||||
"version": "1.6.0",
|
||||
"resolved": "https://registry.npmjs.org/@posthog/core/-/core-1.6.0.tgz",
|
||||
"integrity": "sha512-Tbh8UACwbb7jFdDC7wwXHtfNzO+4wKh3VbyMHmp2UBe6w1jliJixexTJNfkqdGZm+ht3M10mcKvGGPnoZ2zLBg==",
|
||||
"version": "1.5.2",
|
||||
"resolved": "https://registry.npmjs.org/@posthog/core/-/core-1.5.2.tgz",
|
||||
"integrity": "sha512-iedUP3EnOPPxTA2VaIrsrd29lSZnUV+ZrMnvY56timRVeZAXoYCkmjfIs3KBAsF8OUT5h1GXLSkoQdrV0r31OQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cross-spawn": "^7.0.6"
|
||||
@@ -14711,12 +14711,12 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/posthog-js": {
|
||||
"version": "1.298.1",
|
||||
"resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.298.1.tgz",
|
||||
"integrity": "sha512-MynFhC2HO6sg5moUfpkd0s6RzAqcqFX75kjIi4Xrj2Gl0/YQWYvFUgvv8FCpWPKPs2mdvNWYhs+oqJg0BVVHPw==",
|
||||
"version": "1.290.0",
|
||||
"resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.290.0.tgz",
|
||||
"integrity": "sha512-zavBwZkf+3JeiSDVE7ZDXBfzva/iOljicdhdJH+cZoqp0LsxjKxjnNhGOd3KpAhw0wqdwjhd7Lp1aJuI7DXyaw==",
|
||||
"license": "SEE LICENSE IN LICENSE",
|
||||
"dependencies": {
|
||||
"@posthog/core": "1.6.0",
|
||||
"@posthog/core": "1.5.2",
|
||||
"core-js": "^3.38.1",
|
||||
"fflate": "^0.4.8",
|
||||
"preact": "^10.19.3",
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
"jose": "^6.1.0",
|
||||
"lucide-react": "^0.544.0",
|
||||
"monaco-editor": "^0.53.0",
|
||||
"posthog-js": "^1.298.1",
|
||||
"posthog-js": "^1.290.0",
|
||||
"react": "^19.1.1",
|
||||
"react-dom": "^19.1.1",
|
||||
"react-highlight": "^0.15.0",
|
||||
|
||||
@@ -296,25 +296,6 @@ class V1ConversationService {
|
||||
const { data } = await openHands.get<{ runtime_id: string }>(url);
|
||||
return data;
|
||||
}
|
||||
|
||||
/**
 * Fetch the contents of a file from a conversation's sandbox workspace.
 *
 * Issues a GET to `/api/v1/app-conversations/{conversationId}/file` with the
 * path sent as the `file_path` query parameter.
 *
 * @param conversationId The conversation ID
 * @param filePath Path of the file inside the sandbox workspace
 *   (defaults to /workspace/project/PLAN.md)
 * @returns The response body as a string. The endpoint is documented as
 *   returning an empty string when the file does not exist; that is decided
 *   server-side and not enforced here.
 */
static async readConversationFile(
  conversationId: string,
  filePath: string = "/workspace/project/PLAN.md",
): Promise<string> {
  // URLSearchParams takes care of percent-encoding the path.
  const query = new URLSearchParams({ file_path: filePath }).toString();
  const endpoint = `/api/v1/app-conversations/${conversationId}/file?${query}`;

  const response = await openHands.get<string>(endpoint);
  return response.data;
}
|
||||
}
|
||||
|
||||
export default V1ConversationService;
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
<svg height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg"><path fill="currentColor" d="m22 18-5 4-8-3v3l-4.19-5.75 12.91 1.05v-10.96l4.28-.69zm-17.19-1.75v-7.29l12.91-2.62-7.12-4.34v2.84l-6.63 1.92-1.97 2.62v5.69z"/></svg>
|
||||
|
Before Width: | Height: | Size: 248 B |
@@ -1,9 +1,15 @@
|
||||
import React from "react";
|
||||
import Markdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkBreaks from "remark-breaks";
|
||||
import { code } from "../markdown/code";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { ul, ol } from "../markdown/list";
|
||||
import { CopyToClipboardButton } from "#/components/shared/buttons/copy-to-clipboard-button";
|
||||
import { anchor } from "../markdown/anchor";
|
||||
import { OpenHandsSourceType } from "#/types/core/base";
|
||||
import { paragraph } from "../markdown/paragraph";
|
||||
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
|
||||
import { MarkdownRenderer } from "../markdown/markdown-renderer";
|
||||
|
||||
interface ChatMessageProps {
|
||||
type: OpenHandsSourceType;
|
||||
@@ -13,7 +19,6 @@ interface ChatMessageProps {
|
||||
onClick: () => void;
|
||||
tooltip?: string;
|
||||
}>;
|
||||
isFromPlanningAgent?: boolean;
|
||||
}
|
||||
|
||||
export function ChatMessage({
|
||||
@@ -21,7 +26,6 @@ export function ChatMessage({
|
||||
message,
|
||||
children,
|
||||
actions,
|
||||
isFromPlanningAgent = false,
|
||||
}: React.PropsWithChildren<ChatMessageProps>) {
|
||||
const [isHovering, setIsHovering] = React.useState(false);
|
||||
const [isCopy, setIsCopy] = React.useState(false);
|
||||
@@ -55,7 +59,6 @@ export function ChatMessage({
|
||||
"flex flex-col gap-2",
|
||||
type === "user" && " p-4 bg-tertiary self-end",
|
||||
type === "agent" && "mt-6 w-full max-w-full bg-transparent",
|
||||
isFromPlanningAgent && "border border-[#597ff4] bg-tertiary p-4",
|
||||
)}
|
||||
>
|
||||
<div
|
||||
@@ -110,7 +113,18 @@ export function ChatMessage({
|
||||
wordBreak: "break-word",
|
||||
}}
|
||||
>
|
||||
<MarkdownRenderer includeStandard>{message}</MarkdownRenderer>
|
||||
<Markdown
|
||||
components={{
|
||||
code,
|
||||
ul,
|
||||
ol,
|
||||
a: anchor,
|
||||
p: paragraph,
|
||||
}}
|
||||
remarkPlugins={[remarkGfm, remarkBreaks]}
|
||||
>
|
||||
{message}
|
||||
</Markdown>
|
||||
</div>
|
||||
{children}
|
||||
</article>
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
import React from "react";
|
||||
import Markdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkBreaks from "remark-breaks";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { code } from "../markdown/code";
|
||||
import { ol, ul } from "../markdown/list";
|
||||
import ArrowDown from "#/icons/angle-down-solid.svg?react";
|
||||
import ArrowUp from "#/icons/angle-up-solid.svg?react";
|
||||
import i18n from "#/i18n";
|
||||
import { MarkdownRenderer } from "../markdown/markdown-renderer";
|
||||
|
||||
interface ErrorMessageProps {
|
||||
errorId?: string;
|
||||
@@ -36,7 +40,18 @@ export function ErrorMessage({ errorId, defaultMessage }: ErrorMessageProps) {
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{showDetails && <MarkdownRenderer>{defaultMessage}</MarkdownRenderer>}
|
||||
{showDetails && (
|
||||
<Markdown
|
||||
components={{
|
||||
code,
|
||||
ul,
|
||||
ol,
|
||||
}}
|
||||
remarkPlugins={[remarkGfm, remarkBreaks]}
|
||||
>
|
||||
{defaultMessage}
|
||||
</Markdown>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import { Trans, useTranslation } from "react-i18next";
|
||||
import Markdown from "react-markdown";
|
||||
import { Link } from "react-router";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkBreaks from "remark-breaks";
|
||||
import { useConfig } from "#/hooks/query/use-config";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import ArrowDown from "#/icons/angle-down-solid.svg?react";
|
||||
@@ -10,7 +13,9 @@ import XCircle from "#/icons/x-circle-solid.svg?react";
|
||||
import { OpenHandsAction } from "#/types/core/actions";
|
||||
import { OpenHandsObservation } from "#/types/core/observations";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { MarkdownRenderer } from "../markdown/markdown-renderer";
|
||||
import { code } from "../markdown/code";
|
||||
import { ol, ul } from "../markdown/list";
|
||||
import { paragraph } from "../markdown/paragraph";
|
||||
import { MonoComponent } from "./mono-component";
|
||||
import { PathComponent } from "./path-component";
|
||||
|
||||
@@ -187,7 +192,17 @@ export function ExpandableMessage({
|
||||
</div>
|
||||
{showDetails && (
|
||||
<div className="text-sm">
|
||||
<MarkdownRenderer includeStandard>{details}</MarkdownRenderer>
|
||||
<Markdown
|
||||
components={{
|
||||
code,
|
||||
ul,
|
||||
ol,
|
||||
p: paragraph,
|
||||
}}
|
||||
remarkPlugins={[remarkGfm, remarkBreaks]}
|
||||
>
|
||||
{details}
|
||||
</Markdown>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
import React from "react";
|
||||
import Markdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkBreaks from "remark-breaks";
|
||||
import { code } from "../markdown/code";
|
||||
import { ol, ul } from "../markdown/list";
|
||||
import ArrowDown from "#/icons/angle-down-solid.svg?react";
|
||||
import ArrowUp from "#/icons/angle-up-solid.svg?react";
|
||||
import { SuccessIndicator } from "./success-indicator";
|
||||
import { ObservationResultStatus } from "./event-content-helpers/get-observation-result";
|
||||
import { MarkdownRenderer } from "../markdown/markdown-renderer";
|
||||
|
||||
interface GenericEventMessageProps {
|
||||
title: React.ReactNode;
|
||||
@@ -45,7 +49,16 @@ export function GenericEventMessage({
|
||||
|
||||
{showDetails &&
|
||||
(typeof details === "string" ? (
|
||||
<MarkdownRenderer>{details}</MarkdownRenderer>
|
||||
<Markdown
|
||||
components={{
|
||||
code,
|
||||
ul,
|
||||
ol,
|
||||
}}
|
||||
remarkPlugins={[remarkGfm, remarkBreaks]}
|
||||
>
|
||||
{details}
|
||||
</Markdown>
|
||||
) : (
|
||||
details
|
||||
))}
|
||||
|
||||
@@ -39,7 +39,7 @@ export function ConversationCardFooter({
|
||||
{(createdAt ?? lastUpdatedAt) && (
|
||||
<p className="text-xs text-[#A3A3A3] flex-1 text-right">
|
||||
<time>
|
||||
{`${formatTimeDelta(lastUpdatedAt ?? createdAt)} ${t(I18nKey.CONVERSATION$AGO)}`}
|
||||
{`${formatTimeDelta(new Date(lastUpdatedAt ?? createdAt))} ${t(I18nKey.CONVERSATION$AGO)}`}
|
||||
</time>
|
||||
</p>
|
||||
)}
|
||||
|
||||
@@ -3,13 +3,12 @@ import { FaCodeBranch } from "react-icons/fa";
|
||||
import { IconType } from "react-icons/lib";
|
||||
import { RepositorySelection } from "#/api/open-hands.types";
|
||||
import { Provider } from "#/types/settings";
|
||||
import AzureDevOpsLogo from "#/assets/branding/azure-devops-logo.svg?react";
|
||||
|
||||
interface ConversationRepoLinkProps {
|
||||
selectedRepository: RepositorySelection;
|
||||
}
|
||||
|
||||
const providerIcon: Partial<Record<Provider, IconType>> = {
|
||||
const providerIcon: Record<Provider, IconType> = {
|
||||
bitbucket: FaBitbucket,
|
||||
github: FaGithub,
|
||||
gitlab: FaGitlab,
|
||||
@@ -27,9 +26,6 @@ export function ConversationRepoLink({
|
||||
<div className="flex items-center gap-3 flex-1">
|
||||
<div className="flex items-center gap-1">
|
||||
{Icon && <Icon size={14} className="text-[#A3A3A3]" />}
|
||||
{selectedRepository.git_provider === "azure_devops" && (
|
||||
<AzureDevOpsLogo className="text-[#A3A3A3] w-[14px] h-[14px]" />
|
||||
)}
|
||||
<span
|
||||
data-testid="conversation-card-selected-repository"
|
||||
className="text-xs text-[#A3A3A3] whitespace-nowrap overflow-hidden text-ellipsis max-w-44"
|
||||
|
||||
@@ -31,7 +31,7 @@ export function StartTaskCardFooter({
|
||||
{createdAt && (
|
||||
<p className="text-xs text-[#A3A3A3] flex-1 text-right">
|
||||
<time>
|
||||
{`${formatTimeDelta(createdAt)} ${t(I18nKey.CONVERSATION$AGO)}`}
|
||||
{`${formatTimeDelta(new Date(createdAt))} ${t(I18nKey.CONVERSATION$AGO)}`}
|
||||
</time>
|
||||
</p>
|
||||
)}
|
||||
|
||||
@@ -12,8 +12,7 @@ export function ContextWindowSection({
|
||||
}: ContextWindowSectionProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const usagePercentage =
|
||||
contextWindow > 0 ? (perTurnToken / contextWindow) * 100 : 0;
|
||||
const usagePercentage = (perTurnToken / contextWindow) * 100;
|
||||
const progressWidth = Math.min(100, usagePercentage);
|
||||
|
||||
return (
|
||||
|
||||
@@ -51,8 +51,6 @@ export function GitProviderDropdown({
|
||||
return "GitLab";
|
||||
case "bitbucket":
|
||||
return "Bitbucket";
|
||||
case "azure_devops":
|
||||
return "Azure DevOps";
|
||||
case "enterprise_sso":
|
||||
return "Enterprise SSO";
|
||||
default:
|
||||
|
||||
@@ -67,14 +67,12 @@ export function RecentConversation({ conversation }: RecentConversationProps) {
|
||||
</div>
|
||||
) : null}
|
||||
</div>
|
||||
{(conversation.created_at || conversation.last_updated_at) && (
|
||||
<span>
|
||||
{formatTimeDelta(
|
||||
conversation.created_at || conversation.last_updated_at,
|
||||
)}{" "}
|
||||
{t(I18nKey.CONVERSATION$AGO)}
|
||||
</span>
|
||||
)}
|
||||
<span>
|
||||
{formatTimeDelta(
|
||||
new Date(conversation.created_at || conversation.last_updated_at),
|
||||
)}{" "}
|
||||
{t(I18nKey.CONVERSATION$AGO)}
|
||||
</span>
|
||||
</div>
|
||||
</button>
|
||||
</Link>
|
||||
|
||||
@@ -56,15 +56,6 @@ export function TaskCard({ task }: TaskCardProps) {
|
||||
const issueType =
|
||||
task.task_type === "OPEN_ISSUE" ? "issues" : "pull-requests";
|
||||
href = `https://bitbucket.org/${task.repo}/${issueType}/${task.issue_number}`;
|
||||
} else if (task.git_provider === "azure_devops") {
|
||||
// Azure DevOps URL format: https://dev.azure.com/{organization}/{project}/_workitems/edit/{id}
|
||||
// or https://dev.azure.com/{organization}/{project}/_git/{repo}/pullrequest/{id}
|
||||
const azureDevOpsBaseUrl = "https://dev.azure.com";
|
||||
if (task.task_type === "OPEN_ISSUE") {
|
||||
href = `${azureDevOpsBaseUrl}/${task.repo}/_workitems/edit/${task.issue_number}`;
|
||||
} else {
|
||||
href = `${azureDevOpsBaseUrl}/${task.repo}/_git/${task.repo.split("/")[1]}/pullrequest/${task.issue_number}`;
|
||||
}
|
||||
} else {
|
||||
const hrefType = task.task_type === "OPEN_ISSUE" ? "issues" : "pull";
|
||||
href = `https://github.com/${task.repo}/${hrefType}/${task.issue_number}`;
|
||||
|
||||
@@ -8,7 +8,7 @@ export function h1({
|
||||
React.HTMLAttributes<HTMLHeadingElement> &
|
||||
ExtraProps) {
|
||||
return (
|
||||
<h1 className="text-2xl text-white font-bold leading-8 mb-4 mt-6 first:mt-0">
|
||||
<h1 className="text-[32px] text-white font-bold leading-8 mb-4 mt-6 first:mt-0">
|
||||
{children}
|
||||
</h1>
|
||||
);
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
import Markdown, { Components } from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkBreaks from "remark-breaks";
|
||||
import { code } from "./code";
|
||||
import { ul, ol } from "./list";
|
||||
import { paragraph } from "./paragraph";
|
||||
import { anchor } from "./anchor";
|
||||
import { h1, h2, h3, h4, h5, h6 } from "./headings";
|
||||
|
||||
interface MarkdownRendererProps {
  /**
   * Markdown source supplied as the element's children.
   * Used only when `content` is null or undefined.
   */
  children?: string;
  /**
   * Markdown source supplied as a prop; wins over `children` when both are set.
   */
  content?: string;
  /**
   * Extra renderers for markdown elements. These are merged last, so an entry
   * here replaces the built-in renderer (code, ul, ol, ...) for the same key.
   */
  components?: Partial<Components>;
  /**
   * Adds the anchor (`a`) and paragraph (`p`) renderers when true.
   * Off by default.
   */
  includeStandard?: boolean;
  /**
   * Adds the h1-h6 heading renderers when true.
   * Off by default.
   */
  includeHeadings?: boolean;
}
|
||||
|
||||
/**
 * Shared Markdown renderer so every screen renders markdown the same way.
 *
 * Always applied:
 * - the code, ul and ol renderers
 * - the remarkGfm and remarkBreaks plugins
 *
 * Opt-in:
 * - includeStandard: anchor and paragraph renderers
 * - includeHeadings: h1-h6 renderers
 * - components: caller-supplied renderers, merged last so they win
 *
 * The markdown text is taken from `content`, then `children`, and falls back
 * to an empty string.
 */
export function MarkdownRenderer({
  children,
  content,
  components: customComponents,
  includeStandard = false,
  includeHeadings = false,
}: MarkdownRendererProps) {
  // Optional renderer groups; an empty object contributes nothing to the merge.
  const standardRenderers = includeStandard ? { a: anchor, p: paragraph } : {};
  const headingRenderers = includeHeadings ? { h1, h2, h3, h4, h5, h6 } : {};

  // Later spreads override earlier keys: defaults < optional groups < custom.
  const components: Components = {
    code,
    ul,
    ol,
    ...standardRenderers,
    ...headingRenderers,
    ...customComponents,
  };

  return (
    <Markdown components={components} remarkPlugins={[remarkGfm, remarkBreaks]}>
      {content ?? children ?? ""}
    </Markdown>
  );
}
|
||||
@@ -1,10 +1,16 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Spinner } from "@heroui/react";
|
||||
import Markdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkBreaks from "remark-breaks";
|
||||
import { code } from "../markdown/code";
|
||||
import { ul, ol } from "../markdown/list";
|
||||
import { paragraph } from "../markdown/paragraph";
|
||||
import { anchor } from "../markdown/anchor";
|
||||
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
|
||||
import { useRepositoryMicroagentContent } from "#/hooks/query/use-repository-microagent-content";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { extractRepositoryInfo } from "#/utils/utils";
|
||||
import { MarkdownRenderer } from "../markdown/markdown-renderer";
|
||||
|
||||
export function MicroagentManagementViewMicroagentContent() {
|
||||
const { t } = useTranslation();
|
||||
@@ -43,9 +49,18 @@ export function MicroagentManagementViewMicroagentContent() {
|
||||
</div>
|
||||
)}
|
||||
{microagentData && !isLoading && !error && (
|
||||
<MarkdownRenderer includeStandard>
|
||||
<Markdown
|
||||
components={{
|
||||
code,
|
||||
ul,
|
||||
ol,
|
||||
a: anchor,
|
||||
p: paragraph,
|
||||
}}
|
||||
remarkPlugins={[remarkGfm, remarkBreaks]}
|
||||
>
|
||||
{microagentData.content}
|
||||
</MarkdownRenderer>
|
||||
</Markdown>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
/**
 * Small help paragraph containing a link to Microsoft's documentation on
 * authenticating with Azure DevOps personal access tokens.
 *
 * The translated help string is used both as the visible link text and as
 * the link's `aria-label`. The link opens in a new tab.
 */
export function AzureDevOpsTokenHelpAnchor() {
  const { t } = useTranslation();
  const helpLabel = t(I18nKey.GIT$AZURE_DEVOPS_TOKEN_HELP);

  return (
    <p data-testid="azure-devops-token-help-anchor" className="text-xs">
      <a
        href="https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate"
        target="_blank"
        className="underline underline-offset-2"
        rel="noopener noreferrer"
        aria-label={helpLabel}
      >
        {helpLabel}
      </a>
    </p>
  );
}
|
||||
@@ -1,64 +0,0 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { SettingsInput } from "../settings-input";
|
||||
import { AzureDevOpsTokenHelpAnchor } from "./azure-devops-token-help-anchor";
|
||||
import { KeyStatusIcon } from "../key-status-icon";
|
||||
|
||||
/** Props accepted by `AzureDevOpsTokenInput`. */
interface AzureDevOpsTokenInputProps {
  /** Change handler wired to the token (password) input. */
  onChange: (value: string) => void;
  /** Change handler wired to the host (text) input. */
  onAzureDevOpsHostChange: (value: string) => void;
  /** When true, the token input shows a "<hidden>" placeholder and a status icon. */
  isAzureDevOpsTokenSet: boolean;
  /** Used as both the `name` and the `testId` of the token input. */
  name: string;
  /** Initial value of the host input; a non-blank value also shows a status icon. */
  azureDevOpsHostSet: string | null | undefined;
}
|
||||
|
||||
/**
 * Settings section for Azure DevOps credentials.
 *
 * Renders, top to bottom: a password input for the token, a text input for
 * the host, and the token help link. Each input shows a `KeyStatusIcon` as
 * its start content when a value is already stored.
 */
export function AzureDevOpsTokenInput({
  onChange,
  onAzureDevOpsHostChange,
  isAzureDevOpsTokenSet,
  name,
  azureDevOpsHostSet,
}: AzureDevOpsTokenInputProps) {
  const { t } = useTranslation();

  // The stored token is never echoed back; only a placeholder hints at it.
  const tokenPlaceholder = isAzureDevOpsTokenSet ? "<hidden>" : "";

  // Start-content slots: a falsy value when nothing is stored, the icon otherwise.
  const tokenIndicator = isAzureDevOpsTokenSet && (
    <KeyStatusIcon
      testId="azure-devops-set-token-indicator"
      isSet={isAzureDevOpsTokenSet}
    />
  );
  const hostIndicator = azureDevOpsHostSet &&
    azureDevOpsHostSet.trim() !== "" && (
      <KeyStatusIcon testId="azure-devops-set-host-indicator" isSet />
    );

  return (
    <div className="flex flex-col gap-6">
      <SettingsInput
        testId={name}
        name={name}
        onChange={onChange}
        label={t(I18nKey.GIT$AZURE_DEVOPS_TOKEN)}
        type="password"
        className="w-full max-w-[680px]"
        placeholder={tokenPlaceholder}
        startContent={tokenIndicator}
      />

      <SettingsInput
        onChange={onAzureDevOpsHostChange || (() => {})}
        name="azure-devops-host-input"
        testId="azure-devops-host-input"
        label={t(I18nKey.GIT$AZURE_DEVOPS_HOST)}
        type="text"
        className="w-full max-w-[680px]"
        placeholder={t(I18nKey.GIT$AZURE_DEVOPS_HOST_PLACEHOLDER)}
        defaultValue={azureDevOpsHostSet || undefined}
        startContent={hostIndicator}
      />

      <AzureDevOpsTokenHelpAnchor />
    </div>
  );
}
|
||||
@@ -1,37 +0,0 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { useConfig } from "#/hooks/query/use-config";
|
||||
import { useAuthUrl } from "#/hooks/use-auth-url";
|
||||
import { BrandButton } from "../brand-button";
|
||||
|
||||
/**
 * Button that starts the Azure DevOps OAuth flow.
 *
 * The auth URL is derived via `useAuthUrl` from the loaded config
 * (`APP_MODE`, `AUTH_URL`) with the "azure_devops" identity provider.
 * Clicking the button navigates the window to that URL; when no URL is
 * available the click does nothing.
 */
export function ConfigureAzureDevOpsAnchor() {
  const { t } = useTranslation();
  const { data: config } = useConfig();

  const authUrl = useAuthUrl({
    appMode: config?.APP_MODE ?? null,
    identityProvider: "azure_devops",
    authUrl: config?.AUTH_URL,
  });

  const handleOAuthFlow = () => {
    // Only redirect once an auth URL has been resolved.
    if (authUrl) {
      window.location.href = authUrl;
    }
  };

  return (
    <div data-testid="configure-azure-devops-button" className="py-9">
      <BrandButton
        type="button"
        variant="primary"
        className="w-55"
        onClick={handleOAuthFlow}
      >
        {t(I18nKey.AZURE_DEVOPS$CONNECT_ACCOUNT)}
      </BrandButton>
    </div>
  );
}
|
||||
@@ -8,7 +8,6 @@ import { BrandButton } from "../settings/brand-button";
|
||||
import GitHubLogo from "#/assets/branding/github-logo.svg?react";
|
||||
import GitLabLogo from "#/assets/branding/gitlab-logo.svg?react";
|
||||
import BitbucketLogo from "#/assets/branding/bitbucket-logo.svg?react";
|
||||
import AzureDevOpsLogo from "#/assets/branding/azure-devops-logo.svg?react";
|
||||
import { useAuthUrl } from "#/hooks/use-auth-url";
|
||||
import { GetConfigResponse } from "#/api/option-service/option.types";
|
||||
import { Provider } from "#/types/settings";
|
||||
@@ -42,12 +41,6 @@ export function AuthModal({
|
||||
authUrl,
|
||||
});
|
||||
|
||||
const azureDevOpsAuthUrl = useAuthUrl({
|
||||
appMode: appMode || null,
|
||||
identityProvider: "azure_devops",
|
||||
authUrl,
|
||||
});
|
||||
|
||||
const enterpriseSsoUrl = useAuthUrl({
|
||||
appMode: appMode || null,
|
||||
identityProvider: "enterprise_sso",
|
||||
@@ -78,13 +71,6 @@ export function AuthModal({
|
||||
}
|
||||
};
|
||||
|
||||
const handleAzureDevOpsAuth = () => {
|
||||
if (azureDevOpsAuthUrl) {
|
||||
// Always start the OIDC flow, let the backend handle TOS check
|
||||
window.location.href = azureDevOpsAuthUrl;
|
||||
}
|
||||
};
|
||||
|
||||
const handleEnterpriseSsoAuth = () => {
|
||||
if (enterpriseSsoUrl) {
|
||||
trackLoginButtonClick({ provider: "enterprise_sso" });
|
||||
@@ -106,10 +92,6 @@ export function AuthModal({
|
||||
providersConfigured &&
|
||||
providersConfigured.length > 0 &&
|
||||
providersConfigured.includes("bitbucket");
|
||||
const showAzureDevOps =
|
||||
providersConfigured &&
|
||||
providersConfigured.length > 0 &&
|
||||
providersConfigured.includes("azure_devops");
|
||||
const showEnterpriseSso =
|
||||
providersConfigured &&
|
||||
providersConfigured.length > 0 &&
|
||||
@@ -172,18 +154,6 @@ export function AuthModal({
|
||||
</BrandButton>
|
||||
)}
|
||||
|
||||
{showAzureDevOps && (
|
||||
<BrandButton
|
||||
type="button"
|
||||
variant="primary"
|
||||
onClick={handleAzureDevOpsAuth}
|
||||
className="w-full font-semibold"
|
||||
startContent={<AzureDevOpsLogo width={20} height={20} />}
|
||||
>
|
||||
{t(I18nKey.AZURE_DEVOPS$CONNECT_ACCOUNT)}
|
||||
</BrandButton>
|
||||
)}
|
||||
|
||||
{showEnterpriseSso && (
|
||||
<BrandButton
|
||||
type="button"
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { FaBitbucket, FaGithub, FaGitlab } from "react-icons/fa6";
|
||||
import { Provider } from "#/types/settings";
|
||||
import AzureDevOpsLogo from "#/assets/branding/azure-devops-logo.svg?react";
|
||||
|
||||
interface GitProviderIconProps {
|
||||
gitProvider: Provider;
|
||||
@@ -14,13 +13,8 @@ export function GitProviderIcon({
|
||||
return (
|
||||
<>
|
||||
{gitProvider === "github" && <FaGithub size={14} className={className} />}
|
||||
{gitProvider === "gitlab" && <FaGitlab size={14} className={className} />}
|
||||
{gitProvider === "bitbucket" && (
|
||||
<FaBitbucket size={14} className={className} />
|
||||
)}
|
||||
{gitProvider === "azure_devops" && (
|
||||
<AzureDevOpsLogo className={`${className} w-[14px] h-[14px]`} />
|
||||
)}
|
||||
{gitProvider === "gitlab" && <FaGitlab className={className} />}
|
||||
{gitProvider === "bitbucket" && <FaBitbucket className={className} />}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -21,11 +21,7 @@ interface ModelSelectorProps {
|
||||
isDisabled?: boolean;
|
||||
models: Record<string, { separator: string; models: string[] }>;
|
||||
currentModel?: string;
|
||||
onChange?: (provider: string | null, model: string | null) => void;
|
||||
onDefaultValuesChanged?: (
|
||||
provider: string | null,
|
||||
model: string | null,
|
||||
) => void;
|
||||
onChange?: (model: string | null) => void;
|
||||
wrapperClassName?: string;
|
||||
labelClassName?: string;
|
||||
}
|
||||
@@ -35,7 +31,6 @@ export function ModelSelector({
|
||||
models,
|
||||
currentModel,
|
||||
onChange,
|
||||
onDefaultValuesChanged,
|
||||
wrapperClassName,
|
||||
labelClassName,
|
||||
}: ModelSelectorProps) {
|
||||
@@ -61,7 +56,6 @@ export function ModelSelector({
|
||||
setLitellmId(currentModel);
|
||||
setSelectedProvider(provider);
|
||||
setSelectedModel(model);
|
||||
onDefaultValuesChanged?.(provider, model);
|
||||
}
|
||||
}, [currentModel]);
|
||||
|
||||
@@ -71,7 +65,6 @@ export function ModelSelector({
|
||||
|
||||
const separator = models[provider]?.separator || "";
|
||||
setLitellmId(provider + separator);
|
||||
onChange?.(provider, null);
|
||||
};
|
||||
|
||||
const handleChangeModel = (model: string) => {
|
||||
@@ -83,7 +76,7 @@ export function ModelSelector({
|
||||
}
|
||||
setLitellmId(fullModel);
|
||||
setSelectedModel(model);
|
||||
onChange?.(selectedProvider, model);
|
||||
onChange?.(fullModel);
|
||||
};
|
||||
|
||||
const clear = () => {
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
import { MessageEvent } from "#/types/v1/core";
|
||||
import { BaseEvent } from "#/types/v1/core/base/event";
|
||||
import { getSkillReadyContent } from "./get-skill-ready-content";
|
||||
|
||||
/**
 * Synthetic "Skill Ready" event.
 *
 * Extends BaseEvent with a literal marker field, which `isSkillReadyEvent`
 * checks, and the pre-formatted content string to display.
 */
export interface SkillReadyEvent extends BaseEvent {
  /** Discriminating marker; always the literal `true`. */
  _isSkillReadyEvent: true;
  /** Display text produced by `getSkillReadyContent`. */
  _skillReadyContent: string;
}
|
||||
|
||||
/**
 * Type guard for Skill Ready events.
 *
 * Returns true only for non-null objects that carry an
 * `_isSkillReadyEvent` property strictly equal to `true`.
 */
export const isSkillReadyEvent = (event: unknown): event is SkillReadyEvent => {
  // Primitives and null can never be events.
  if (typeof event !== "object" || event === null) {
    return false;
  }

  return "_isSkillReadyEvent" in event && event._isSkillReadyEvent === true;
};
|
||||
|
||||
/**
 * Builds a synthetic "Skill Ready" event from a user MessageEvent.
 *
 * The resulting event reuses the user event's timestamp, derives its id by
 * appending "-skill-ready" to the user event's id, is attributed to the
 * agent, and carries the formatted skill / extended-content text.
 *
 * @throws Error when the user event has neither activated skills nor
 *   extended content.
 */
export const createSkillReadyEvent = (
  userEvent: MessageEvent,
): SkillReadyEvent => {
  // The payload may name the field either `activated_skills` or
  // `activated_microagents`; the former takes precedence when truthy.
  const { activated_skills: skillsField } = userEvent as unknown as {
    activated_skills?: string[];
  };
  const skillNames = skillsField || userEvent.activated_microagents || [];
  const extraContent = userEvent.extended_content || [];

  const nothingToShow = skillNames.length === 0 && extraContent.length === 0;
  if (nothingToShow) {
    throw new Error(
      "Cannot create skill ready event without activated skills or extended content",
    );
  }

  const formatted = getSkillReadyContent(skillNames, extraContent);

  return {
    id: `${userEvent.id}-skill-ready`,
    timestamp: userEvent.timestamp,
    source: "agent",
    _isSkillReadyEvent: true,
    _skillReadyContent: formatted,
  };
};
|
||||
@@ -4,7 +4,6 @@ import i18n from "#/i18n";
|
||||
import { SecurityRisk } from "#/types/v1/core/base/common";
|
||||
import {
|
||||
ExecuteBashAction,
|
||||
TerminalAction,
|
||||
FileEditorAction,
|
||||
StrReplaceEditorAction,
|
||||
MCPToolAction,
|
||||
@@ -59,7 +58,7 @@ const getFileEditorActionContent = (
|
||||
|
||||
// Command Actions
|
||||
const getExecuteBashActionContent = (
|
||||
event: ActionEvent<ExecuteBashAction | TerminalAction>,
|
||||
event: ActionEvent<ExecuteBashAction>,
|
||||
): string => {
|
||||
let content = `Command:\n\`${event.action.command}\``;
|
||||
|
||||
@@ -132,61 +131,27 @@ type BrowserAction =
|
||||
|
||||
const getBrowserActionContent = (action: BrowserAction): string => {
|
||||
switch (action.kind) {
|
||||
case "BrowserNavigateAction": {
|
||||
let content = `Browsing ${action.url}`;
|
||||
if (action.new_tab) {
|
||||
content += `\n**New Tab:** Yes`;
|
||||
}
|
||||
return content;
|
||||
}
|
||||
case "BrowserClickAction": {
|
||||
let content = `**Element Index:** ${action.index}`;
|
||||
if (action.new_tab) {
|
||||
content += `\n**New Tab:** Yes`;
|
||||
}
|
||||
return content;
|
||||
}
|
||||
case "BrowserTypeAction": {
|
||||
const textPreview =
|
||||
action.text.length > 50
|
||||
? `${action.text.slice(0, 50)}...`
|
||||
: action.text;
|
||||
return `**Element Index:** ${action.index}\n**Text:** ${textPreview}`;
|
||||
}
|
||||
case "BrowserGetStateAction": {
|
||||
if (action.include_screenshot) {
|
||||
return `**Include Screenshot:** Yes`;
|
||||
case "BrowserNavigateAction":
|
||||
if ("url" in action) {
|
||||
return `Browsing ${action.url}`;
|
||||
}
|
||||
break;
|
||||
case "BrowserClickAction":
|
||||
case "BrowserTypeAction":
|
||||
case "BrowserGetStateAction":
|
||||
case "BrowserGetContentAction":
|
||||
case "BrowserScrollAction":
|
||||
case "BrowserGoBackAction":
|
||||
case "BrowserListTabsAction":
|
||||
case "BrowserSwitchTabAction":
|
||||
case "BrowserCloseTabAction":
|
||||
// These browser actions typically don't need detailed content display
|
||||
return getNoContentActionContent();
|
||||
}
|
||||
case "BrowserGetContentAction": {
|
||||
const parts: string[] = [];
|
||||
if (action.extract_links) {
|
||||
parts.push(`**Extract Links:** Yes`);
|
||||
}
|
||||
if (action.start_from_char > 0) {
|
||||
parts.push(`**Start From Character:** ${action.start_from_char}`);
|
||||
}
|
||||
return parts.length > 0 ? parts.join("\n") : getNoContentActionContent();
|
||||
}
|
||||
case "BrowserScrollAction": {
|
||||
return `**Direction:** ${action.direction}`;
|
||||
}
|
||||
case "BrowserGoBackAction": {
|
||||
return getNoContentActionContent();
|
||||
}
|
||||
case "BrowserListTabsAction": {
|
||||
return getNoContentActionContent();
|
||||
}
|
||||
case "BrowserSwitchTabAction": {
|
||||
return `**Tab ID:** ${action.tab_id}`;
|
||||
}
|
||||
case "BrowserCloseTabAction": {
|
||||
return `**Tab ID:** ${action.tab_id}`;
|
||||
}
|
||||
default:
|
||||
return getNoContentActionContent();
|
||||
}
|
||||
|
||||
return getNoContentActionContent();
|
||||
};
|
||||
|
||||
export const getActionContent = (event: ActionEvent): string => {
|
||||
@@ -199,9 +164,8 @@ export const getActionContent = (event: ActionEvent): string => {
|
||||
return getFileEditorActionContent(action);
|
||||
|
||||
case "ExecuteBashAction":
|
||||
case "TerminalAction":
|
||||
return getExecuteBashActionContent(
|
||||
event as ActionEvent<ExecuteBashAction | TerminalAction>,
|
||||
event as ActionEvent<ExecuteBashAction>,
|
||||
);
|
||||
|
||||
case "MCPToolAction":
|
||||
|
||||
@@ -8,7 +8,6 @@ import { getActionContent } from "./get-action-content";
|
||||
import { getObservationContent } from "./get-observation-content";
|
||||
import { TaskTrackingObservationContent } from "../task-tracking/task-tracking-observation-content";
|
||||
import { TaskTrackerObservation } from "#/types/v1/core/base/observation";
|
||||
import { SkillReadyEvent, isSkillReadyEvent } from "./create-skill-ready-event";
|
||||
import i18n from "#/i18n";
|
||||
|
||||
const trimText = (text: string, maxLength: number): string => {
|
||||
@@ -50,7 +49,6 @@ const getActionEventTitle = (event: OpenHandsEvent): React.ReactNode => {
|
||||
|
||||
switch (actionType) {
|
||||
case "ExecuteBashAction":
|
||||
case "TerminalAction":
|
||||
actionKey = "ACTION_MESSAGE$RUN";
|
||||
actionValues = {
|
||||
command: trimText(event.action.command, 80),
|
||||
@@ -85,20 +83,11 @@ const getActionEventTitle = (event: OpenHandsEvent): React.ReactNode => {
|
||||
actionKey = "ACTION_MESSAGE$TASK_TRACKING";
|
||||
break;
|
||||
case "BrowserNavigateAction":
|
||||
case "BrowserClickAction":
|
||||
case "BrowserTypeAction":
|
||||
case "BrowserGetStateAction":
|
||||
case "BrowserGetContentAction":
|
||||
case "BrowserScrollAction":
|
||||
case "BrowserGoBackAction":
|
||||
case "BrowserListTabsAction":
|
||||
case "BrowserSwitchTabAction":
|
||||
case "BrowserCloseTabAction":
|
||||
actionKey = "ACTION_MESSAGE$BROWSE";
|
||||
break;
|
||||
default:
|
||||
// For unknown actions, use the type name
|
||||
return String(actionType).replace("Action", "").toUpperCase();
|
||||
return actionType.replace("Action", "").toUpperCase();
|
||||
}
|
||||
|
||||
if (actionKey) {
|
||||
@@ -121,7 +110,6 @@ const getObservationEventTitle = (event: OpenHandsEvent): React.ReactNode => {
|
||||
|
||||
switch (observationType) {
|
||||
case "ExecuteBashObservation":
|
||||
case "TerminalObservation":
|
||||
observationKey = "OBSERVATION_MESSAGE$RUN";
|
||||
observationValues = {
|
||||
command: event.observation.command
|
||||
@@ -171,21 +159,11 @@ const getObservationEventTitle = (event: OpenHandsEvent): React.ReactNode => {
|
||||
return observationType;
|
||||
};
|
||||
|
||||
export const getEventContent = (event: OpenHandsEvent | SkillReadyEvent) => {
|
||||
export const getEventContent = (event: OpenHandsEvent) => {
|
||||
let title: React.ReactNode = "";
|
||||
let details: string | React.ReactNode = "";
|
||||
|
||||
// Handle Skill Ready events first
|
||||
if (isSkillReadyEvent(event)) {
|
||||
// Use translation key if available, otherwise use "SKILL READY"
|
||||
const skillReadyKey = "OBSERVATION_MESSAGE$SKILL_READY";
|
||||
if (i18n.exists(skillReadyKey)) {
|
||||
title = createTitleFromKey(skillReadyKey, {});
|
||||
} else {
|
||||
title = "Skill Ready";
|
||||
}
|
||||
details = event._skillReadyContent;
|
||||
} else if (isActionEvent(event)) {
|
||||
if (isActionEvent(event)) {
|
||||
title = getActionEventTitle(event);
|
||||
details = getActionContent(event);
|
||||
} else if (isObservationEvent(event)) {
|
||||
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
ThinkObservation,
|
||||
BrowserObservation,
|
||||
ExecuteBashObservation,
|
||||
TerminalObservation,
|
||||
FileEditorObservation,
|
||||
StrReplaceEditorObservation,
|
||||
TaskTrackerObservation,
|
||||
@@ -24,15 +23,6 @@ const getFileEditorObservationContent = (
|
||||
return `**Error:**\n${observation.error}`;
|
||||
}
|
||||
|
||||
// Extract text content from the observation if it exists
|
||||
const textContent =
|
||||
"content" in observation && Array.isArray(observation.content)
|
||||
? observation.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => c.text)
|
||||
.join("\n")
|
||||
: null;
|
||||
|
||||
const successMessage = getObservationResult(event) === "success";
|
||||
|
||||
// For view commands or successful edits with content changes, format as code block
|
||||
@@ -44,18 +34,16 @@ const getFileEditorObservationContent = (
|
||||
observation.new_content) ||
|
||||
observation.command === "view"
|
||||
) {
|
||||
// Prefer content over output for view commands, fallback to output if content is not available
|
||||
const displayContent = textContent || observation.output;
|
||||
return `\`\`\`\n${displayContent}\n\`\`\``;
|
||||
return `\`\`\`\n${observation.output}\n\`\`\``;
|
||||
}
|
||||
|
||||
// For other commands, prefer content if available, otherwise use output
|
||||
return textContent || observation.output;
|
||||
// For other commands, return the output as-is
|
||||
return observation.output;
|
||||
};
|
||||
|
||||
// Command Observations
|
||||
const getTerminalObservationContent = (
|
||||
event: ObservationEvent<ExecuteBashObservation | TerminalObservation>,
|
||||
const getExecuteBashObservationContent = (
|
||||
event: ObservationEvent<ExecuteBashObservation>,
|
||||
): string => {
|
||||
const { observation } = event;
|
||||
|
||||
@@ -80,23 +68,14 @@ const getBrowserObservationContent = (
|
||||
): string => {
|
||||
const { observation } = event;
|
||||
|
||||
// Extract text content from the observation
|
||||
const textContent =
|
||||
"content" in observation && Array.isArray(observation.content)
|
||||
? observation.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => c.text)
|
||||
.join("\n")
|
||||
: "";
|
||||
|
||||
let contentDetails = "";
|
||||
|
||||
if ("is_error" in observation && observation.is_error) {
|
||||
contentDetails += `**Error:**\n${textContent}`;
|
||||
} else {
|
||||
contentDetails += `**Output:**\n${textContent}`;
|
||||
if ("error" in observation && observation.error) {
|
||||
contentDetails += `**Error:**\n${observation.error}\n\n`;
|
||||
}
|
||||
|
||||
contentDetails += `**Output:**\n${observation.output}`;
|
||||
|
||||
if (contentDetails.length > MAX_CONTENT_LENGTH) {
|
||||
contentDetails = `${contentDetails.slice(0, MAX_CONTENT_LENGTH)}...(truncated)`;
|
||||
}
|
||||
@@ -184,22 +163,7 @@ const getFinishObservationContent = (
|
||||
event: ObservationEvent<FinishObservation>,
|
||||
): string => {
|
||||
const { observation } = event;
|
||||
|
||||
// Extract text content from the observation
|
||||
const textContent = observation.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => c.text)
|
||||
.join("\n");
|
||||
|
||||
let content = "";
|
||||
|
||||
if (observation.is_error) {
|
||||
content += `**Error:**\n${textContent}`;
|
||||
} else {
|
||||
content += textContent;
|
||||
}
|
||||
|
||||
return content;
|
||||
return observation.message || "";
|
||||
};
|
||||
|
||||
export const getObservationContent = (event: ObservationEvent): string => {
|
||||
@@ -215,9 +179,8 @@ export const getObservationContent = (event: ObservationEvent): string => {
|
||||
);
|
||||
|
||||
case "ExecuteBashObservation":
|
||||
case "TerminalObservation":
|
||||
return getTerminalObservationContent(
|
||||
event as ObservationEvent<ExecuteBashObservation | TerminalObservation>,
|
||||
return getExecuteBashObservationContent(
|
||||
event as ObservationEvent<ExecuteBashObservation>,
|
||||
);
|
||||
|
||||
case "BrowserObservation":
|
||||
|
||||
@@ -17,15 +17,6 @@ export const getObservationResult = (
|
||||
if (exitCode === 0 || metadata.exit_code === 0) return "success"; // Command executed successfully
|
||||
return "error"; // Command failed
|
||||
}
|
||||
case "TerminalObservation": {
|
||||
const exitCode =
|
||||
observation.exit_code ?? observation.metadata.exit_code ?? null;
|
||||
|
||||
if (observation.timeout || exitCode === -1) return "timeout";
|
||||
if (exitCode === 0) return "success";
|
||||
if (observation.is_error) return "error";
|
||||
return "success";
|
||||
}
|
||||
case "FileEditorObservation":
|
||||
case "StrReplaceEditorObservation":
|
||||
// Check if there's an error
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
import { TextContent } from "#/types/v1/core/base/common";
|
||||
|
||||
/**
 * Concatenates the `text` of every item whose `type` is "text",
 * in order and without any separator.
 */
const extractAllText = (extendedContent: TextContent[]): string => {
  const pieces: string[] = [];

  for (const item of extendedContent) {
    if (item.type === "text") {
      pieces.push(item.text);
    }
  }

  return pieces.join("");
};
|
||||
|
||||
/**
 * Collects the inner text of every <EXTRA_INFO>...</EXTRA_INFO> block
 * found in `text` (tag match is case-insensitive and non-greedy).
 *
 * Each captured body is trimmed; bodies that are empty after trimming
 * are dropped. The wrapper tags themselves are not included.
 */
const extractExtraInfoBlocks = (text: string): string[] =>
  Array.from(
    text.matchAll(/<EXTRA_INFO>([\s\S]*?)<\/EXTRA_INFO>/gi),
    (found) => found[1].trim(),
  ).filter((body) => body.length > 0);
|
||||
|
||||
/**
 * Renders one skill as a bold markdown bullet, followed by its content
 * block when that block contains non-whitespace text.
 */
const formatSkillWithContent = (
  skill: string,
  contentBlock: string | undefined,
): string => {
  const bullet = `\n\n- **${skill}**`;

  // Missing or blank content: the bullet alone is the whole entry.
  if (!contentBlock || contentBlock.trim().length === 0) {
    return bullet;
  }

  return `${bullet}\n\n${contentBlock}`;
};
|
||||
|
||||
/**
 * Produces the "Triggered Skill Knowledge" section: a heading followed by
 * one entry per activated skill. The skill at position i is paired with
 * the extra-info block at position i; skills beyond the last block are
 * rendered without content.
 */
const formatSkillKnowledge = (
  activatedSkills: string[],
  extraInfoBlocks: string[],
): string => {
  const entries = activatedSkills.map((skill, position) =>
    // Out-of-range lookups yield undefined, i.e. "no content for this skill".
    formatSkillWithContent(skill, extraInfoBlocks[position]),
  );

  return `\n\n**Triggered Skill Knowledge:**${entries.join("")}`;
};
|
||||
|
||||
/**
 * Produces the "Extended Content" section used when no skills are present:
 * a heading followed by every non-blank block, each preceded by a blank line.
 */
const formatExtendedContentOnly = (extraInfoBlocks: string[]): string => {
  const body = extraInfoBlocks
    .filter((block) => block.trim().length > 0)
    .map((block) => `\n\n${block}`)
    .join("");

  return `\n\n**Extended Content:**${body}`;
};
|
||||
|
||||
/**
 * Formats activated skills and extended content as markdown for display.
 *
 * - With at least one activated skill: returns the skill-knowledge section,
 *   pairing each skill with the <EXTRA_INFO> block at the same index.
 * - With no skills but at least one <EXTRA_INFO> block: returns the
 *   extended-content section.
 * - Otherwise: returns an empty string.
 */
export const getSkillReadyContent = (
  activatedSkills: string[],
  extendedContent: TextContent[],
): string => {
  // Pull the <EXTRA_INFO> bodies out of the combined text, if there is any.
  const extraInfoBlocks: string[] =
    extendedContent && extendedContent.length > 0
      ? extractExtraInfoBlocks(extractAllText(extendedContent))
      : [];

  if (activatedSkills && activatedSkills.length > 0) {
    return formatSkillKnowledge(activatedSkills, extraInfoBlocks);
  }

  return extraInfoBlocks.length > 0
    ? formatExtendedContentOnly(extraInfoBlocks)
    : "";
};
|
||||
@@ -3,16 +3,10 @@ import { GenericEventMessage } from "../../../features/chat/generic-event-messag
|
||||
import { getEventContent } from "../event-content-helpers/get-event-content";
|
||||
import { getObservationResult } from "../event-content-helpers/get-observation-result";
|
||||
import { isObservationEvent } from "#/types/v1/type-guards";
|
||||
import {
|
||||
SkillReadyEvent,
|
||||
isSkillReadyEvent,
|
||||
} from "../event-content-helpers/create-skill-ready-event";
|
||||
import { V1ConfirmationButtons } from "#/components/shared/buttons/v1-confirmation-buttons";
|
||||
import { ObservationResultStatus } from "../../../features/chat/event-content-helpers/get-observation-result";
|
||||
import { MarkdownRenderer } from "#/components/features/markdown/markdown-renderer";
|
||||
|
||||
interface GenericEventMessageWrapperProps {
|
||||
event: OpenHandsEvent | SkillReadyEvent;
|
||||
event: OpenHandsEvent;
|
||||
isLastMessage: boolean;
|
||||
}
|
||||
|
||||
@@ -22,29 +16,11 @@ export function GenericEventMessageWrapper({
|
||||
}: GenericEventMessageWrapperProps) {
|
||||
const { title, details } = getEventContent(event);
|
||||
|
||||
// SkillReadyEvent is not an observation event, so skip the observation checks
|
||||
if (!isSkillReadyEvent(event)) {
|
||||
if (isObservationEvent(event)) {
|
||||
if (event.observation.kind === "TaskTrackerObservation") {
|
||||
return <div>{details}</div>;
|
||||
}
|
||||
if (event.observation.kind === "FinishObservation") {
|
||||
return (
|
||||
<MarkdownRenderer includeStandard includeHeadings>
|
||||
{details as string}
|
||||
</MarkdownRenderer>
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine success status
|
||||
let success: ObservationResultStatus | undefined;
|
||||
if (isSkillReadyEvent(event)) {
|
||||
// Skill Ready events should show success indicator, same as v0 recall observations
|
||||
success = "success";
|
||||
} else if (isObservationEvent(event)) {
|
||||
success = getObservationResult(event);
|
||||
if (
|
||||
isObservationEvent(event) &&
|
||||
event.observation.kind === "TaskTrackerObservation"
|
||||
) {
|
||||
return <div>{details}</div>;
|
||||
}
|
||||
|
||||
return (
|
||||
@@ -52,7 +28,9 @@ export function GenericEventMessageWrapper({
|
||||
<GenericEventMessage
|
||||
title={title}
|
||||
details={details}
|
||||
success={success}
|
||||
success={
|
||||
isObservationEvent(event) ? getObservationResult(event) : undefined
|
||||
}
|
||||
initiallyExpanded={false}
|
||||
/>
|
||||
{isLastMessage && <V1ConfirmationButtons />}
|
||||
|
||||
@@ -22,7 +22,6 @@ interface UserAssistantEventMessageProps {
|
||||
tooltip?: string;
|
||||
}>;
|
||||
isLastMessage: boolean;
|
||||
isFromPlanningAgent: boolean;
|
||||
}
|
||||
|
||||
export function UserAssistantEventMessage({
|
||||
@@ -32,7 +31,6 @@ export function UserAssistantEventMessage({
|
||||
microagentPRUrl,
|
||||
actions,
|
||||
isLastMessage,
|
||||
isFromPlanningAgent,
|
||||
}: UserAssistantEventMessageProps) {
|
||||
const message = parseMessageFromEvent(event);
|
||||
|
||||
@@ -48,12 +46,7 @@ export function UserAssistantEventMessage({
|
||||
|
||||
return (
|
||||
<>
|
||||
<ChatMessage
|
||||
type={event.source}
|
||||
message={message}
|
||||
actions={actions}
|
||||
isFromPlanningAgent={isFromPlanningAgent}
|
||||
>
|
||||
<ChatMessage type={event.source} message={message} actions={actions}>
|
||||
{imageUrls.length > 0 && (
|
||||
<ImageCarousel size="small" images={imageUrls} />
|
||||
)}
|
||||
|
||||
@@ -5,7 +5,6 @@ import {
|
||||
isActionEvent,
|
||||
isObservationEvent,
|
||||
isAgentErrorEvent,
|
||||
isUserMessageEvent,
|
||||
} from "#/types/v1/type-guards";
|
||||
import { MicroagentStatus } from "#/types/microagent-status";
|
||||
import { useConfig } from "#/hooks/query/use-config";
|
||||
@@ -18,10 +17,9 @@ import {
|
||||
GenericEventMessageWrapper,
|
||||
ThoughtEventMessage,
|
||||
} from "./event-message-components";
|
||||
import { createSkillReadyEvent } from "./event-content-helpers/create-skill-ready-event";
|
||||
|
||||
interface EventMessageProps {
|
||||
event: OpenHandsEvent & { isFromPlanningAgent?: boolean };
|
||||
event: OpenHandsEvent;
|
||||
messages: OpenHandsEvent[];
|
||||
isLastMessage: boolean;
|
||||
microagentStatus?: MicroagentStatus | null;
|
||||
@@ -35,104 +33,6 @@ interface EventMessageProps {
|
||||
isInLast10Actions: boolean;
|
||||
}
|
||||
|
||||
/**
 * Returns the activated skill names of a MessageEvent.
 *
 * Reads `activated_skills` first, then falls back to
 * `activated_microagents`, then to an empty list.
 */
const getActivatedSkills = (event: MessageEvent): string[] => {
  // `activated_skills` is not part of the MessageEvent type, hence the cast.
  const { activated_skills: skills } = event as unknown as {
    activated_skills?: string[];
  };

  return skills || event.activated_microagents || [];
};
|
||||
|
||||
/**
 * Reports whether extended content holds at least one "text" item whose
 * text is non-empty after trimming. Missing or empty input yields false.
 */
const hasValidExtendedContent = (
  extendedContent: MessageEvent["extended_content"],
): boolean =>
  (extendedContent ?? []).some(
    (item) => item.type === "text" && item.text.trim().length > 0,
  );
|
||||
|
||||
/**
 * Decides whether a Skill Ready event should accompany the given message:
 * true only when the message has at least one activated skill AND its
 * extended content contains non-blank text.
 */
const shouldShowSkillReadyEvent = (messageEvent: MessageEvent): boolean =>
  getActivatedSkills(messageEvent).length > 0 &&
  hasValidExtendedContent(messageEvent.extended_content);
|
||||
|
||||
interface CommonProps {
|
||||
microagentStatus?: MicroagentStatus | null;
|
||||
microagentConversationId?: string;
|
||||
microagentPRUrl?: string;
|
||||
actions?: Array<{
|
||||
icon: React.ReactNode;
|
||||
onClick: () => void;
|
||||
tooltip?: string;
|
||||
}>;
|
||||
isLastMessage: boolean;
|
||||
isInLast10Actions: boolean;
|
||||
config: unknown;
|
||||
isCheckingFeedback: boolean;
|
||||
feedbackData: { exists: boolean };
|
||||
isFromPlanningAgent: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Renders a user message with its corresponding Skill Ready event.
|
||||
*/
|
||||
const renderUserMessageWithSkillReady = (
|
||||
messageEvent: MessageEvent,
|
||||
commonProps: CommonProps,
|
||||
isLastMessage: boolean,
|
||||
): React.ReactElement => {
|
||||
try {
|
||||
const skillReadyEvent = createSkillReadyEvent(messageEvent);
|
||||
return (
|
||||
<>
|
||||
<UserAssistantEventMessage
|
||||
event={messageEvent}
|
||||
microagentStatus={commonProps.microagentStatus}
|
||||
microagentConversationId={commonProps.microagentConversationId}
|
||||
microagentPRUrl={commonProps.microagentPRUrl}
|
||||
actions={commonProps.actions}
|
||||
isLastMessage={false}
|
||||
isFromPlanningAgent={commonProps.isFromPlanningAgent}
|
||||
/>
|
||||
<GenericEventMessageWrapper
|
||||
event={skillReadyEvent}
|
||||
isLastMessage={isLastMessage}
|
||||
/>
|
||||
</>
|
||||
);
|
||||
} catch (error) {
|
||||
// If skill ready event creation fails, just render the user message
|
||||
console.error("Failed to create skill ready event:", error);
|
||||
return (
|
||||
<UserAssistantEventMessage
|
||||
event={messageEvent}
|
||||
microagentStatus={commonProps.microagentStatus}
|
||||
microagentConversationId={commonProps.microagentConversationId}
|
||||
microagentPRUrl={commonProps.microagentPRUrl}
|
||||
actions={commonProps.actions}
|
||||
isLastMessage={isLastMessage}
|
||||
isFromPlanningAgent={commonProps.isFromPlanningAgent}
|
||||
/>
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/* eslint-disable react/jsx-props-no-spreading */
|
||||
export function EventMessage({
|
||||
event,
|
||||
@@ -151,9 +51,6 @@ export function EventMessage({
|
||||
const feedbackData = { exists: false };
|
||||
const isCheckingFeedback = false;
|
||||
|
||||
// Read isFromPlanningAgent directly from the event object
|
||||
const isFromPlanningAgent = event.isFromPlanningAgent || false;
|
||||
|
||||
// Common props for components that need them
|
||||
const commonProps = {
|
||||
microagentStatus,
|
||||
@@ -165,7 +62,6 @@ export function EventMessage({
|
||||
config,
|
||||
isCheckingFeedback,
|
||||
feedbackData,
|
||||
isFromPlanningAgent,
|
||||
};
|
||||
|
||||
// Agent error events
|
||||
@@ -218,21 +114,10 @@ export function EventMessage({
|
||||
|
||||
// Message events (user and assistant messages)
|
||||
if (!isActionEvent(event) && !isObservationEvent(event)) {
|
||||
const messageEvent = event as MessageEvent;
|
||||
|
||||
// Check if this is a user message that should display a Skill Ready event
|
||||
if (isUserMessageEvent(event) && shouldShowSkillReadyEvent(messageEvent)) {
|
||||
return renderUserMessageWithSkillReady(
|
||||
messageEvent,
|
||||
commonProps,
|
||||
isLastMessage,
|
||||
);
|
||||
}
|
||||
|
||||
// Render normal message event (user or assistant)
|
||||
// This is a MessageEvent
|
||||
return (
|
||||
<UserAssistantEventMessage
|
||||
event={messageEvent}
|
||||
event={event as MessageEvent}
|
||||
{...commonProps}
|
||||
isLastMessage={isLastMessage}
|
||||
/>
|
||||
|
||||
@@ -31,13 +31,7 @@ interface ConversationSubscriptionsContextType {
|
||||
subscribeToConversation: (options: {
|
||||
conversationId: string;
|
||||
sessionApiKey: string | null;
|
||||
providersSet: (
|
||||
| "github"
|
||||
| "gitlab"
|
||||
| "bitbucket"
|
||||
| "azure_devops"
|
||||
| "enterprise_sso"
|
||||
)[];
|
||||
providersSet: ("github" | "gitlab" | "bitbucket" | "enterprise_sso")[];
|
||||
baseUrl: string;
|
||||
socketPath?: string;
|
||||
onEvent?: (event: unknown, conversationId: string) => void;
|
||||
@@ -141,13 +135,7 @@ export function ConversationSubscriptionsProvider({
|
||||
(options: {
|
||||
conversationId: string;
|
||||
sessionApiKey: string | null;
|
||||
providersSet: (
|
||||
| "github"
|
||||
| "gitlab"
|
||||
| "bitbucket"
|
||||
| "azure_devops"
|
||||
| "enterprise_sso"
|
||||
)[];
|
||||
providersSet: ("github" | "gitlab" | "bitbucket" | "enterprise_sso")[];
|
||||
baseUrl: string;
|
||||
socketPath?: string;
|
||||
onEvent?: (event: unknown, conversationId: string) => void;
|
||||
|
||||
@@ -22,13 +22,10 @@ import {
|
||||
isConversationStateUpdateEvent,
|
||||
isFullStateConversationStateUpdateEvent,
|
||||
isAgentStatusConversationStateUpdateEvent,
|
||||
isStatsConversationStateUpdateEvent,
|
||||
isExecuteBashActionEvent,
|
||||
isExecuteBashObservationEvent,
|
||||
isConversationErrorEvent,
|
||||
isPlanningFileEditorObservationEvent,
|
||||
} from "#/types/v1/type-guards";
|
||||
import { ConversationStateUpdateEventStats } from "#/types/v1/core/events/conversation-state-event";
|
||||
import { handleActionEventCacheInvalidation } from "#/utils/cache-utils";
|
||||
import { buildWebSocketUrl } from "#/utils/websocket-url";
|
||||
import type {
|
||||
@@ -39,8 +36,6 @@ import EventService from "#/api/event-service/event-service.api";
|
||||
import { useConversationStore } from "#/state/conversation-store";
|
||||
import { isBudgetOrCreditError } from "#/utils/error-handler";
|
||||
import { useTracking } from "#/hooks/use-tracking";
|
||||
import { useReadConversationFile } from "#/hooks/mutation/use-read-conversation-file";
|
||||
import useMetricsStore from "#/stores/metrics-store";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/naming-convention
|
||||
export type V1_WebSocketConnectionState =
|
||||
@@ -104,53 +99,12 @@ export function ConversationWebSocketProvider({
|
||||
number | null
|
||||
>(null);
|
||||
|
||||
const { conversationMode, setPlanContent } = useConversationStore();
|
||||
|
||||
// Hook for reading conversation file
|
||||
const { mutate: readConversationFile } = useReadConversationFile();
|
||||
const { conversationMode } = useConversationStore();
|
||||
|
||||
// Separate received event count tracking per connection
|
||||
const receivedEventCountRefMain = useRef(0);
|
||||
const receivedEventCountRefPlanning = useRef(0);
|
||||
|
||||
// Track the latest PlanningFileEditorObservation event during history replay
|
||||
// We'll only call the API once after history loading completes
|
||||
const latestPlanningFileEventRef = useRef<{
|
||||
path: string;
|
||||
conversationId: string;
|
||||
} | null>(null);
|
||||
|
||||
// Helper function to update metrics from stats event
|
||||
const updateMetricsFromStats = useCallback(
|
||||
(event: ConversationStateUpdateEventStats) => {
|
||||
if (event.value.usage_to_metrics?.agent) {
|
||||
const agentMetrics = event.value.usage_to_metrics.agent;
|
||||
const metrics = {
|
||||
cost: agentMetrics.accumulated_cost,
|
||||
max_budget_per_task: agentMetrics.max_budget_per_task ?? null,
|
||||
usage: agentMetrics.accumulated_token_usage
|
||||
? {
|
||||
prompt_tokens:
|
||||
agentMetrics.accumulated_token_usage.prompt_tokens,
|
||||
completion_tokens:
|
||||
agentMetrics.accumulated_token_usage.completion_tokens,
|
||||
cache_read_tokens:
|
||||
agentMetrics.accumulated_token_usage.cache_read_tokens,
|
||||
cache_write_tokens:
|
||||
agentMetrics.accumulated_token_usage.cache_write_tokens,
|
||||
context_window:
|
||||
agentMetrics.accumulated_token_usage.context_window,
|
||||
per_turn_token:
|
||||
agentMetrics.accumulated_token_usage.per_turn_token,
|
||||
}
|
||||
: null,
|
||||
};
|
||||
useMetricsStore.getState().setMetrics(metrics);
|
||||
}
|
||||
},
|
||||
[],
|
||||
);
|
||||
|
||||
// Build WebSocket URL from props
|
||||
// Only build URL if we have both conversationId and conversationUrl
|
||||
// This prevents connection attempts during task polling phase
|
||||
@@ -247,40 +201,11 @@ export function ConversationWebSocketProvider({
|
||||
receivedEventCountRefPlanning,
|
||||
]);
|
||||
|
||||
// Call API once after history loading completes if we tracked any PlanningFileEditorObservation events
|
||||
useEffect(() => {
|
||||
if (!isLoadingHistoryPlanning && latestPlanningFileEventRef.current) {
|
||||
const { path, conversationId: currentPlanningConversationId } =
|
||||
latestPlanningFileEventRef.current;
|
||||
|
||||
readConversationFile(
|
||||
{
|
||||
conversationId: currentPlanningConversationId,
|
||||
filePath: path,
|
||||
},
|
||||
{
|
||||
onSuccess: (fileContent) => {
|
||||
setPlanContent(fileContent);
|
||||
},
|
||||
onError: (error) => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn("Failed to read conversation file:", error);
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
// Clear the ref after calling the API
|
||||
latestPlanningFileEventRef.current = null;
|
||||
}
|
||||
}, [isLoadingHistoryPlanning, readConversationFile, setPlanContent]);
|
||||
|
||||
useEffect(() => {
|
||||
hasConnectedRefMain.current = false;
|
||||
setIsLoadingHistoryPlanning(!!subConversationIds?.length);
|
||||
setExpectedEventCountPlanning(null);
|
||||
receivedEventCountRefPlanning.current = 0;
|
||||
// Reset the tracked event ref when sub-conversations change
|
||||
latestPlanningFileEventRef.current = null;
|
||||
}, [subConversationIds]);
|
||||
|
||||
// Merged loading history state - true if either connection is still loading
|
||||
@@ -295,8 +220,6 @@ export function ConversationWebSocketProvider({
|
||||
setIsLoadingHistoryMain(true);
|
||||
setExpectedEventCountMain(null);
|
||||
receivedEventCountRefMain.current = 0;
|
||||
// Reset the tracked event ref when conversation changes
|
||||
latestPlanningFileEventRef.current = null;
|
||||
}, [conversationId]);
|
||||
|
||||
// Separate message handlers for each connection
|
||||
@@ -364,9 +287,6 @@ export function ConversationWebSocketProvider({
|
||||
if (isAgentStatusConversationStateUpdateEvent(event)) {
|
||||
setExecutionStatus(event.value);
|
||||
}
|
||||
if (isStatsConversationStateUpdateEvent(event)) {
|
||||
updateMetricsFromStats(event);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle ExecuteBashAction events - add command as input to terminal
|
||||
@@ -400,7 +320,6 @@ export function ConversationWebSocketProvider({
|
||||
setExecutionStatus,
|
||||
appendInput,
|
||||
appendOutput,
|
||||
updateMetricsFromStats,
|
||||
],
|
||||
);
|
||||
|
||||
@@ -424,12 +343,7 @@ export function ConversationWebSocketProvider({
|
||||
|
||||
// Use type guard to validate v1 event structure
|
||||
if (isV1Event(event)) {
|
||||
// Mark this event as coming from the planning agent
|
||||
const eventWithPlanningFlag = {
|
||||
...event,
|
||||
isFromPlanningAgent: true,
|
||||
};
|
||||
addEvent(eventWithPlanningFlag);
|
||||
addEvent(event);
|
||||
|
||||
// Handle AgentErrorEvent specifically
|
||||
if (isAgentErrorEvent(event)) {
|
||||
@@ -462,9 +376,6 @@ export function ConversationWebSocketProvider({
|
||||
if (isAgentStatusConversationStateUpdateEvent(event)) {
|
||||
setExecutionStatus(event.value);
|
||||
}
|
||||
if (isStatsConversationStateUpdateEvent(event)) {
|
||||
updateMetricsFromStats(event);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle ExecuteBashAction events - add command as input to terminal
|
||||
@@ -481,41 +392,6 @@ export function ConversationWebSocketProvider({
|
||||
.join("\n");
|
||||
appendOutput(textContent);
|
||||
}
|
||||
|
||||
// Handle PlanningFileEditorObservation events - read and update plan content
|
||||
if (isPlanningFileEditorObservationEvent(event)) {
|
||||
const planningAgentConversation = subConversations?.[0];
|
||||
const planningConversationId = planningAgentConversation?.id;
|
||||
|
||||
if (planningConversationId && event.observation.path) {
|
||||
// During history replay, track the latest event but don't call API
|
||||
// After history loading completes, we'll call the API once with the latest event
|
||||
if (isLoadingHistoryPlanning) {
|
||||
latestPlanningFileEventRef.current = {
|
||||
path: event.observation.path,
|
||||
conversationId: planningConversationId,
|
||||
};
|
||||
} else {
|
||||
// History loading is complete - this is a new real-time event
|
||||
// Call the API immediately for real-time updates
|
||||
readConversationFile(
|
||||
{
|
||||
conversationId: planningConversationId,
|
||||
filePath: event.observation.path,
|
||||
},
|
||||
{
|
||||
onSuccess: (fileContent) => {
|
||||
setPlanContent(fileContent);
|
||||
},
|
||||
onError: (error) => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn("Failed to read conversation file:", error);
|
||||
},
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// eslint-disable-next-line no-console
|
||||
@@ -533,9 +409,6 @@ export function ConversationWebSocketProvider({
|
||||
setExecutionStatus,
|
||||
appendInput,
|
||||
appendOutput,
|
||||
readConversationFile,
|
||||
setPlanContent,
|
||||
updateMetricsFromStats,
|
||||
],
|
||||
);
|
||||
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
import { useMutation } from "@tanstack/react-query";
|
||||
import V1ConversationService from "#/api/conversation-service/v1-conversation-service.api";
|
||||
|
||||
interface UseReadConversationFileVariables {
|
||||
conversationId: string;
|
||||
filePath?: string;
|
||||
}
|
||||
|
||||
export const useReadConversationFile = () =>
|
||||
useMutation({
|
||||
mutationKey: ["read-conversation-file"],
|
||||
mutationFn: async ({
|
||||
conversationId,
|
||||
filePath,
|
||||
}: UseReadConversationFileVariables): Promise<string> =>
|
||||
V1ConversationService.readConversationFile(conversationId, filePath),
|
||||
});
|
||||
@@ -1,11 +1,6 @@
|
||||
// this file generate by script, don't modify it manually!!!
|
||||
export enum I18nKey {
|
||||
MAINTENANCE$SCHEDULED_MESSAGE = "MAINTENANCE$SCHEDULED_MESSAGE",
|
||||
AZURE_DEVOPS$CONNECT_ACCOUNT = "AZURE_DEVOPS$CONNECT_ACCOUNT",
|
||||
GIT$AZURE_DEVOPS_TOKEN = "GIT$AZURE_DEVOPS_TOKEN",
|
||||
GIT$AZURE_DEVOPS_HOST = "GIT$AZURE_DEVOPS_HOST",
|
||||
GIT$AZURE_DEVOPS_HOST_PLACEHOLDER = "GIT$AZURE_DEVOPS_HOST_PLACEHOLDER",
|
||||
GIT$AZURE_DEVOPS_TOKEN_HELP = "GIT$AZURE_DEVOPS_TOKEN_HELP",
|
||||
MICROAGENT$NO_REPOSITORY_FOUND = "MICROAGENT$NO_REPOSITORY_FOUND",
|
||||
MICROAGENT$ADD_TO_MICROAGENT = "MICROAGENT$ADD_TO_MICROAGENT",
|
||||
MICROAGENT$WHAT_TO_ADD = "MICROAGENT$WHAT_TO_ADD",
|
||||
@@ -122,7 +117,6 @@ export enum I18nKey {
|
||||
SETTINGS$NAV_SECRETS = "SETTINGS$NAV_SECRETS",
|
||||
SETTINGS$NAV_API_KEYS = "SETTINGS$NAV_API_KEYS",
|
||||
SETTINGS$GITHUB = "SETTINGS$GITHUB",
|
||||
SETTINGS$AZURE_DEVOPS = "SETTINGS$AZURE_DEVOPS",
|
||||
SETTINGS$SLACK = "SETTINGS$SLACK",
|
||||
SETTINGS$NAV_LLM = "SETTINGS$NAV_LLM",
|
||||
GIT$MERGE_REQUEST = "GIT$MERGE_REQUEST",
|
||||
@@ -954,5 +948,4 @@ export enum I18nKey {
|
||||
COMMON$CODE_AGENT_DESCRIPTION = "COMMON$CODE_AGENT_DESCRIPTION",
|
||||
COMMON$PLAN_AGENT_DESCRIPTION = "COMMON$PLAN_AGENT_DESCRIPTION",
|
||||
PLANNING_AGENTT$PLANNING_AGENT_INITIALIZED = "PLANNING_AGENTT$PLANNING_AGENT_INITIALIZED",
|
||||
OBSERVATION_MESSAGE$SKILL_READY = "OBSERVATION_MESSAGE$SKILL_READY",
|
||||
}
|
||||
|
||||
@@ -15,86 +15,6 @@
|
||||
"de": "Die geplante Wartung beginnt um {{time}}",
|
||||
"uk": "Планове технічне обслуговування розпочнеться о {{time}}"
|
||||
},
|
||||
"AZURE_DEVOPS$CONNECT_ACCOUNT": {
|
||||
"en": "Connect Azure DevOps Account",
|
||||
"ja": "Azure DevOps アカウントを接続",
|
||||
"zh-CN": "连接 Azure DevOps 账户",
|
||||
"zh-TW": "連接 Azure DevOps 帳戶",
|
||||
"ko-KR": "Azure DevOps 계정 연결",
|
||||
"no": "Koble til Azure DevOps-konto",
|
||||
"it": "Connetti account Azure DevOps",
|
||||
"pt": "Conectar conta do Azure DevOps",
|
||||
"es": "Conectar cuenta de Azure DevOps",
|
||||
"ar": "ربط حساب Azure DevOps",
|
||||
"fr": "Connecter le compte Azure DevOps",
|
||||
"tr": "Azure DevOps hesabını bağla",
|
||||
"de": "Azure DevOps-Konto verbinden",
|
||||
"uk": "Підключити обліковий запис Azure DevOps"
|
||||
},
|
||||
"GIT$AZURE_DEVOPS_TOKEN": {
|
||||
"en": "Azure DevOps Personal Access Token",
|
||||
"ja": "Azure DevOps 個人用アクセス トークン",
|
||||
"zh-CN": "Azure DevOps 个人访问令牌",
|
||||
"zh-TW": "Azure DevOps 個人存取權杖",
|
||||
"ko-KR": "Azure DevOps 개인 액세스 토큰",
|
||||
"no": "Azure DevOps personlig tilgangstoken",
|
||||
"it": "Token di accesso personale Azure DevOps",
|
||||
"pt": "Token de acesso pessoal do Azure DevOps",
|
||||
"es": "Token de acceso personal de Azure DevOps",
|
||||
"ar": "رمز الوصول الشخصي لـ Azure DevOps",
|
||||
"fr": "Jeton d'accès personnel Azure DevOps",
|
||||
"tr": "Azure DevOps kişisel erişim belirteci",
|
||||
"de": "Azure DevOps persönliches Zugriffstoken",
|
||||
"uk": "Персональний токен доступу Azure DevOps"
|
||||
},
|
||||
"GIT$AZURE_DEVOPS_HOST": {
|
||||
"en": "Azure DevOps Organization",
|
||||
"ja": "Azure DevOps 組織",
|
||||
"zh-CN": "Azure DevOps 组织",
|
||||
"zh-TW": "Azure DevOps 組織",
|
||||
"ko-KR": "Azure DevOps 조직",
|
||||
"no": "Azure DevOps organisasjon",
|
||||
"it": "Organizzazione Azure DevOps",
|
||||
"pt": "Organização do Azure DevOps",
|
||||
"es": "Organización de Azure DevOps",
|
||||
"ar": "مؤسسة Azure DevOps",
|
||||
"fr": "Organisation Azure DevOps",
|
||||
"tr": "Azure DevOps kuruluş",
|
||||
"de": "Azure DevOps Organisation",
|
||||
"uk": "Організація Azure DevOps"
|
||||
},
|
||||
"GIT$AZURE_DEVOPS_HOST_PLACEHOLDER": {
|
||||
"en": "organization",
|
||||
"ja": "組織",
|
||||
"zh-CN": "组织",
|
||||
"zh-TW": "組織",
|
||||
"ko-KR": "조직",
|
||||
"no": "organisasjon",
|
||||
"it": "organizzazione",
|
||||
"pt": "organização",
|
||||
"es": "organización",
|
||||
"ar": "مؤسسة",
|
||||
"fr": "organisation",
|
||||
"tr": "kuruluş/proje",
|
||||
"de": "organisation/projekt",
|
||||
"uk": "організація/проект"
|
||||
},
|
||||
"GIT$AZURE_DEVOPS_TOKEN_HELP": {
|
||||
"en": "How to create an Azure DevOps token",
|
||||
"ja": "Azure DevOps トークンの作成方法",
|
||||
"zh-CN": "如何创建 Azure DevOps 令牌",
|
||||
"zh-TW": "如何創建 Azure DevOps 權杖",
|
||||
"ko-KR": "Azure DevOps 토큰 생성 방법",
|
||||
"no": "Hvordan lage et Azure DevOps-token",
|
||||
"it": "Come creare un token Azure DevOps",
|
||||
"pt": "Como criar um token do Azure DevOps",
|
||||
"es": "Cómo crear un token de Azure DevOps",
|
||||
"ar": "كيفية إنشاء رمز Azure DevOps",
|
||||
"fr": "Comment créer un jeton Azure DevOps",
|
||||
"tr": "Azure DevOps belirteci nasıl oluşturulur",
|
||||
"de": "Wie man ein Azure DevOps-Token erstellt",
|
||||
"uk": "Як створити токен Azure DevOps"
|
||||
},
|
||||
"MICROAGENT$NO_REPOSITORY_FOUND": {
|
||||
"en": "No repository found to launch microagent",
|
||||
"ja": "マイクロエージェントを起動するためのリポジトリが見つかりません",
|
||||
@@ -1312,20 +1232,20 @@
|
||||
"uk": "Невірний JSON"
|
||||
},
|
||||
"HOME$CONNECT_PROVIDER_MESSAGE": {
|
||||
"en": "To get started with suggested tasks, please connect your GitHub, GitLab, Bitbucket, or Azure DevOps account.",
|
||||
"ja": "提案されたタスクを始めるには、GitHub、GitLab、Bitbucket、またはAzure DevOpsアカウントを接続してください。",
|
||||
"zh-CN": "要开始使用建议的任务,请连接您的GitHub、GitLab、Bitbucket或Azure DevOps账户。",
|
||||
"zh-TW": "要開始使用建議的任務,請連接您的GitHub、GitLab、Bitbucket或Azure DevOps帳戶。",
|
||||
"ko-KR": "제안된 작업을 시작하려면 GitHub, GitLab, Bitbucket 또는 Azure DevOps 계정을 연결하세요.",
|
||||
"no": "For å komme i gang med foreslåtte oppgaver, vennligst koble til GitHub, GitLab, Bitbucket eller Azure DevOps-kontoen din.",
|
||||
"it": "Per iniziare con le attività suggerite, collega il tuo account GitHub, GitLab, Bitbucket o Azure DevOps.",
|
||||
"pt": "Para começar com tarefas sugeridas, conecte sua conta GitHub, GitLab, Bitbucket ou Azure DevOps.",
|
||||
"es": "Para comenzar con las tareas sugeridas, conecte su cuenta de GitHub, GitLab, Bitbucket o Azure DevOps.",
|
||||
"ar": "للبدء بالمهام المقترحة، يرجى ربط حساب GitHub أو GitLab أو Bitbucket أو Azure DevOps الخاص بك.",
|
||||
"fr": "Pour commencer avec les tâches suggérées, veuillez connecter votre compte GitHub, GitLab, Bitbucket ou Azure DevOps.",
|
||||
"tr": "Önerilen görevlerle başlamak için lütfen GitHub, GitLab, Bitbucket veya Azure DevOps hesabınızı bağlayın.",
|
||||
"de": "Um mit vorgeschlagenen Aufgaben zu beginnen, verbinden Sie bitte Ihr GitHub-, GitLab-, Bitbucket- oder Azure DevOps-Konto.",
|
||||
"uk": "Щоб розпочати роботу з запропонованими завданнями, підключіть свій обліковий запис GitHub, GitLab, Bitbucket або Azure DevOps."
|
||||
"en": "To get started with suggested tasks, please connect your GitHub, GitLab, or Bitbucket account.",
|
||||
"ja": "提案されたタスクを始めるには、GitHub、GitLab、またはBitbucketアカウントを接続してください。",
|
||||
"zh-CN": "要开始使用建议的任务,请连接您的GitHub、GitLab或Bitbucket账户。",
|
||||
"zh-TW": "要開始使用建議的任務,請連接您的GitHub、GitLab或Bitbucket帳戶。",
|
||||
"ko-KR": "제안된 작업을 시작하려면 GitHub, GitLab 또는 Bitbucket 계정을 연결하세요.",
|
||||
"no": "For å komme i gang med foreslåtte oppgaver, vennligst koble til GitHub, GitLab eller Bitbucket-kontoen din.",
|
||||
"it": "Per iniziare con le attività suggerite, collega il tuo account GitHub, GitLab o Bitbucket.",
|
||||
"pt": "Para começar com tarefas sugeridas, conecte sua conta GitHub, GitLab ou Bitbucket.",
|
||||
"es": "Para comenzar con las tareas sugeridas, conecte su cuenta de GitHub, GitLab o Bitbucket.",
|
||||
"ar": "للبدء بالمهام المقترحة، يرجى ربط حساب GitHub أو GitLab أو Bitbucket الخاص بك.",
|
||||
"fr": "Pour commencer avec les tâches suggérées, veuillez connecter votre compte GitHub, GitLab ou Bitbucket.",
|
||||
"tr": "Önerilen görevlerle başlamak için lütfen GitHub, GitLab veya Bitbucket hesabınızı bağlayın.",
|
||||
"de": "Um mit vorgeschlagenen Aufgaben zu beginnen, verbinden Sie bitte Ihr GitHub-, GitLab- oder Bitbucket-Konto.",
|
||||
"uk": "Щоб розпочати роботу з запропонованими завданнями, підключіть свій обліковий запис GitHub, GitLab або Bitbucket."
|
||||
},
|
||||
"HOME$LETS_START_BUILDING": {
|
||||
"en": "Let's Start Building!",
|
||||
@@ -1951,22 +1871,6 @@
|
||||
"de": "GitHub",
|
||||
"uk": "GitHub"
|
||||
},
|
||||
"SETTINGS$AZURE_DEVOPS": {
|
||||
"en": "Azure DevOps",
|
||||
"ja": "Azure DevOps",
|
||||
"zh-CN": "Azure DevOps",
|
||||
"zh-TW": "Azure DevOps",
|
||||
"ko-KR": "Azure DevOps",
|
||||
"no": "Azure DevOps",
|
||||
"it": "Azure DevOps",
|
||||
"pt": "Azure DevOps",
|
||||
"es": "Azure DevOps",
|
||||
"ar": "Azure DevOps",
|
||||
"fr": "Azure DevOps",
|
||||
"tr": "Azure DevOps",
|
||||
"de": "Azure DevOps",
|
||||
"uk": "Azure DevOps"
|
||||
},
|
||||
"SETTINGS$SLACK": {
|
||||
"en": "Slack",
|
||||
"ja": "Slack",
|
||||
@@ -15262,21 +15166,5 @@
|
||||
"tr": "Planlama ajanı başlatıldı",
|
||||
"de": "Planungsagent wurde initialisiert",
|
||||
"uk": "Агент планування ініціалізовано"
|
||||
},
|
||||
"OBSERVATION_MESSAGE$SKILL_READY": {
|
||||
"en": "Skill Ready",
|
||||
"ja": "スキル準備完了",
|
||||
"zh-CN": "技能已就绪",
|
||||
"zh-TW": "技能已就緒",
|
||||
"ko-KR": "스킬 준비 완료",
|
||||
"no": "Ferdighet klar",
|
||||
"it": "Abilità pronta",
|
||||
"pt": "Habilidade pronta",
|
||||
"es": "Habilidad lista",
|
||||
"ar": "المهارة جاهزة",
|
||||
"fr": "Compétence prête",
|
||||
"tr": "Yetenek hazır",
|
||||
"de": "Fähigkeit bereit",
|
||||
"uk": "Навичка готова"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ import { useLogout } from "#/hooks/mutation/use-logout";
|
||||
import { GitHubTokenInput } from "#/components/features/settings/git-settings/github-token-input";
|
||||
import { GitLabTokenInput } from "#/components/features/settings/git-settings/gitlab-token-input";
|
||||
import { BitbucketTokenInput } from "#/components/features/settings/git-settings/bitbucket-token-input";
|
||||
import { AzureDevOpsTokenInput } from "#/components/features/settings/git-settings/azure-devops-token-input";
|
||||
import { ConfigureGitHubRepositoriesAnchor } from "#/components/features/settings/git-settings/configure-github-repositories-anchor";
|
||||
import { InstallSlackAppAnchor } from "#/components/features/settings/git-settings/install-slack-app-anchor";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
@@ -38,8 +37,6 @@ function GitSettingsScreen() {
|
||||
React.useState(false);
|
||||
const [bitbucketTokenInputHasValue, setBitbucketTokenInputHasValue] =
|
||||
React.useState(false);
|
||||
const [azureDevOpsTokenInputHasValue, setAzureDevOpsTokenInputHasValue] =
|
||||
React.useState(false);
|
||||
|
||||
const [githubHostInputHasValue, setGithubHostInputHasValue] =
|
||||
React.useState(false);
|
||||
@@ -47,19 +44,15 @@ function GitSettingsScreen() {
|
||||
React.useState(false);
|
||||
const [bitbucketHostInputHasValue, setBitbucketHostInputHasValue] =
|
||||
React.useState(false);
|
||||
const [azureDevOpsHostInputHasValue, setAzureDevOpsHostInputHasValue] =
|
||||
React.useState(false);
|
||||
|
||||
const existingGithubHost = settings?.PROVIDER_TOKENS_SET.github;
|
||||
const existingGitlabHost = settings?.PROVIDER_TOKENS_SET.gitlab;
|
||||
const existingBitbucketHost = settings?.PROVIDER_TOKENS_SET.bitbucket;
|
||||
const existingAzureDevOpsHost = settings?.PROVIDER_TOKENS_SET.azure_devops;
|
||||
|
||||
const isSaas = config?.APP_MODE === "saas";
|
||||
const isGitHubTokenSet = providers.includes("github");
|
||||
const isGitLabTokenSet = providers.includes("gitlab");
|
||||
const isBitbucketTokenSet = providers.includes("bitbucket");
|
||||
const isAzureDevOpsTokenSet = providers.includes("azure_devops");
|
||||
|
||||
const formAction = async (formData: FormData) => {
|
||||
const disconnectButtonClicked =
|
||||
@@ -74,21 +67,16 @@ function GitSettingsScreen() {
|
||||
const gitlabToken = formData.get("gitlab-token-input")?.toString() || "";
|
||||
const bitbucketToken =
|
||||
formData.get("bitbucket-token-input")?.toString() || "";
|
||||
const azureDevOpsToken =
|
||||
formData.get("azure-devops-token-input")?.toString() || "";
|
||||
const githubHost = formData.get("github-host-input")?.toString() || "";
|
||||
const gitlabHost = formData.get("gitlab-host-input")?.toString() || "";
|
||||
const bitbucketHost =
|
||||
formData.get("bitbucket-host-input")?.toString() || "";
|
||||
const azureDevOpsHost =
|
||||
formData.get("azure-devops-host-input")?.toString() || "";
|
||||
|
||||
// Create providers object with all tokens
|
||||
const providerTokens: Record<string, { token: string; host: string }> = {
|
||||
github: { token: githubToken, host: githubHost },
|
||||
gitlab: { token: gitlabToken, host: gitlabHost },
|
||||
bitbucket: { token: bitbucketToken, host: bitbucketHost },
|
||||
azure_devops: { token: azureDevOpsToken, host: azureDevOpsHost },
|
||||
};
|
||||
|
||||
saveGitProviders(
|
||||
@@ -107,11 +95,9 @@ function GitSettingsScreen() {
|
||||
setGithubTokenInputHasValue(false);
|
||||
setGitlabTokenInputHasValue(false);
|
||||
setBitbucketTokenInputHasValue(false);
|
||||
setAzureDevOpsTokenInputHasValue(false);
|
||||
setGithubHostInputHasValue(false);
|
||||
setGitlabHostInputHasValue(false);
|
||||
setBitbucketHostInputHasValue(false);
|
||||
setAzureDevOpsHostInputHasValue(false);
|
||||
},
|
||||
},
|
||||
);
|
||||
@@ -121,11 +107,9 @@ function GitSettingsScreen() {
|
||||
!githubTokenInputHasValue &&
|
||||
!gitlabTokenInputHasValue &&
|
||||
!bitbucketTokenInputHasValue &&
|
||||
!azureDevOpsTokenInputHasValue &&
|
||||
!githubHostInputHasValue &&
|
||||
!gitlabHostInputHasValue &&
|
||||
!bitbucketHostInputHasValue &&
|
||||
!azureDevOpsHostInputHasValue;
|
||||
!bitbucketHostInputHasValue;
|
||||
const shouldRenderExternalConfigureButtons = isSaas && config.APP_SLUG;
|
||||
const shouldRenderProjectManagementIntegrations =
|
||||
config?.FEATURE_FLAGS?.ENABLE_JIRA ||
|
||||
@@ -212,20 +196,6 @@ function GitSettingsScreen() {
|
||||
bitbucketHostSet={existingBitbucketHost}
|
||||
/>
|
||||
)}
|
||||
|
||||
{!isSaas && (
|
||||
<AzureDevOpsTokenInput
|
||||
name="azure-devops-token-input"
|
||||
isAzureDevOpsTokenSet={isAzureDevOpsTokenSet}
|
||||
onChange={(value) => {
|
||||
setAzureDevOpsTokenInputHasValue(!!value);
|
||||
}}
|
||||
onAzureDevOpsHostChange={(value) => {
|
||||
setAzureDevOpsHostInputHasValue(!!value);
|
||||
}}
|
||||
azureDevOpsHostSet={existingAzureDevOpsHost}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
@@ -241,10 +211,7 @@ function GitSettingsScreen() {
|
||||
type="submit"
|
||||
variant="secondary"
|
||||
isDisabled={
|
||||
!isGitHubTokenSet &&
|
||||
!isGitLabTokenSet &&
|
||||
!isBitbucketTokenSet &&
|
||||
!isAzureDevOpsTokenSet
|
||||
!isGitHubTokenSet && !isGitLabTokenSet && !isBitbucketTokenSet
|
||||
}
|
||||
>
|
||||
{t(I18nKey.GIT$DISCONNECT_TOKENS)}
|
||||
|
||||
@@ -102,22 +102,10 @@ function LlmSettingsScreen() {
|
||||
: (settings?.SECURITY_ANALYZER ?? DEFAULT_SETTINGS.SECURITY_ANALYZER),
|
||||
);
|
||||
|
||||
const [selectedProvider, setSelectedProvider] = React.useState<string | null>(
|
||||
null,
|
||||
);
|
||||
|
||||
const modelsAndProviders = organizeModelsAndProviders(
|
||||
resources?.models || [],
|
||||
);
|
||||
|
||||
// Determine if we should hide the API key input and use OpenHands-managed key (when using OpenHands provider in SaaS mode)
|
||||
const currentModel = currentSelectedModel || settings?.LLM_MODEL;
|
||||
const isOpenHandsProvider =
|
||||
(view === "basic" && selectedProvider === "openhands") ||
|
||||
(view === "advanced" && currentModel?.startsWith("openhands/"));
|
||||
const isSaasMode = config?.APP_MODE === "saas";
|
||||
const shouldUseOpenHandsKey = isOpenHandsProvider && isSaasMode;
|
||||
|
||||
React.useEffect(() => {
|
||||
const determineWhetherToToggleAdvancedSettings = () => {
|
||||
if (resources && settings) {
|
||||
@@ -208,13 +196,10 @@ function LlmSettingsScreen() {
|
||||
|
||||
const fullLlmModel = provider && model && `${provider}/${model}`;
|
||||
|
||||
// Use OpenHands-managed key for OpenHands provider in SaaS mode
|
||||
const finalApiKey = shouldUseOpenHandsKey ? null : apiKey;
|
||||
|
||||
saveSettings(
|
||||
{
|
||||
LLM_MODEL: fullLlmModel,
|
||||
llm_api_key: finalApiKey || null,
|
||||
llm_api_key: apiKey || null,
|
||||
SEARCH_API_KEY: searchApiKey || "",
|
||||
CONFIRMATION_MODE: confirmationMode,
|
||||
SECURITY_ANALYZER:
|
||||
@@ -259,14 +244,11 @@ function LlmSettingsScreen() {
|
||||
.get("security-analyzer-input")
|
||||
?.toString();
|
||||
|
||||
// Use OpenHands-managed key for OpenHands provider in SaaS mode
|
||||
const finalApiKey = shouldUseOpenHandsKey ? null : apiKey;
|
||||
|
||||
saveSettings(
|
||||
{
|
||||
LLM_MODEL: model,
|
||||
LLM_BASE_URL: baseUrl,
|
||||
llm_api_key: finalApiKey || null,
|
||||
llm_api_key: apiKey || null,
|
||||
SEARCH_API_KEY: searchApiKey || "",
|
||||
AGENT: agent,
|
||||
CONFIRMATION_MODE: confirmationMode,
|
||||
@@ -300,10 +282,7 @@ function LlmSettingsScreen() {
|
||||
});
|
||||
};
|
||||
|
||||
const handleModelIsDirty = (
|
||||
provider: string | null,
|
||||
model: string | null,
|
||||
) => {
|
||||
const handleModelIsDirty = (model: string | null) => {
|
||||
// openai providers are special case; see ModelSelector
|
||||
// component for details
|
||||
const modelIsDirty = model !== settings?.LLM_MODEL.replace("openai/", "");
|
||||
@@ -314,15 +293,6 @@ function LlmSettingsScreen() {
|
||||
|
||||
// Track the currently selected model for help text display
|
||||
setCurrentSelectedModel(model);
|
||||
setSelectedProvider(provider);
|
||||
};
|
||||
|
||||
const onDefaultValuesChanged = (
|
||||
provider: string | null,
|
||||
model: string | null,
|
||||
) => {
|
||||
setSelectedProvider(provider);
|
||||
setCurrentSelectedModel(model);
|
||||
};
|
||||
|
||||
const handleApiKeyIsDirty = (apiKey: string) => {
|
||||
@@ -493,7 +463,6 @@ function LlmSettingsScreen() {
|
||||
models={modelsAndProviders}
|
||||
currentModel={settings.LLM_MODEL || DEFAULT_OPENHANDS_MODEL}
|
||||
onChange={handleModelIsDirty}
|
||||
onDefaultValuesChanged={onDefaultValuesChanged}
|
||||
wrapperClassName="!flex-col !gap-6"
|
||||
/>
|
||||
{(settings.LLM_MODEL?.startsWith("openhands/") ||
|
||||
@@ -503,31 +472,27 @@ function LlmSettingsScreen() {
|
||||
</>
|
||||
)}
|
||||
|
||||
{!shouldUseOpenHandsKey && (
|
||||
<>
|
||||
<SettingsInput
|
||||
testId="llm-api-key-input"
|
||||
name="llm-api-key-input"
|
||||
label={t(I18nKey.SETTINGS_FORM$API_KEY)}
|
||||
type="password"
|
||||
className="w-full max-w-[680px]"
|
||||
placeholder={settings.LLM_API_KEY_SET ? "<hidden>" : ""}
|
||||
onChange={handleApiKeyIsDirty}
|
||||
startContent={
|
||||
settings.LLM_API_KEY_SET && (
|
||||
<KeyStatusIcon isSet={settings.LLM_API_KEY_SET} />
|
||||
)
|
||||
}
|
||||
/>
|
||||
<SettingsInput
|
||||
testId="llm-api-key-input"
|
||||
name="llm-api-key-input"
|
||||
label={t(I18nKey.SETTINGS_FORM$API_KEY)}
|
||||
type="password"
|
||||
className="w-full max-w-[680px]"
|
||||
placeholder={settings.LLM_API_KEY_SET ? "<hidden>" : ""}
|
||||
onChange={handleApiKeyIsDirty}
|
||||
startContent={
|
||||
settings.LLM_API_KEY_SET && (
|
||||
<KeyStatusIcon isSet={settings.LLM_API_KEY_SET} />
|
||||
)
|
||||
}
|
||||
/>
|
||||
|
||||
<HelpLink
|
||||
testId="llm-api-key-help-anchor"
|
||||
text={t(I18nKey.SETTINGS$DONT_KNOW_API_KEY)}
|
||||
linkText={t(I18nKey.SETTINGS$CLICK_FOR_INSTRUCTIONS)}
|
||||
href="https://docs.all-hands.dev/usage/local-setup#getting-an-api-key"
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
<HelpLink
|
||||
testId="llm-api-key-help-anchor"
|
||||
text={t(I18nKey.SETTINGS$DONT_KNOW_API_KEY)}
|
||||
linkText={t(I18nKey.SETTINGS$CLICK_FOR_INSTRUCTIONS)}
|
||||
href="https://docs.all-hands.dev/usage/local-setup#getting-an-api-key"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -562,30 +527,26 @@ function LlmSettingsScreen() {
|
||||
onChange={handleBaseUrlIsDirty}
|
||||
/>
|
||||
|
||||
{!shouldUseOpenHandsKey && (
|
||||
<>
|
||||
<SettingsInput
|
||||
testId="llm-api-key-input"
|
||||
name="llm-api-key-input"
|
||||
label={t(I18nKey.SETTINGS_FORM$API_KEY)}
|
||||
type="password"
|
||||
className="w-full max-w-[680px]"
|
||||
placeholder={settings.LLM_API_KEY_SET ? "<hidden>" : ""}
|
||||
onChange={handleApiKeyIsDirty}
|
||||
startContent={
|
||||
settings.LLM_API_KEY_SET && (
|
||||
<KeyStatusIcon isSet={settings.LLM_API_KEY_SET} />
|
||||
)
|
||||
}
|
||||
/>
|
||||
<HelpLink
|
||||
testId="llm-api-key-help-anchor-advanced"
|
||||
text={t(I18nKey.SETTINGS$DONT_KNOW_API_KEY)}
|
||||
linkText={t(I18nKey.SETTINGS$CLICK_FOR_INSTRUCTIONS)}
|
||||
href="https://docs.all-hands.dev/usage/local-setup#getting-an-api-key"
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
<SettingsInput
|
||||
testId="llm-api-key-input"
|
||||
name="llm-api-key-input"
|
||||
label={t(I18nKey.SETTINGS_FORM$API_KEY)}
|
||||
type="password"
|
||||
className="w-full max-w-[680px]"
|
||||
placeholder={settings.LLM_API_KEY_SET ? "<hidden>" : ""}
|
||||
onChange={handleApiKeyIsDirty}
|
||||
startContent={
|
||||
settings.LLM_API_KEY_SET && (
|
||||
<KeyStatusIcon isSet={settings.LLM_API_KEY_SET} />
|
||||
)
|
||||
}
|
||||
/>
|
||||
<HelpLink
|
||||
testId="llm-api-key-help-anchor-advanced"
|
||||
text={t(I18nKey.SETTINGS$DONT_KNOW_API_KEY)}
|
||||
linkText={t(I18nKey.SETTINGS$CLICK_FOR_INSTRUCTIONS)}
|
||||
href="https://docs.all-hands.dev/usage/local-setup#getting-an-api-key"
|
||||
/>
|
||||
|
||||
{config?.APP_MODE !== "saas" && (
|
||||
<>
|
||||
|
||||
@@ -1,20 +1,49 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import Markdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkBreaks from "remark-breaks";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import LessonPlanIcon from "#/icons/lesson-plan.svg?react";
|
||||
import { useConversationStore } from "#/state/conversation-store";
|
||||
import { MarkdownRenderer } from "#/components/features/markdown/markdown-renderer";
|
||||
import { code } from "#/components/features/markdown/code";
|
||||
import { ul, ol } from "#/components/features/markdown/list";
|
||||
import { paragraph } from "#/components/features/markdown/paragraph";
|
||||
import { anchor } from "#/components/features/markdown/anchor";
|
||||
import {
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
} from "#/components/features/markdown/headings";
|
||||
|
||||
function PlannerTab() {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const { planContent, setConversationMode } = useConversationStore();
|
||||
|
||||
if (planContent !== null && planContent !== undefined) {
|
||||
if (planContent) {
|
||||
return (
|
||||
<div className="flex flex-col w-full h-full p-4 overflow-auto">
|
||||
<MarkdownRenderer includeStandard includeHeadings>
|
||||
<Markdown
|
||||
components={{
|
||||
code,
|
||||
ul,
|
||||
ol,
|
||||
a: anchor,
|
||||
p: paragraph,
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
}}
|
||||
remarkPlugins={[remarkGfm, remarkBreaks]}
|
||||
>
|
||||
{planContent}
|
||||
</MarkdownRenderer>
|
||||
</Markdown>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -56,7 +56,6 @@ interface ConversationActions {
|
||||
setHasRightPanelToggled: (hasRightPanelToggled: boolean) => void;
|
||||
setConversationMode: (conversationMode: ConversationMode) => void;
|
||||
setSubConversationTaskId: (taskId: string | null) => void;
|
||||
setPlanContent: (planContent: string | null) => void;
|
||||
}
|
||||
|
||||
type ConversationStore = ConversationState & ConversationActions;
|
||||
@@ -82,7 +81,91 @@ export const useConversationStore = create<ConversationStore>()(
|
||||
submittedMessage: null,
|
||||
shouldHideSuggestions: false,
|
||||
hasRightPanelToggled: true,
|
||||
planContent: null,
|
||||
planContent: `
|
||||
# Improve Developer Onboarding and Examples
|
||||
|
||||
## Overview
|
||||
|
||||
Based on the analysis of Browser-Use's current documentation and examples, this plan addresses gaps in developer onboarding by creating a progressive learning path, troubleshooting resources, and practical examples that address real-world scenarios (like the LM Studio/local LLM integration issues encountered).
|
||||
|
||||
## Current State Analysis
|
||||
|
||||
**Strengths:**
|
||||
|
||||
- Good quickstart documentation in \`docs/quickstart.mdx\`
|
||||
- Extensive examples across multiple categories (60+ example files)
|
||||
- Well-structured docs with multiple LLM provider examples
|
||||
- Active community support via Discord
|
||||
|
||||
**Gaps Identified:**
|
||||
|
||||
- No progressive tutorial series that builds complexity gradually
|
||||
- Limited troubleshooting documentation for common issues
|
||||
- Sparse comments in example files explaining what's happening
|
||||
- Local LLM setup (Ollama/LM Studio) not prominently featured
|
||||
- No "first 10 minutes" success path
|
||||
- Missing visual/conceptual architecture guides for beginners
|
||||
- Error messages don't always point to solutions
|
||||
|
||||
## Proposed Improvements
|
||||
|
||||
### 1. Create Interactive Tutorial Series (\`examples/tutorials/\`)
|
||||
|
||||
**New folder structure:**
|
||||
|
||||
\`\`\`
|
||||
examples/tutorials/
|
||||
├── README.md # Tutorial overview and prerequisites
|
||||
├── 00_hello_world.py # Absolute minimal example
|
||||
├── 01_your_first_search.py # Basic search with detailed comments
|
||||
├── 02_understanding_actions.py # How actions work
|
||||
├── 03_data_extraction_basics.py # Extract data step-by-step
|
||||
├── 04_error_handling.py # Common errors and solutions
|
||||
├── 05_custom_tools_intro.py # First custom tool
|
||||
├── 06_local_llm_setup.py # Ollama/LM Studio complete guide
|
||||
└── 07_debugging_tips.py # Debugging strategies
|
||||
\`\`\`
|
||||
|
||||
**Key Features:**
|
||||
|
||||
- Each file 50–80 lines max
|
||||
- Extensive inline comments explaining every concept
|
||||
- Clear learning objectives at the top of each file
|
||||
- "What you'll learn" and "Prerequisites" sections
|
||||
- Common pitfalls highlighted
|
||||
- Expected output shown in comments
|
||||
|
||||
### 2. Troubleshooting Guide (\`docs/troubleshooting.mdx\`)
|
||||
|
||||
**Sections:**
|
||||
|
||||
- Installation issues (Chromium, dependencies, virtual environments)
|
||||
- LLM provider connection errors (API keys, timeouts, rate limits)
|
||||
- Local LLM setup (Ollama vs LM Studio, model compatibility)
|
||||
- Browser automation issues (element not found, timeout errors)
|
||||
- Common error messages with solutions
|
||||
- Performance optimization tips
|
||||
- When to ask for help (Discord/GitHub)
|
||||
|
||||
**Format:**
|
||||
|
||||
**Error: "LLM call timed out after 60 seconds"**
|
||||
|
||||
**What it means:**
|
||||
The model took too long to respond
|
||||
|
||||
**Common causes:**
|
||||
|
||||
1. Model is too slow for the task
|
||||
2. LM Studio/Ollama not responding properly
|
||||
3. Complex page overwhelming the model
|
||||
|
||||
**Solutions:**
|
||||
|
||||
- Use flash_mode for faster execution
|
||||
- Try a faster model (Gemini Flash, GPT-4 Turbo Mini)
|
||||
- Simplify the task
|
||||
- Check model server logs`,
|
||||
conversationMode: "code",
|
||||
subConversationTaskId: null,
|
||||
|
||||
@@ -221,7 +304,6 @@ export const useConversationStore = create<ConversationStore>()(
|
||||
shouldHideSuggestions: false,
|
||||
conversationMode: "code",
|
||||
subConversationTaskId: null,
|
||||
planContent: null,
|
||||
},
|
||||
false,
|
||||
"resetConversationState",
|
||||
@@ -235,9 +317,6 @@ export const useConversationStore = create<ConversationStore>()(
|
||||
|
||||
setSubConversationTaskId: (subConversationTaskId) =>
|
||||
set({ subConversationTaskId }, false, "setSubConversationTaskId"),
|
||||
|
||||
setPlanContent: (planContent) =>
|
||||
set({ planContent }, false, "setPlanContent"),
|
||||
}),
|
||||
{
|
||||
name: "conversation-store",
|
||||
|
||||
@@ -5,9 +5,7 @@ import { OpenHandsParsedEvent } from "#/types/core";
|
||||
import { isV1Event } from "#/types/v1/type-guards";
|
||||
|
||||
// While we transition to v1 events, our store can handle both v0 and v1 events
|
||||
type OHEvent = (OpenHandsEvent | OpenHandsParsedEvent) & {
|
||||
isFromPlanningAgent?: boolean;
|
||||
};
|
||||
type OHEvent = OpenHandsEvent | OpenHandsParsedEvent;
|
||||
|
||||
interface EventState {
|
||||
events: OHEvent[];
|
||||
|
||||
@@ -2,7 +2,6 @@ export const ProviderOptions = {
|
||||
github: "github",
|
||||
gitlab: "gitlab",
|
||||
bitbucket: "bitbucket",
|
||||
azure_devops: "azure_devops",
|
||||
enterprise_sso: "enterprise_sso",
|
||||
} as const;
|
||||
|
||||
|
||||
@@ -41,25 +41,6 @@ export interface ExecuteBashAction extends ActionBase<"ExecuteBashAction"> {
|
||||
reset: boolean;
|
||||
}
|
||||
|
||||
export interface TerminalAction extends ActionBase<"TerminalAction"> {
|
||||
/**
|
||||
* The terminal command to execute.
|
||||
*/
|
||||
command: string;
|
||||
/**
|
||||
* If True, the command is an input to the running process. If False, the command is executed directly.
|
||||
*/
|
||||
is_input: boolean;
|
||||
/**
|
||||
* Optional max time limit (seconds) for the command.
|
||||
*/
|
||||
timeout: number | null;
|
||||
/**
|
||||
* If True, reset the terminal session before running the command.
|
||||
*/
|
||||
reset: boolean;
|
||||
}
|
||||
|
||||
export interface FileEditorAction extends ActionBase<"FileEditorAction"> {
|
||||
/**
|
||||
* The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.
|
||||
@@ -225,7 +206,6 @@ export type Action =
|
||||
| FinishAction
|
||||
| ThinkAction
|
||||
| ExecuteBashAction
|
||||
| TerminalAction
|
||||
| FileEditorAction
|
||||
| StrReplaceEditorAction
|
||||
| TaskTrackerAction
|
||||
|
||||
@@ -3,11 +3,9 @@ type EventType =
|
||||
| "Finish"
|
||||
| "Think"
|
||||
| "ExecuteBash"
|
||||
| "Terminal"
|
||||
| "FileEditor"
|
||||
| "StrReplaceEditor"
|
||||
| "TaskTracker"
|
||||
| "PlanningFileEditor";
|
||||
| "TaskTracker";
|
||||
|
||||
type ActionOnlyType =
|
||||
| "BrowserNavigate"
|
||||
@@ -26,8 +24,7 @@ type ObservationOnlyType = "Browser";
|
||||
type ActionEventType = `${ActionOnlyType}Action` | `${EventType}Action`;
|
||||
type ObservationEventType =
|
||||
| `${ObservationOnlyType}Observation`
|
||||
| `${EventType}Observation`
|
||||
| "TerminalObservation";
|
||||
| `${EventType}Observation`;
|
||||
|
||||
export interface ActionBase<T extends ActionEventType = ActionEventType> {
|
||||
kind: T;
|
||||
|
||||
@@ -25,13 +25,9 @@ export interface MCPToolObservation
|
||||
export interface FinishObservation
|
||||
extends ObservationBase<"FinishObservation"> {
|
||||
/**
|
||||
* Content returned from the finish action as a list of TextContent/ImageContent objects.
|
||||
* Final message sent to the user
|
||||
*/
|
||||
content: Array<TextContent | ImageContent>;
|
||||
/**
|
||||
* Whether the finish action resulted in an error
|
||||
*/
|
||||
is_error: boolean;
|
||||
message: string;
|
||||
}
|
||||
|
||||
export interface ThinkObservation extends ObservationBase<"ThinkObservation"> {
|
||||
@@ -85,34 +81,6 @@ export interface ExecuteBashObservation
|
||||
metadata: CmdOutputMetadata;
|
||||
}
|
||||
|
||||
export interface TerminalObservation
|
||||
extends ObservationBase<"TerminalObservation"> {
|
||||
/**
|
||||
* Content returned from the terminal as a list of TextContent/ImageContent objects.
|
||||
*/
|
||||
content: Array<TextContent | ImageContent>;
|
||||
/**
|
||||
* The bash command that was executed.
|
||||
*/
|
||||
command: string | null;
|
||||
/**
|
||||
* The exit code of the command if it has finished.
|
||||
*/
|
||||
exit_code: number | null;
|
||||
/**
|
||||
* Whether the command execution produced an error.
|
||||
*/
|
||||
is_error: boolean;
|
||||
/**
|
||||
* Whether the command execution timed out.
|
||||
*/
|
||||
timeout: boolean;
|
||||
/**
|
||||
* Additional metadata captured from the shell after command execution.
|
||||
*/
|
||||
metadata: CmdOutputMetadata;
|
||||
}
|
||||
|
||||
export interface FileEditorObservation
|
||||
extends ObservationBase<"FileEditorObservation"> {
|
||||
/**
|
||||
@@ -194,46 +162,12 @@ export interface TaskTrackerObservation
|
||||
task_list: TaskItem[];
|
||||
}
|
||||
|
||||
export interface PlanningFileEditorObservation
|
||||
extends ObservationBase<"PlanningFileEditorObservation"> {
|
||||
/**
|
||||
* Content returned from the tool as a list of TextContent/ImageContent objects.
|
||||
*/
|
||||
content: Array<TextContent | ImageContent>;
|
||||
/**
|
||||
* Whether the call resulted in an error.
|
||||
*/
|
||||
is_error: boolean;
|
||||
/**
|
||||
* The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.
|
||||
*/
|
||||
command: "view" | "create" | "str_replace" | "insert" | "undo_edit";
|
||||
/**
|
||||
* The file path that was edited.
|
||||
*/
|
||||
path: string | null;
|
||||
/**
|
||||
* Indicates if the file previously existed. If not, it was created.
|
||||
*/
|
||||
prev_exist: boolean;
|
||||
/**
|
||||
* The content of the file before the edit.
|
||||
*/
|
||||
old_content: string | null;
|
||||
/**
|
||||
* The content of the file after the edit.
|
||||
*/
|
||||
new_content: string | null;
|
||||
}
|
||||
|
||||
export type Observation =
|
||||
| MCPToolObservation
|
||||
| FinishObservation
|
||||
| ThinkObservation
|
||||
| BrowserObservation
|
||||
| ExecuteBashObservation
|
||||
| TerminalObservation
|
||||
| FileEditorObservation
|
||||
| StrReplaceEditorObservation
|
||||
| TaskTrackerObservation
|
||||
| PlanningFileEditorObservation;
|
||||
| TaskTrackerObservation;
|
||||
|
||||
@@ -1,63 +1,11 @@
|
||||
import { BaseEvent } from "../base/event";
|
||||
import { V1ExecutionStatus } from "../base/common";
|
||||
|
||||
/**
|
||||
* Token usage metrics for LLM calls
|
||||
*/
|
||||
export interface TokenUsage {
|
||||
model: string;
|
||||
prompt_tokens: number;
|
||||
completion_tokens: number;
|
||||
cache_read_tokens: number;
|
||||
cache_write_tokens: number;
|
||||
reasoning_tokens: number;
|
||||
context_window: number;
|
||||
per_turn_token: number;
|
||||
response_id: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* LLM metrics for a specific component (agent or condenser)
|
||||
*/
|
||||
export interface LLMMetrics {
|
||||
model_name: string;
|
||||
accumulated_cost: number;
|
||||
max_budget_per_task: number | null;
|
||||
accumulated_token_usage: TokenUsage;
|
||||
costs: Array<{
|
||||
model: string;
|
||||
cost: number;
|
||||
timestamp: number;
|
||||
}>;
|
||||
response_latencies: Array<{
|
||||
model: string;
|
||||
latency: number;
|
||||
response_id: string;
|
||||
}>;
|
||||
token_usages: TokenUsage[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Usage metrics mapping for different components
|
||||
*/
|
||||
export interface UsageToMetrics {
|
||||
agent: LLMMetrics;
|
||||
condenser: LLMMetrics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stats containing usage metrics
|
||||
*/
|
||||
export interface ConversationStats {
|
||||
usage_to_metrics: UsageToMetrics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Conversation state value types
|
||||
*/
|
||||
export interface ConversationState {
|
||||
execution_status: V1ExecutionStatus;
|
||||
stats?: ConversationStats;
|
||||
// Add other conversation state fields here as needed
|
||||
}
|
||||
|
||||
@@ -71,12 +19,12 @@ interface ConversationStateUpdateEventBase extends BaseEvent {
|
||||
* Unique key for this state update event.
|
||||
* Can be "full_state" for full state snapshots or field names for partial updates.
|
||||
*/
|
||||
key: "full_state" | "execution_status" | "stats"; // Extend with other keys as needed
|
||||
key: "full_state" | "execution_status"; // Extend with other keys as needed
|
||||
|
||||
/**
|
||||
* Conversation state updates
|
||||
*/
|
||||
value: ConversationState | V1ExecutionStatus | ConversationStats;
|
||||
value: ConversationState | V1ExecutionStatus;
|
||||
}
|
||||
|
||||
// Narrowed interfaces for full state update event
|
||||
@@ -93,18 +41,10 @@ export interface ConversationStateUpdateEventAgentStatus
|
||||
value: V1ExecutionStatus;
|
||||
}
|
||||
|
||||
// Narrowed interface for stats update event
|
||||
export interface ConversationStateUpdateEventStats
|
||||
extends ConversationStateUpdateEventBase {
|
||||
key: "stats";
|
||||
value: ConversationStats;
|
||||
}
|
||||
|
||||
// Conversation state update event - contains conversation state updates
|
||||
export type ConversationStateUpdateEvent =
|
||||
| ConversationStateUpdateEventFullState
|
||||
| ConversationStateUpdateEventAgentStatus
|
||||
| ConversationStateUpdateEventStats;
|
||||
| ConversationStateUpdateEventAgentStatus;
|
||||
|
||||
// Conversation error event - contains error information
|
||||
export interface ConversationErrorEvent extends BaseEvent {
|
||||
|
||||
@@ -3,10 +3,7 @@ import {
|
||||
ObservationEvent,
|
||||
BaseEvent,
|
||||
ExecuteBashAction,
|
||||
TerminalAction,
|
||||
ExecuteBashObservation,
|
||||
PlanningFileEditorObservation,
|
||||
TerminalObservation,
|
||||
} from "./core";
|
||||
import { AgentErrorEvent } from "./core/events/observation-event";
|
||||
import { MessageEvent } from "./core/events/message-event";
|
||||
@@ -15,7 +12,6 @@ import {
|
||||
ConversationStateUpdateEvent,
|
||||
ConversationStateUpdateEventAgentStatus,
|
||||
ConversationStateUpdateEventFullState,
|
||||
ConversationStateUpdateEventStats,
|
||||
ConversationErrorEvent,
|
||||
} from "./core/events/conversation-state-event";
|
||||
import { SystemPromptEvent } from "./core/events/system-event";
|
||||
@@ -102,29 +98,17 @@ export const isActionEvent = (event: OpenHandsEvent): event is ActionEvent =>
|
||||
*/
|
||||
export const isExecuteBashActionEvent = (
|
||||
event: OpenHandsEvent,
|
||||
): event is ActionEvent<ExecuteBashAction | TerminalAction> =>
|
||||
isActionEvent(event) &&
|
||||
(event.action.kind === "ExecuteBashAction" ||
|
||||
event.action.kind === "TerminalAction");
|
||||
): event is ActionEvent<ExecuteBashAction> =>
|
||||
isActionEvent(event) && event.action.kind === "ExecuteBashAction";
|
||||
|
||||
/**
|
||||
* Type guard function to check if an observation event contains terminal output
|
||||
* Type guard function to check if an observation event is an ExecuteBashObservation
|
||||
*/
|
||||
export const isExecuteBashObservationEvent = (
|
||||
event: OpenHandsEvent,
|
||||
): event is ObservationEvent<ExecuteBashObservation | TerminalObservation> =>
|
||||
): event is ObservationEvent<ExecuteBashObservation> =>
|
||||
isObservationEvent(event) &&
|
||||
(event.observation.kind === "ExecuteBashObservation" ||
|
||||
event.observation.kind === "TerminalObservation");
|
||||
|
||||
/**
|
||||
* Type guard function to check if an observation event is a PlanningFileEditorObservation
|
||||
*/
|
||||
export const isPlanningFileEditorObservationEvent = (
|
||||
event: OpenHandsEvent,
|
||||
): event is ObservationEvent<PlanningFileEditorObservation> =>
|
||||
isObservationEvent(event) &&
|
||||
event.observation.kind === "PlanningFileEditorObservation";
|
||||
event.observation.kind === "ExecuteBashObservation";
|
||||
|
||||
/**
|
||||
* Type guard function to check if an event is a system prompt event
|
||||
@@ -155,10 +139,6 @@ export const isAgentStatusConversationStateUpdateEvent = (
|
||||
): event is ConversationStateUpdateEventAgentStatus =>
|
||||
event.key === "execution_status";
|
||||
|
||||
export const isStatsConversationStateUpdateEvent = (
|
||||
event: ConversationStateUpdateEvent,
|
||||
): event is ConversationStateUpdateEventStats => event.key === "stats";
|
||||
|
||||
/**
|
||||
* Type guard function to check if an event is a conversation error event
|
||||
*/
|
||||
|
||||
@@ -1,45 +1,16 @@
|
||||
/**
|
||||
* Parses a date string as UTC if it doesn't have a timezone indicator.
|
||||
* This fixes the issue where ISO strings without timezone info are interpreted as local time.
|
||||
* @param dateString ISO 8601 date string
|
||||
* @returns Date object parsed as UTC
|
||||
*
|
||||
* @example
|
||||
* parseDateAsUTC("2025-12-01T11:53:37.273886"); // Parsed as UTC
|
||||
* parseDateAsUTC("2025-12-01T11:53:37.273886Z"); // Already has timezone, parsed correctly
|
||||
* parseDateAsUTC("2025-12-01T11:53:37+00:00"); // Already has timezone, parsed correctly
|
||||
*/
|
||||
const parseDateAsUTC = (dateString: string): Date => {
|
||||
// Check if the string already has a timezone indicator
|
||||
// Look for 'Z' (UTC), '+' (positive offset), or '-' after the time part (negative offset)
|
||||
const hasTimezone =
|
||||
dateString.includes("Z") || dateString.match(/[+-]\d{2}:\d{2}$/) !== null;
|
||||
|
||||
if (hasTimezone) {
|
||||
// Already has timezone info, parse normally
|
||||
return new Date(dateString);
|
||||
}
|
||||
|
||||
// No timezone indicator - append 'Z' to force UTC parsing
|
||||
return new Date(`${dateString}Z`);
|
||||
};
|
||||
|
||||
/**
|
||||
* Formats a date into a compact string representing the time delta between the given date and the current date.
|
||||
* @param date The date to format (Date object or ISO 8601 string)
|
||||
* @param date The date to format
|
||||
* @returns A compact string representing the time delta between the given date and the current date
|
||||
*
|
||||
* @example
|
||||
* // now is 2024-01-01T00:00:00Z
|
||||
* formatTimeDelta(new Date("2023-12-31T23:59:59Z")); // "1s"
|
||||
* formatTimeDelta("2023-12-31T23:59:59Z"); // "1s"
|
||||
* formatTimeDelta("2025-12-01T11:53:37.273886"); // Parsed as UTC automatically
|
||||
* formatTimeDelta(new Date("2022-01-01T00:00:00Z")); // "2y"
|
||||
*/
|
||||
export const formatTimeDelta = (date: Date | string) => {
|
||||
// Parse string dates as UTC if needed, or use Date object directly
|
||||
const dateObj = typeof date === "string" ? parseDateAsUTC(date) : date;
|
||||
export const formatTimeDelta = (date: Date) => {
|
||||
const now = new Date();
|
||||
const delta = now.getTime() - dateObj.getTime();
|
||||
const delta = now.getTime() - date.getTime();
|
||||
|
||||
const seconds = Math.floor(delta / 1000);
|
||||
const minutes = Math.floor(seconds / 60);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/**
|
||||
* Generates a URL to redirect to for OAuth authentication
|
||||
* @param identityProvider The identity provider to use (e.g., "github", "gitlab", "bitbucket", "azure_devops")
|
||||
* @param identityProvider The identity provider to use (e.g., "github", "gitlab", "bitbucket")
|
||||
* @param requestUrl The URL of the request
|
||||
* @returns The URL to redirect to for OAuth
|
||||
*/
|
||||
|
||||
@@ -8,13 +8,12 @@ export enum LoginMethod {
|
||||
GITHUB = "github",
|
||||
GITLAB = "gitlab",
|
||||
BITBUCKET = "bitbucket",
|
||||
AZURE_DEVOPS = "azure_devops",
|
||||
ENTERPRISE_SSO = "enterprise_sso",
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the login method in local storage
|
||||
* @param method The login method (github, gitlab, bitbucket, or azure_devops)
|
||||
* @param method The login method (github, gitlab, or bitbucket)
|
||||
*/
|
||||
export const setLoginMethod = (method: LoginMethod): void => {
|
||||
localStorage.setItem(LOCAL_STORAGE_KEYS.LOGIN_METHOD, method);
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
* Using CDN approach for better TypeScript compatibility
|
||||
*/
|
||||
|
||||
import EventLogger from "./event-logger";
|
||||
|
||||
export interface ReoIdentity {
|
||||
username: string;
|
||||
type: "github" | "email";
|
||||
@@ -43,7 +41,7 @@ class ReoService {
|
||||
this.initialized = true;
|
||||
}
|
||||
} catch (error) {
|
||||
EventLogger.error(`Failed to initialize Reo.dev tracking: ${error}`);
|
||||
console.error("Failed to initialize Reo.dev tracking:", error);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +78,7 @@ class ReoService {
|
||||
*/
|
||||
identify(identity: ReoIdentity): void {
|
||||
if (!this.initialized) {
|
||||
EventLogger.warning("Reo.dev not initialized. Call init() first.");
|
||||
console.warn("Reo.dev not initialized. Call init() first.");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -89,7 +87,7 @@ class ReoService {
|
||||
window.Reo.identify(identity);
|
||||
}
|
||||
} catch (error) {
|
||||
EventLogger.error(`Failed to identify user in Reo.dev: ${error}`);
|
||||
console.error("Failed to identify user in Reo.dev:", error);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -182,8 +182,6 @@ export const shouldUseInstallationRepos = (
|
||||
return true;
|
||||
case "gitlab":
|
||||
return false;
|
||||
case "azure_devops":
|
||||
return false;
|
||||
case "github":
|
||||
return app_mode === "saas";
|
||||
default:
|
||||
@@ -199,8 +197,6 @@ export const getGitProviderBaseUrl = (gitProvider: Provider): string => {
|
||||
return "https://gitlab.com";
|
||||
case "bitbucket":
|
||||
return "https://bitbucket.org";
|
||||
case "azure_devops":
|
||||
return "https://dev.azure.com";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
@@ -214,7 +210,6 @@ export const getGitProviderBaseUrl = (gitProvider: Provider): string => {
|
||||
export const getProviderName = (gitProvider: Provider) => {
|
||||
if (gitProvider === "gitlab") return "GitLab";
|
||||
if (gitProvider === "bitbucket") return "Bitbucket";
|
||||
if (gitProvider === "azure_devops") return "Azure DevOps";
|
||||
return "GitHub";
|
||||
};
|
||||
|
||||
@@ -259,15 +254,6 @@ export const constructPullRequestUrl = (
|
||||
return `${baseUrl}/${repositoryName}/-/merge_requests/${prNumber}`;
|
||||
case "bitbucket":
|
||||
return `${baseUrl}/${repositoryName}/pull-requests/${prNumber}`;
|
||||
case "azure_devops": {
|
||||
// Azure DevOps format: org/project/repo
|
||||
const parts = repositoryName.split("/");
|
||||
if (parts.length === 3) {
|
||||
const [org, project, repo] = parts;
|
||||
return `${baseUrl}/${org}/${project}/_git/${repo}/pullrequest/${prNumber}`;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
@@ -302,15 +288,6 @@ export const constructMicroagentUrl = (
|
||||
return `${baseUrl}/${repositoryName}/-/blob/main/${microagentPath}`;
|
||||
case "bitbucket":
|
||||
return `${baseUrl}/${repositoryName}/src/main/${microagentPath}`;
|
||||
case "azure_devops": {
|
||||
// Azure DevOps format: org/project/repo
|
||||
const parts = repositoryName.split("/");
|
||||
if (parts.length === 3) {
|
||||
const [org, project, repo] = parts;
|
||||
return `${baseUrl}/${org}/${project}/_git/${repo}?path=/${microagentPath}&version=GBmain`;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
@@ -380,15 +357,6 @@ export const constructBranchUrl = (
|
||||
return `${baseUrl}/${repositoryName}/-/tree/${branchName}`;
|
||||
case "bitbucket":
|
||||
return `${baseUrl}/${repositoryName}/src/${branchName}`;
|
||||
case "azure_devops": {
|
||||
// Azure DevOps format: org/project/repo
|
||||
const parts = repositoryName.split("/");
|
||||
if (parts.length === 3) {
|
||||
const [org, project, repo] = parts;
|
||||
return `${baseUrl}/${org}/${project}/_git/${repo}?version=GB${branchName}`;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
|
||||
@@ -59,7 +59,6 @@ export const VERIFIED_ANTHROPIC_MODELS = [
|
||||
"claude-haiku-4-5-20251001",
|
||||
"claude-opus-4-20250514",
|
||||
"claude-opus-4-1-20250805",
|
||||
"claude-opus-4-5-20251101",
|
||||
];
|
||||
|
||||
// LiteLLM does not return the compatible Mistral models with the provider, so we list them here to set them ourselves
|
||||
|
||||
@@ -72,7 +72,7 @@ Your primary role is to assist users by executing commands, modifying code, and
|
||||
</SECURITY_RISK_ASSESSMENT>
|
||||
|
||||
<EXTERNAL_SERVICES>
|
||||
* When interacting with external services like GitHub, GitLab, Bitbucket, or Azure DevOps, use their respective APIs instead of browser-based interactions whenever possible.
|
||||
* When interacting with external services like GitHub, GitLab, or Bitbucket, use their respective APIs instead of browser-based interactions whenever possible.
|
||||
* Only resort to browser-based interactions with these services if specifically requested by the user or if the required operation cannot be performed via API.
|
||||
</EXTERNAL_SERVICES>
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from uuid import UUID, uuid4
|
||||
from uuid import uuid4
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@@ -97,9 +97,7 @@ class AppConversationStartRequest(BaseModel):
|
||||
"""
|
||||
|
||||
sandbox_id: str | None = Field(default=None)
|
||||
conversation_id: UUID | None = Field(default=None)
|
||||
initial_message: SendMessageRequest | None = None
|
||||
system_message_suffix: str | None = None
|
||||
processors: list[EventCallbackProcessor] | None = Field(default=None)
|
||||
llm_model: str | None = None
|
||||
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
"""Sandboxed Conversation router for OpenHands Server."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from typing import Annotated, AsyncGenerator
|
||||
from uuid import UUID
|
||||
@@ -51,21 +49,9 @@ from openhands.app_server.config import (
|
||||
depends_app_conversation_start_task_service,
|
||||
depends_db_session,
|
||||
depends_httpx_client,
|
||||
depends_sandbox_service,
|
||||
depends_sandbox_spec_service,
|
||||
depends_user_context,
|
||||
get_app_conversation_service,
|
||||
)
|
||||
from openhands.app_server.sandbox.sandbox_models import (
|
||||
AGENT_SERVER,
|
||||
SandboxStatus,
|
||||
)
|
||||
from openhands.app_server.sandbox.sandbox_service import SandboxService
|
||||
from openhands.app_server.sandbox.sandbox_spec_service import SandboxSpecService
|
||||
from openhands.app_server.utils.docker_utils import (
|
||||
replace_localhost_hostname_for_docker,
|
||||
)
|
||||
from openhands.sdk.workspace.remote.async_remote_workspace import AsyncRemoteWorkspace
|
||||
|
||||
router = APIRouter(prefix='/app-conversations', tags=['Conversations'])
|
||||
app_conversation_service_dependency = depends_app_conversation_service()
|
||||
@@ -75,8 +61,6 @@ app_conversation_start_task_service_dependency = (
|
||||
user_context_dependency = depends_user_context()
|
||||
db_session_dependency = depends_db_session()
|
||||
httpx_client_dependency = depends_httpx_client()
|
||||
sandbox_service_dependency = depends_sandbox_service()
|
||||
sandbox_spec_service_dependency = depends_sandbox_spec_service()
|
||||
|
||||
# Read methods
|
||||
|
||||
@@ -305,101 +289,6 @@ async def batch_get_app_conversation_start_tasks(
|
||||
return start_tasks
|
||||
|
||||
|
||||
@router.get('/{conversation_id}/file')
async def read_conversation_file(
    conversation_id: UUID,
    file_path: Annotated[
        str,
        Query(title='Path to the file to read within the sandbox workspace'),
    ] = '/workspace/project/PLAN.md',
    app_conversation_service: AppConversationService = (
        app_conversation_service_dependency
    ),
    sandbox_service: SandboxService = sandbox_service_dependency,
    sandbox_spec_service: SandboxSpecService = sandbox_spec_service_dependency,
) -> str:
    """Read a file from a specific conversation's sandbox workspace.

    This endpoint is best-effort: every failure mode (unknown conversation,
    sandbox missing or not running, missing sandbox spec, no agent server URL,
    download or decode error) yields an empty string rather than an HTTP error.

    Args:
        conversation_id: The UUID of the conversation
        file_path: Path to the file to read within the sandbox workspace
        app_conversation_service: Service used to look up the conversation
        sandbox_service: Service used to look up the conversation's sandbox
        sandbox_spec_service: Service used to look up the sandbox spec, whose
            ``working_dir`` is handed to the remote workspace

    Returns:
        The UTF-8 decoded content of the file, or an empty string if the file
        could not be read
    """
    # Imported locally so this function does not rely on a module-level logger.
    import logging

    log = logging.getLogger(__name__)

    # Get the conversation info
    conversation = await app_conversation_service.get_app_conversation(conversation_id)
    if not conversation:
        return ''

    # Get the sandbox info; only a running sandbox can serve files
    sandbox = await sandbox_service.get_sandbox(conversation.sandbox_id)
    if not sandbox or sandbox.status != SandboxStatus.RUNNING:
        return ''

    # Get the sandbox spec to find the working directory
    sandbox_spec = await sandbox_spec_service.get_sandbox_spec(sandbox.sandbox_spec_id)
    if not sandbox_spec:
        return ''

    # Pick the first exposed URL that belongs to the agent server
    agent_server_url = next(
        (
            exposed_url.url
            for exposed_url in (sandbox.exposed_urls or [])
            if exposed_url.name == AGENT_SERVER
        ),
        None,
    )
    if not agent_server_url:
        return ''

    agent_server_url = replace_localhost_hostname_for_docker(agent_server_url)

    # Create remote workspace
    remote_workspace = AsyncRemoteWorkspace(
        host=agent_server_url,
        api_key=sandbox.session_api_key,
        working_dir=sandbox_spec.working_dir,
    )

    temp_file_path = None
    try:
        # Reserve a local path for the download. delete=False keeps the file
        # after the handle is closed so the download can write to it; it is
        # removed in the finally block below.
        with tempfile.NamedTemporaryFile(mode='w+b', delete=False) as temp_file:
            temp_file_path = temp_file.name

        # Download the file from the remote system
        result = await remote_workspace.file_download(
            source_path=file_path,
            destination_path=temp_file_path,
        )

        if result.success:
            with open(temp_file_path, 'rb') as f:
                return f.read().decode('utf-8')
    except Exception:
        # Still best-effort (the caller gets ''), but leave a trace so that
        # download/decoding failures can be diagnosed.
        log.warning(
            'Failed to read %r from conversation %s',
            file_path,
            conversation_id,
            exc_info=True,
        )
    finally:
        # Clean up the temporary file
        if temp_file_path:
            try:
                os.unlink(temp_file_path)
            except OSError:
                log.debug(
                    'Could not remove temporary file %s',
                    temp_file_path,
                    exc_info=True,
                )

    return ''
|
||||
|
||||
|
||||
async def _consume_remaining(
|
||||
async_iter, db_session: AsyncSession, httpx_client: httpx.AsyncClient
|
||||
):
|
||||
|
||||
@@ -68,17 +68,17 @@ from openhands.app_server.utils.docker_utils import (
|
||||
)
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import ProviderType
|
||||
from openhands.sdk import AgentContext, LocalWorkspace
|
||||
from openhands.sdk import LocalWorkspace
|
||||
from openhands.sdk.conversation.secret_source import LookupSecret, StaticSecret
|
||||
from openhands.sdk.llm import LLM
|
||||
from openhands.sdk.security.confirmation_policy import AlwaysConfirm
|
||||
from openhands.sdk.workspace.remote.async_remote_workspace import AsyncRemoteWorkspace
|
||||
from openhands.server.types import AppMode
|
||||
from openhands.tools.preset.default import get_default_agent
|
||||
from openhands.tools.preset.planning import get_planning_agent
|
||||
|
||||
_conversation_info_type_adapter = TypeAdapter(list[ConversationInfo | None])
|
||||
_logger = logging.getLogger(__name__)
|
||||
GIT_TOKEN = 'GIT_TOKEN'
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -97,8 +97,6 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
httpx_client: httpx.AsyncClient
|
||||
web_url: str | None
|
||||
access_token_hard_timeout: timedelta | None
|
||||
app_mode: str | None = None
|
||||
keycloak_auth_cookie: str | None = None
|
||||
|
||||
async def search_app_conversations(
|
||||
self,
|
||||
@@ -230,12 +228,10 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
await self._build_start_conversation_request_for_user(
|
||||
sandbox,
|
||||
request.initial_message,
|
||||
request.system_message_suffix,
|
||||
request.git_provider,
|
||||
sandbox_spec.working_dir,
|
||||
request.agent_type,
|
||||
request.llm_model,
|
||||
request.conversation_id,
|
||||
remote_workspace=remote_workspace,
|
||||
selected_repository=request.selected_repository,
|
||||
)
|
||||
@@ -281,17 +277,14 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
)
|
||||
|
||||
# Setup default processors
|
||||
processors = request.processors or []
|
||||
|
||||
# Always ensure SetTitleCallbackProcessor is included
|
||||
has_set_title_processor = any(
|
||||
isinstance(processor, SetTitleCallbackProcessor)
|
||||
for processor in processors
|
||||
)
|
||||
if not has_set_title_processor:
|
||||
processors.append(SetTitleCallbackProcessor())
|
||||
processors = request.processors
|
||||
if processors is None:
|
||||
processors = [SetTitleCallbackProcessor()]
|
||||
|
||||
# Save processors
|
||||
# Save processors sequentially to avoid concurrent database session usage
|
||||
# This is a simple database operation (upsert) that's very fast, so the
|
||||
# performance impact of sequential vs parallel is negligible
|
||||
for processor in processors:
|
||||
await self.event_callback_service.save_event_callback(
|
||||
EventCallback(
|
||||
@@ -523,12 +516,10 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
self,
|
||||
sandbox: SandboxInfo,
|
||||
initial_message: SendMessageRequest | None,
|
||||
system_message_suffix: str | None,
|
||||
git_provider: ProviderType | None,
|
||||
working_dir: str,
|
||||
agent_type: AgentType = AgentType.DEFAULT,
|
||||
llm_model: str | None = None,
|
||||
conversation_id: UUID | None = None,
|
||||
remote_workspace: AsyncRemoteWorkspace | None = None,
|
||||
selected_repository: str | None = None,
|
||||
) -> StartConversationRequest:
|
||||
@@ -537,7 +528,6 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
# Set up a secret for the git token
|
||||
secrets = await self.user_context.get_secrets()
|
||||
if git_provider:
|
||||
secret_name = f'{git_provider.name}_TOKEN'
|
||||
if self.web_url:
|
||||
# If there is a web url, then we create an access token to access it.
|
||||
# For security reasons, we are explicit here - only this user, and
|
||||
@@ -549,15 +539,9 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
},
|
||||
expires_in=self.access_token_hard_timeout,
|
||||
)
|
||||
headers = {'X-Access-Token': access_token}
|
||||
|
||||
# Include keycloak_auth cookie in headers if app_mode is SaaS
|
||||
if self.app_mode == 'saas' and self.keycloak_auth_cookie:
|
||||
headers['Cookie'] = f'keycloak_auth={self.keycloak_auth_cookie}'
|
||||
|
||||
secrets[secret_name] = LookupSecret(
|
||||
secrets[GIT_TOKEN] = LookupSecret(
|
||||
url=self.web_url + '/api/v1/webhooks/secrets',
|
||||
headers=headers,
|
||||
headers={'X-Access-Token': access_token},
|
||||
)
|
||||
else:
|
||||
# If there is no URL specified where the sandbox can access the app server
|
||||
@@ -565,7 +549,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
# on the type, this may eventually expire.
|
||||
static_token = await self.user_context.get_latest_token(git_provider)
|
||||
if static_token:
|
||||
secrets[secret_name] = StaticSecret(value=static_token)
|
||||
secrets[GIT_TOKEN] = StaticSecret(value=static_token)
|
||||
|
||||
workspace = LocalWorkspace(working_dir=working_dir)
|
||||
|
||||
@@ -584,10 +568,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
else:
|
||||
agent = get_default_agent(llm=llm)
|
||||
|
||||
agent_context = AgentContext(system_message_suffix=system_message_suffix)
|
||||
agent = agent.model_copy(update={'agent_context': agent_context})
|
||||
|
||||
conversation_id = conversation_id or uuid4()
|
||||
conversation_id = uuid4()
|
||||
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
|
||||
user.id, conversation_id, agent
|
||||
)
|
||||
@@ -859,21 +840,6 @@ class LiveStatusAppConversationServiceInjector(AppConversationServiceInjector):
|
||||
if isinstance(sandbox_service, DockerSandboxService):
|
||||
web_url = f'http://host.docker.internal:{sandbox_service.host_port}'
|
||||
|
||||
# Get app_mode and keycloak_auth cookie for SaaS mode
|
||||
app_mode = None
|
||||
keycloak_auth_cookie = None
|
||||
try:
|
||||
from openhands.server.shared import server_config
|
||||
|
||||
app_mode = (
|
||||
server_config.app_mode.value if server_config.app_mode else None
|
||||
)
|
||||
if request and server_config.app_mode == AppMode.SAAS:
|
||||
keycloak_auth_cookie = request.cookies.get('keycloak_auth')
|
||||
except (ImportError, AttributeError):
|
||||
# If server_config is not available (e.g., in tests), continue without it
|
||||
pass
|
||||
|
||||
yield LiveStatusAppConversationService(
|
||||
init_git_in_empty_workspace=self.init_git_in_empty_workspace,
|
||||
user_context=user_context,
|
||||
@@ -888,6 +854,4 @@ class LiveStatusAppConversationServiceInjector(AppConversationServiceInjector):
|
||||
httpx_client=httpx_client,
|
||||
web_url=web_url,
|
||||
access_token_hard_timeout=access_token_hard_timeout,
|
||||
app_mode=app_mode,
|
||||
keycloak_auth_cookie=keycloak_auth_cookie,
|
||||
)
|
||||
|
||||
@@ -9,8 +9,6 @@ from fastapi import Depends, Request
|
||||
from pydantic import Field
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
# Import the event_callback module to ensure all processors are registered
|
||||
import openhands.app_server.event_callback # noqa: F401
|
||||
from openhands.agent_server.env_parser import from_env
|
||||
from openhands.app_server.app_conversation.app_conversation_info_service import (
|
||||
AppConversationInfoService,
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
"""Event callback system for OpenHands.
|
||||
|
||||
This module provides the event callback system that allows processors to be
|
||||
registered and executed when specific events occur during conversations.
|
||||
|
||||
All callback processors must be imported here to ensure they are registered
|
||||
with the discriminated union system used by Pydantic for validation.
|
||||
"""
|
||||
|
||||
# Import base classes and processors without circular dependencies
|
||||
from .event_callback_models import EventCallbackProcessor, LoggingCallbackProcessor
|
||||
from .github_v1_callback_processor import GithubV1CallbackProcessor
|
||||
|
||||
# Note: SetTitleCallbackProcessor is not imported here to avoid circular imports
|
||||
# It will be registered when imported elsewhere in the application
|
||||
|
||||
__all__ = [
|
||||
'EventCallbackProcessor',
|
||||
'LoggingCallbackProcessor',
|
||||
'GithubV1CallbackProcessor',
|
||||
]
|
||||
@@ -1,296 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import httpx
|
||||
from github import Github, GithubIntegration
|
||||
from pydantic import Field
|
||||
|
||||
from openhands.agent_server.models import AskAgentRequest, AskAgentResponse
|
||||
from openhands.app_server.event_callback.event_callback_models import (
|
||||
EventCallback,
|
||||
EventCallbackProcessor,
|
||||
)
|
||||
from openhands.app_server.event_callback.event_callback_result_models import (
|
||||
EventCallbackResult,
|
||||
EventCallbackResultStatus,
|
||||
)
|
||||
from openhands.app_server.event_callback.util import (
|
||||
ensure_conversation_found,
|
||||
ensure_running_sandbox,
|
||||
get_agent_server_url_from_sandbox,
|
||||
get_conversation_url,
|
||||
get_prompt_template,
|
||||
)
|
||||
from openhands.sdk import Event
|
||||
from openhands.sdk.event import ConversationStateUpdateEvent
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GithubV1CallbackProcessor(EventCallbackProcessor):
    """Callback processor for GitHub V1 integrations.

    When a conversation reports ``execution_status == 'finished'``, the
    processor asks the agent server for a summary of its work and posts that
    summary as a comment on the GitHub issue or pull request described by
    ``github_view_data``.
    """

    # GitHub context for the conversation. Keys read by this class:
    # 'installation_id', 'full_repo_name', 'issue_number' and, for inline
    # replies, 'comment_id'.
    github_view_data: dict[str, Any] = Field(default_factory=dict)
    # One-shot flag: cleared on the first finished event that is handled, so
    # later finished events return None without requesting another summary.
    should_request_summary: bool = Field(default=True)
    # Not read anywhere in this class.
    should_extract: bool = Field(default=True)
    # When true, reply to a PR review comment instead of commenting on the issue.
    inline_pr_comment: bool = Field(default=False)

    async def __call__(
        self,
        conversation_id: UUID,
        callback: EventCallback,
        event: Event,
    ) -> EventCallbackResult | None:
        """Process events for GitHub V1 integration.

        Args:
            conversation_id: Conversation the event belongs to.
            callback: The stored callback this processor is attached to.
            event: The event that triggered the callback.

        Returns:
            ``None`` when the event is ignored (not a finished-execution state
            update, or a summary was already requested); otherwise a SUCCESS
            result whose detail is the summary, or an ERROR result whose
            detail is the error message.
        """

        # Only handle ConversationStateUpdateEvent
        if not isinstance(event, ConversationStateUpdateEvent):
            return None

        # Only act when execution has finished
        if not (event.key == 'execution_status' and event.value == 'finished'):
            return None

        _logger.info('[GitHub V1] Callback agent state was %s', event)
        _logger.info(
            '[GitHub V1] Should request summary: %s', self.should_request_summary
        )

        if not self.should_request_summary:
            return None

        # Cleared before the attempt, so a failed attempt is not retried on a
        # later finished event.
        self.should_request_summary = False

        try:
            summary = await self._request_summary(conversation_id)
            await self._post_summary_to_github(summary)

            return EventCallbackResult(
                status=EventCallbackResultStatus.SUCCESS,
                event_callback_id=callback.id,
                event_id=event.id,
                conversation_id=conversation_id,
                detail=summary,
            )
        except Exception as e:
            _logger.exception('[GitHub V1] Error processing callback: %s', e)

            # Only try to post error to GitHub if we have basic requirements
            try:
                # Check if we have installation ID and credentials before posting
                if (
                    self.github_view_data.get('installation_id')
                    and os.getenv('GITHUB_APP_CLIENT_ID')
                    and os.getenv('GITHUB_APP_PRIVATE_KEY')
                ):
                    conversation_url = get_conversation_url().format(conversation_id)
                    # The space after the link keeps it from running into the
                    # following words in the rendered comment.
                    await self._post_summary_to_github(
                        f'OpenHands encountered an error: **{str(e)}**.\n\n'
                        f'[See the conversation]({conversation_url}) '
                        'for more information.'
                    )
            except Exception as post_error:
                _logger.warning(
                    '[GitHub V1] Failed to post error message to GitHub: %s', post_error
                )

            return EventCallbackResult(
                status=EventCallbackResultStatus.ERROR,
                event_callback_id=callback.id,
                event_id=event.id,
                conversation_id=conversation_id,
                detail=str(e),
            )

    # -------------------------------------------------------------------------
    # GitHub helpers
    # -------------------------------------------------------------------------

    def _get_installation_access_token(self) -> str:
        """Create an access token for the GitHub App installation.

        Reads ``GITHUB_APP_CLIENT_ID`` and ``GITHUB_APP_PRIVATE_KEY`` from the
        environment.

        Raises:
            ValueError: If the installation ID or the app credentials are missing.
        """
        installation_id = self.github_view_data.get('installation_id')

        if not installation_id:
            raise ValueError(
                f'Missing installation ID for GitHub payload: {self.github_view_data}'
            )

        github_app_client_id = os.getenv('GITHUB_APP_CLIENT_ID', '').strip()
        # Turn literal backslash-n sequences in the env value into real newlines.
        github_app_private_key = os.getenv('GITHUB_APP_PRIVATE_KEY', '').replace(
            '\\n', '\n'
        )

        if not github_app_client_id or not github_app_private_key:
            raise ValueError('GitHub App credentials are not configured')

        github_integration = GithubIntegration(
            github_app_client_id,
            github_app_private_key,
        )
        token_data = github_integration.get_access_token(installation_id)
        return token_data.token

    async def _post_summary_to_github(self, summary: str) -> None:
        """Post a summary comment to the configured GitHub issue.

        With ``inline_pr_comment`` set, the summary is posted as a reply to the
        review comment identified by ``github_view_data['comment_id']``;
        otherwise it is posted as a regular issue comment.

        Raises:
            RuntimeError: If no installation token could be obtained.
            ValueError: If credentials are not configured, or an inline reply
                was requested without a comment ID.
            KeyError: If 'full_repo_name' or 'issue_number' is missing.
        """
        installation_token = self._get_installation_access_token()

        if not installation_token:
            raise RuntimeError('Missing GitHub credentials')

        full_repo_name = self.github_view_data['full_repo_name']
        issue_number = self.github_view_data['issue_number']

        comment_id = None
        if self.inline_pr_comment:
            comment_id = self.github_view_data.get('comment_id')
            if comment_id is None:
                # Fail with a clear message instead of handing an empty string
                # to the GitHub client as the comment ID.
                raise ValueError(
                    'Missing comment ID for inline PR comment: '
                    f'{self.github_view_data}'
                )

        with Github(installation_token) as github_client:
            repo = github_client.get_repo(full_repo_name)
            if self.inline_pr_comment:
                pr = repo.get_pull(issue_number)
                pr.create_review_comment_reply(comment_id=comment_id, body=summary)
            else:
                issue = repo.get_issue(number=issue_number)
                issue.create_comment(summary)

    # -------------------------------------------------------------------------
    # Agent / sandbox helpers
    # -------------------------------------------------------------------------

    async def _ask_question(
        self,
        httpx_client: httpx.AsyncClient,
        agent_server_url: str,
        conversation_id: UUID,
        session_api_key: str,
        message_content: str,
    ) -> str:
        """Send a message to the agent server via the V1 API and return response text.

        Raises:
            Exception: On an HTTP error status, a timeout or any other request
                error. The underlying httpx exception is attached as the cause.
        """
        send_message_request = AskAgentRequest(question=message_content)

        url = (
            f'{agent_server_url.rstrip("/")}'
            f'/api/conversations/{conversation_id}/ask_agent'
        )
        headers = {'X-Session-API-Key': session_api_key}
        payload = send_message_request.model_dump()

        try:
            response = await httpx_client.post(
                url,
                json=payload,
                headers=headers,
                timeout=30.0,
            )
            response.raise_for_status()

            agent_response = AskAgentResponse.model_validate(response.json())
            return agent_response.response

        except httpx.HTTPStatusError as e:
            error_detail = f'HTTP {e.response.status_code} error'
            try:
                error_body = e.response.text
                if error_body:
                    error_detail += f': {error_body}'
            except Exception:  # noqa: BLE001
                # The response body is optional context; ignore failures reading it.
                pass

            _logger.error(
                '[GitHub V1] HTTP error sending message to %s: %s. '
                'Request payload: %s. Response headers: %s',
                url,
                error_detail,
                payload,
                dict(e.response.headers),
                exc_info=True,
            )
            raise Exception(
                f'Failed to send message to agent server: {error_detail}'
            ) from e

        # Must stay ahead of the RequestError handler: timeouts are request
        # errors too, and this branch reports them more specifically.
        except httpx.TimeoutException as e:
            error_detail = f'Request timeout after 30 seconds to {url}'
            _logger.error(
                '[GitHub V1] %s. Request payload: %s',
                error_detail,
                payload,
                exc_info=True,
            )
            raise Exception(error_detail) from e

        except httpx.RequestError as e:
            error_detail = f'Request error to {url}: {str(e)}'
            _logger.error(
                '[GitHub V1] %s. Request payload: %s',
                error_detail,
                payload,
                exc_info=True,
            )
            raise Exception(error_detail) from e

    # -------------------------------------------------------------------------
    # Summary orchestration
    # -------------------------------------------------------------------------

    async def _request_summary(self, conversation_id: UUID) -> str:
        """
        Ask the agent to produce a summary of its work and return the agent response.

        NOTE: This method now returns a string (the agent server's response text)
        and raises exceptions on errors. The wrapping into EventCallbackResult
        is handled by __call__.
        """
        # Import services within the method to avoid circular imports
        from openhands.app_server.config import (
            get_app_conversation_info_service,
            get_httpx_client,
            get_sandbox_service,
        )
        from openhands.app_server.services.injector import InjectorState
        from openhands.app_server.user.specifiy_user_context import (
            ADMIN,
            USER_CONTEXT_ATTR,
        )

        # Create injector state for dependency injection
        state = InjectorState()
        setattr(state, USER_CONTEXT_ATTR, ADMIN)

        async with (
            get_app_conversation_info_service(state) as app_conversation_info_service,
            get_sandbox_service(state) as sandbox_service,
            get_httpx_client(state) as httpx_client,
        ):
            # 1. Conversation lookup
            app_conversation_info = ensure_conversation_found(
                await app_conversation_info_service.get_app_conversation_info(
                    conversation_id
                ),
                conversation_id,
            )

            # 2. Sandbox lookup + validation
            sandbox = ensure_running_sandbox(
                await sandbox_service.get_sandbox(app_conversation_info.sandbox_id),
                app_conversation_info.sandbox_id,
            )

            assert sandbox.session_api_key is not None, (
                f'No session API key for sandbox: {sandbox.id}'
            )

            # 3. URL + instruction
            agent_server_url = get_agent_server_url_from_sandbox(sandbox)

            # Render the summary prompt that is sent to the agent
            message_content = get_prompt_template('summary_prompt.j2')

            # Ask the agent and return the response text
            return await self._ask_question(
                httpx_client=httpx_client,
                agent_server_url=agent_server_url,
                conversation_id=conversation_id,
                session_api_key=sandbox.session_api_key,
                message_content=message_content,
            )
|
||||
@@ -209,10 +209,6 @@ class SQLEventCallbackService(EventCallbackService):
|
||||
for callback in callbacks
|
||||
]
|
||||
)
|
||||
|
||||
# Persist any new changes callbacks may have made to itself
|
||||
for callback in callbacks:
|
||||
await self.save_event_callback(callback)
|
||||
await self.db_session.commit()
|
||||
|
||||
async def execute_callback(
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from uuid import UUID
|
||||
|
||||
from openhands.app_server.sandbox.sandbox_models import (
|
||||
AGENT_SERVER,
|
||||
SandboxInfo,
|
||||
SandboxStatus,
|
||||
)
|
||||
from openhands.app_server.utils.docker_utils import (
|
||||
replace_localhost_hostname_for_docker,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
AppConversationInfo,
|
||||
)
|
||||
|
||||
|
||||
def get_conversation_url() -> str:
    """Return the conversation URL template for the configured web URL.

    The result ends in a literal ``{}`` placeholder, which callers fill in
    with the conversation ID via ``str.format``.
    """
    # Imported lazily, as in the rest of this module's config access.
    from openhands.app_server.config import get_global_config

    base_url = get_global_config().web_url
    # The doubled braces emit a literal '{}' for the caller's later .format().
    return f'{base_url}/conversations/{{}}'
|
||||
|
||||
|
||||
def ensure_conversation_found(
    app_conversation_info: AppConversationInfo | None, conversation_id: UUID
) -> AppConversationInfo:
    """Return the given conversation info, or fail if there is none.

    Raises:
        RuntimeError: If ``app_conversation_info`` is falsy; the message names
            ``conversation_id``.
    """
    if app_conversation_info:
        return app_conversation_info
    raise RuntimeError(f'Conversation not found: {conversation_id}')
|
||||
|
||||
|
||||
def ensure_running_sandbox(sandbox: SandboxInfo | None, sandbox_id: str) -> SandboxInfo:
    """Validate that a sandbox can be used and return it unchanged.

    The checks run in order: the sandbox exists, its status is RUNNING, and it
    has a session API key.

    Raises:
        RuntimeError: On the first check that fails.
    """
    if not sandbox:
        failure = f'Sandbox not found: {sandbox_id}'
    elif sandbox.status != SandboxStatus.RUNNING:
        failure = f'Sandbox not running: {sandbox_id}'
    elif not sandbox.session_api_key:
        failure = f'No session API key for sandbox: {sandbox.id}'
    else:
        return sandbox
    raise RuntimeError(failure)
|
||||
|
||||
|
||||
def get_agent_server_url_from_sandbox(sandbox: SandboxInfo) -> str:
    """Look up the agent server URL among a sandbox's exposed URLs.

    Returns:
        The URL of the first exposed entry named ``AGENT_SERVER``, passed
        through ``replace_localhost_hostname_for_docker``.

    Raises:
        RuntimeError: If the sandbox exposes no URLs, or none of them is the
            agent server.
    """
    candidates = sandbox.exposed_urls
    if not candidates:
        raise RuntimeError(f'No exposed URLs configured for sandbox {sandbox.id!r}')

    for candidate in candidates:
        if candidate.name == AGENT_SERVER:
            return replace_localhost_hostname_for_docker(candidate.url)

    raise RuntimeError(f'No {AGENT_SERVER!r} URL found for sandbox {sandbox.id!r}')
|
||||
|
||||
|
||||
def get_prompt_template(template_name: str) -> str:
    """Render a resolver prompt template and return the resulting text.

    The template is rendered without any variables.

    Args:
        template_name: File name of the template, e.g. ``'summary_prompt.j2'``.

    Returns:
        The rendered template text.
    """
    from pathlib import Path

    from jinja2 import Environment, FileSystemLoader

    # The working-directory-relative path stays first so existing deployments
    # behave the same. The second entry is a fallback for processes started
    # from another directory.
    # NOTE(review): the fallback assumes this module lives at
    # openhands/app_server/event_callback/, so parents[2] is the 'openhands'
    # package directory - confirm if the module is ever moved.
    packaged_templates = (
        Path(__file__).resolve().parents[2] / 'integrations' / 'templates' / 'resolver'
    )
    jinja_env = Environment(
        loader=FileSystemLoader(
            ['openhands/integrations/templates/resolver/', str(packaged_templates)]
        )
    )
    return jinja_env.get_template(template_name).render()
|
||||
@@ -6,10 +6,9 @@ import logging
|
||||
import pkgutil
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi.security import APIKeyHeader
|
||||
from jwt import InvalidTokenError
|
||||
from pydantic import SecretStr
|
||||
|
||||
from openhands import tools # type: ignore[attr-defined]
|
||||
from openhands.agent_server.models import ConversationInfo, Success
|
||||
@@ -34,7 +33,6 @@ from openhands.app_server.sandbox.sandbox_models import SandboxInfo
|
||||
from openhands.app_server.sandbox.sandbox_service import SandboxService
|
||||
from openhands.app_server.services.injector import InjectorState
|
||||
from openhands.app_server.services.jwt_service import JwtService
|
||||
from openhands.app_server.user.auth_user_context import AuthUserContext
|
||||
from openhands.app_server.user.specifiy_user_context import (
|
||||
USER_CONTEXT_ATTR,
|
||||
SpecifyUserContext,
|
||||
@@ -43,10 +41,6 @@ from openhands.app_server.user.specifiy_user_context import (
|
||||
from openhands.app_server.user.user_context import UserContext
|
||||
from openhands.integrations.provider import ProviderType
|
||||
from openhands.sdk import Event
|
||||
from openhands.server.user_auth.default_user_auth import DefaultUserAuth
|
||||
from openhands.server.user_auth.user_auth import (
|
||||
get_for_user as get_user_auth_for_user,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix='/webhooks', tags=['Webhooks'])
|
||||
sandbox_service_dependency = depends_sandbox_service()
|
||||
@@ -160,34 +154,23 @@ async def on_event(
|
||||
async def get_secret(
|
||||
access_token: str = Depends(APIKeyHeader(name='X-Access-Token', auto_error=False)),
|
||||
jwt_service: JwtService = jwt_dependency,
|
||||
) -> Response:
|
||||
) -> str:
|
||||
"""Given an access token, retrieve a user secret. The access token
|
||||
is limited by user and provider type, and may include a timeout, limiting
|
||||
the damage in the event that a token is ever leaked"""
|
||||
try:
|
||||
payload = jwt_service.verify_jws_token(access_token)
|
||||
user_id = payload['user_id']
|
||||
provider_type = ProviderType(payload['provider_type'])
|
||||
|
||||
# Get UserAuth for the user_id
|
||||
if user_id:
|
||||
user_auth = await get_user_auth_for_user(user_id)
|
||||
else:
|
||||
# OSS mode - use default user auth
|
||||
user_auth = DefaultUserAuth()
|
||||
|
||||
# Create UserContext directly
|
||||
user_context = AuthUserContext(user_auth=user_auth)
|
||||
|
||||
secret = await user_context.get_latest_token(provider_type)
|
||||
provider_type = ProviderType[payload['provider_type']]
|
||||
user_injector = config.user
|
||||
assert user_injector is not None
|
||||
user_context = await user_injector.get_for_user(user_id)
|
||||
secret = None
|
||||
if user_context:
|
||||
secret = await user_context.get_latest_token(provider_type)
|
||||
if secret is None:
|
||||
raise HTTPException(404, 'No such provider')
|
||||
if isinstance(secret, SecretStr):
|
||||
secret_value = secret.get_secret_value()
|
||||
else:
|
||||
secret_value = secret
|
||||
|
||||
return Response(content=secret_value, media_type='text/plain')
|
||||
return secret
|
||||
except InvalidTokenError:
|
||||
raise HTTPException(status.HTTP_401_UNAUTHORIZED)
|
||||
|
||||
|
||||
@@ -217,9 +217,7 @@ class DockerSandboxService(SandboxService):
|
||||
sandboxes = []
|
||||
|
||||
for container in all_containers:
|
||||
if container.name and container.name.startswith(
|
||||
self.container_name_prefix
|
||||
):
|
||||
if container.name.startswith(self.container_name_prefix):
|
||||
sandbox_info = await self._container_to_checked_sandbox_info(
|
||||
container
|
||||
)
|
||||
|
||||
@@ -42,8 +42,6 @@ def get_default_sandbox_specs():
|
||||
'LOG_JSON': 'true',
|
||||
'OH_CONVERSATIONS_PATH': '/workspace/conversations',
|
||||
'OH_BASH_EVENTS_DIR': '/workspace/bash_events',
|
||||
'PYTHONUNBUFFERED': '1',
|
||||
'ENV_LOG_LEVEL': '20',
|
||||
},
|
||||
working_dir='/workspace/project',
|
||||
)
|
||||
|
||||
@@ -11,7 +11,7 @@ from openhands.sdk.utils.models import DiscriminatedUnionMixin
|
||||
|
||||
# The version of the agent server to use for deployments.
|
||||
# Typically this will be the same as the values from the pyproject.toml
|
||||
AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:5f62cee-python'
|
||||
AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:15f565b-python'
|
||||
|
||||
|
||||
class SandboxSpecService(ABC):
|
||||
|
||||
@@ -71,7 +71,7 @@ def get_impl(cls: type[T], impl_name: str | None) -> type[T]:
|
||||
Common Use Cases:
|
||||
- Server components (ConversationService, UserAuth, etc.)
|
||||
- Storage implementations (ConversationStore, SettingsStore, etc.)
|
||||
- Service integrations (GitHub, GitLab, Bitbucket, Azure DevOps services)
|
||||
- Service integrations (GitHub, GitLab, Bitbucket services)
|
||||
|
||||
The implementation is cached to avoid repeated imports of the same class.
|
||||
"""
|
||||
|
||||
@@ -42,6 +42,10 @@ from openhands.core.exceptions import (
|
||||
from openhands.core.logger import LOG_ALL_EVENTS
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.schema import AgentState
|
||||
from openhands.utils.posthog_tracker import (
|
||||
track_agent_task_completed,
|
||||
track_credit_limit_reached,
|
||||
)
|
||||
from openhands.events import (
|
||||
EventSource,
|
||||
EventStream,
|
||||
@@ -709,6 +713,20 @@ class AgentController:
|
||||
EventSource.ENVIRONMENT,
|
||||
)
|
||||
|
||||
# Track agent task completion in PostHog
|
||||
if new_state == AgentState.FINISHED:
|
||||
try:
|
||||
# Get app_mode from environment, default to 'oss'
|
||||
app_mode = os.environ.get('APP_MODE', 'oss')
|
||||
track_agent_task_completed(
|
||||
conversation_id=self.id,
|
||||
user_id=self.user_id,
|
||||
app_mode=app_mode,
|
||||
)
|
||||
except Exception as e:
|
||||
# Don't let tracking errors interrupt the agent
|
||||
self.log('warning', f'Failed to track agent completion: {e}')
|
||||
|
||||
# Save state whenever agent state changes to ensure we don't lose state
|
||||
# in case of crashes or unexpected circumstances
|
||||
self.save_state()
|
||||
@@ -877,7 +895,7 @@ class AgentController:
|
||||
|
||||
# Synchronize spend across all llm services with the budget flag
|
||||
self.state_tracker.sync_budget_flag_with_metrics()
|
||||
if self.agent.config.enable_stuck_detection and self._is_stuck():
|
||||
if self._is_stuck():
|
||||
await self._react_to_exception(
|
||||
AgentStuckInLoopError('Agent got stuck in a loop')
|
||||
)
|
||||
@@ -887,6 +905,18 @@ class AgentController:
|
||||
self.state_tracker.run_control_flags()
|
||||
except Exception as e:
|
||||
logger.warning('Control flag limits hit')
|
||||
# Track credit limit reached if it's a budget exception
|
||||
if 'budget' in str(e).lower() and self.state.budget_flag:
|
||||
try:
|
||||
track_credit_limit_reached(
|
||||
conversation_id=self.id,
|
||||
user_id=self.user_id,
|
||||
current_budget=self.state.budget_flag.current_value,
|
||||
max_budget=self.state.budget_flag.max_value,
|
||||
)
|
||||
except Exception as track_error:
|
||||
# Don't let tracking errors interrupt the agent
|
||||
self.log('warning', f'Failed to track credit limit: {track_error}')
|
||||
await self._react_to_exception(e)
|
||||
return
|
||||
|
||||
|
||||
@@ -32,7 +32,6 @@ The `load_from_env` function in the config package is responsible for loading co
|
||||
export LLM_API_KEY='your_api_key_here'
|
||||
export LLM_MODEL='gpt-4'
|
||||
export AGENT_MEMORY_ENABLED='true'
|
||||
export AGENT_ENABLE_STUCK_DETECTION='false' # Disable loop detection
|
||||
export SANDBOX_TIMEOUT='300'
|
||||
```
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user