mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ccca13c6b9 | |||
| 75a472cf74 | |||
| dbe5e1628b | |||
| a84e02b100 | |||
| 46d9e7a633 | |||
| 61b7053eee | |||
| ae58f41aa3 | |||
| d42c4779c0 | |||
| 022644b4fe | |||
| 5378f9f446 | |||
| f25a2c00b0 | |||
| cfe01d4c8a | |||
| b2fec83b9a | |||
| b96f754b55 | |||
| 6135aad457 | |||
| b40c4e41e4 | |||
| f904fa6a56 | |||
| 9576059eda | |||
| 03d2d9a57a | |||
| 0a81d5a977 | |||
| c53b222ef4 | |||
| f45920de09 | |||
| cf2971b374 | |||
| 70a59f48d3 | |||
| dc2d1fcd9a |
@@ -22,7 +22,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install poetry via pipx
|
||||
uses: abatilo/actions-poetry@v4
|
||||
uses: abatilo/actions-poetry@v3
|
||||
with:
|
||||
poetry-version: 2.1.3
|
||||
|
||||
@@ -169,6 +169,7 @@ jobs:
|
||||
- name: Run end-to-end tests
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.E2E_TEST_GITHUB_TOKEN }}
|
||||
GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL || 'gpt-4o' }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY || 'test-key' }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
@@ -187,6 +188,7 @@ jobs:
|
||||
test_settings.py::test_github_token_configuration \
|
||||
test_conversation.py::test_conversation_start \
|
||||
test_browsing_catchphrase.py::test_browsing_catchphrase \
|
||||
test_gitlab_integration.py::test_gitlab_repository_cloning \
|
||||
-v --no-header --capture=no --timeout=900
|
||||
|
||||
- name: Upload test results
|
||||
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: poetry install --with dev,test,runtime
|
||||
- name: Run Windows unit tests
|
||||
run: poetry run pytest -svv tests/unit/runtime/utils/test_windows_bash.py
|
||||
run: poetry run pytest -svv tests/unit/test_windows_bash.py
|
||||
env:
|
||||
PYTHONPATH: ".;$env:PYTHONPATH"
|
||||
DEBUG: "1"
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
name: Welcome Good First Issue
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [labeled]
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
comment-on-good-first-issue:
|
||||
if: github.event.label.name == 'good first issue'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check if welcome comment already exists
|
||||
id: check_comment
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
result-encoding: string
|
||||
script: |
|
||||
const issueNumber = context.issue.number;
|
||||
const comments = await github.rest.issues.listComments({
|
||||
...context.repo,
|
||||
issue_number: issueNumber
|
||||
});
|
||||
|
||||
const alreadyCommented = comments.data.some(
|
||||
(comment) =>
|
||||
comment.body.includes('<!-- auto-comment:good-first-issue -->')
|
||||
);
|
||||
|
||||
return alreadyCommented ? 'true' : 'false';
|
||||
|
||||
- name: Leave welcome comment
|
||||
if: steps.check_comment.outputs.result == 'false'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;
|
||||
|
||||
await github.rest.issues.createComment({
|
||||
...context.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: "🙌 **Hey there, future contributor!** 🙌\n\n" +
|
||||
"This issue has been labeled as **good first issue**, which means it's a great place to get started with the OpenHands project.\n\n" +
|
||||
"If you're interested in working on it, feel free to! No need to ask for permission.\n\n" +
|
||||
"Be sure to check out our [development setup guide](" + repoUrl + "/blob/main/Development.md) to get your environment set up, and follow our [contribution guidelines](" + repoUrl + "/blob/main/CONTRIBUTING.md) when you're ready to submit a fix.\n\n" +
|
||||
"🙌 Happy hacking! 🙌\n\n" +
|
||||
"<!-- auto-comment:good-first-issue -->"
|
||||
});
|
||||
+1
-1
@@ -159,7 +159,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker
|
||||
container image by setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.54-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.53-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
@@ -79,17 +79,17 @@ You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)
|
||||
You can also run OpenHands directly with Docker:
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
+3
-3
@@ -51,17 +51,17 @@ OpenHands也可以使用Docker在本地系统上运行。
|
||||
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
> **注意**: 如果您在0.44版本之前使用过OpenHands,您可能需要运行 `mv ~/.openhands-state ~/.openhands` 来将对话历史迁移到新位置。
|
||||
|
||||
+3
-3
@@ -42,17 +42,17 @@ OpenHandsはDockerを利用してローカル環境でも実行できます。
|
||||
> 公共ネットワークで実行していますか?[Hardened Docker Installation Guide](https://docs.all-hands.dev/usage/runtimes/docker#hardened-docker-installation)を参照して、ネットワークバインディングの制限や追加のセキュリティ対策を実施してください。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
**注**: バージョン0.44以前のOpenHandsを使用していた場合は、会話履歴を移行するために `mv ~/.openhands-state ~/.openhands` を実行してください。
|
||||
|
||||
@@ -21,7 +21,7 @@ ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& apt-get install -y curl make git build-essential jq gettext \
|
||||
&& apt-get install -y curl make git build-essential \
|
||||
&& python3 -m pip install poetry --break-system-packages
|
||||
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
|
||||
@@ -12,7 +12,7 @@ services:
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
- DOCKER_HOST_ADDR=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.54-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.53-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
+1
-1
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of ~/.openhands for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
+2082
-3928
File diff suppressed because it is too large
Load Diff
@@ -119,7 +119,7 @@ The conversation history will be saved in `~/.openhands/sessions`.
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -128,8 +128,8 @@ docker run -it \
|
||||
-v ~/.openhands:/.openhands \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
|
||||
python -m openhands.cli.entry --override-cli-mode true
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
|
||||
python -m openhands.cli.main --override-cli-mode true
|
||||
```
|
||||
|
||||
<Note>
|
||||
|
||||
@@ -61,7 +61,7 @@ export GITHUB_TOKEN="your-token" # Required for repository operations
|
||||
# Run OpenHands
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -73,7 +73,7 @@ docker run -it \
|
||||
-v ~/.openhands:/.openhands \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -68,23 +68,23 @@ Download and install the LM Studio desktop app from [lmstudio.ai](https://lmstud
|
||||
1. Check [the installation guide](/usage/local-setup) and ensure all prerequisites are met before running OpenHands, then run:
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
2. Wait until the server is running (see log below):
|
||||
```
|
||||
Digest: sha256:e72f9baecb458aedb9afc2cd5bc935118d1868719e55d50da73190d3a85c674f
|
||||
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
Starting OpenHands...
|
||||
Running OpenHands as root
|
||||
14:22:13 - openhands:INFO: server_config.py:50 - Using config class None
|
||||
|
||||
@@ -109,17 +109,17 @@ Note that you'll still need `uv` installed for the default MCP servers to work p
|
||||
<Accordion title="Docker Command (Click to expand)">
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
@@ -130,28 +130,3 @@ docker run # ... \
|
||||
<Note>
|
||||
**Docker Desktop Required**: Network isolation features, including custom networks and `host.docker.internal` routing, require Docker Desktop. Docker Engine alone does not support these features on localhost across custom networks. If you're using Docker Engine without Docker Desktop, network isolation may not work as expected.
|
||||
</Note>
|
||||
|
||||
### Sidecar Containers
|
||||
|
||||
If you want to run sidecar containers to the sandbox 'runner' containers without exposing the sandbox containers to the host network, you can use the `SANDBOX_ADDITIONAL_NETWORKS` environment variable to specify additional Docker network names that should be added to the sandbox containers.
|
||||
|
||||
```bash
|
||||
docker network create openhands-sccache
|
||||
|
||||
docker run -d \
|
||||
--hostname openhandsredis \
|
||||
--network openhands-sccache \
|
||||
redis
|
||||
|
||||
docker run # ...
|
||||
-e SANDBOX_ADDITIONAL_NETWORKS='["openhands-sccache"]' \
|
||||
# ...
|
||||
```
|
||||
|
||||
Then all sandbox instances will have to access a shared redis instance at `openhandsredis:6379`.
|
||||
|
||||
#### Docker Compose gotcha
|
||||
|
||||
Note that Docker Compose adds a prefix (a scope) by default to created networks, which is not taken into account by the additional networks config. Therefore when using docker compose you have to either:
|
||||
- specify a network name via the `name` field to remove the scoping (https://docs.docker.com/reference/compose-file/networks/#name)
|
||||
- or provide the scope within the given config (e.g. `SANDBOX_ADDITIONAL_NETWORKS: '["myscope_openhands-sccache"]'` where `myscope` is the docker-compose assigned prefix).
|
||||
@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -147,7 +146,7 @@ def process_instance(
|
||||
|
||||
logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
|
||||
test_result = game.reward()
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -274,7 +273,7 @@ def process_instance(
|
||||
# remove when it becomes unnecessary
|
||||
histories = compatibility_for_eval_history_pairs(state.history)
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -247,7 +246,7 @@ def process_instance(
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
histories = compatibility_for_eval_history_pairs(state.history)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -15,7 +15,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -295,7 +294,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -423,7 +422,7 @@ def process_instance(
|
||||
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -89,7 +88,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -16,7 +16,6 @@ from evaluation.utils.shared import (
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -481,7 +480,7 @@ def process_instance(
|
||||
|
||||
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -295,7 +294,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
test_result = complete_runtime(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
|
||||
@@ -22,7 +22,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -270,7 +269,7 @@ Here is the task:
|
||||
'model_answer': model_answer,
|
||||
'ground_truth': instance['Final answer'],
|
||||
}
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -109,7 +108,7 @@ def process_instance(
|
||||
# attempt to parse model_answer
|
||||
ast_eval_fn = instance['ast_eval']
|
||||
correct, hallucination = ast_eval_fn(instance_id, model_answer_raw)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
logger.info(
|
||||
f'Final message: {model_answer_raw} | Correctness: {correct} | Hallucination: {hallucination}'
|
||||
)
|
||||
|
||||
@@ -30,7 +30,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -293,7 +292,7 @@ Ok now its time to start solving the question. Good luck!
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -249,7 +248,7 @@ def process_instance(
|
||||
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
|
||||
@@ -22,7 +22,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -336,7 +335,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
|
||||
)
|
||||
)
|
||||
assert state is not None
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -248,7 +247,7 @@ def process_instance(
|
||||
)
|
||||
test_result['final_message'] = final_message
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -175,7 +174,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Instruction is the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -15,7 +15,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -206,7 +205,7 @@ def process_instance(
|
||||
task_state = state.extra_data['task_state']
|
||||
logger.info('Task state: ' + str(task_state.to_dict()))
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -26,7 +26,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -251,7 +250,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
)
|
||||
)
|
||||
assert state is not None
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
|
||||
test_result = complete_runtime(runtime)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -219,7 +218,7 @@ If the program uses some packages that are incompatible, please figure out alter
|
||||
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
EvalException,
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -180,7 +179,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
instruction = message_action.content
|
||||
|
||||
@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -135,7 +134,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
correct = eval_answer(str(model_answer_raw), str(answer))
|
||||
logger.info(f'Final message: {model_answer_raw} | Correctness: {correct}')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -180,7 +179,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Instruction obtained from the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -164,7 +163,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Instruction is the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -9,7 +9,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -136,7 +135,7 @@ def process_instance(
|
||||
assert len(histories) > 0, 'History should not be empty'
|
||||
|
||||
test_result: TestResult = test_class.verify_result(runtime, histories)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
finally:
|
||||
runtime.close()
|
||||
|
||||
|
||||
@@ -668,23 +668,8 @@ def is_fatal_runtime_error(error: str | None) -> bool:
|
||||
|
||||
|
||||
def get_metrics(state: State) -> dict[str, Any]:
|
||||
"""Extract metrics for evaluations.
|
||||
|
||||
Prefer ConversationStats (source of truth) and fall back to state.metrics for
|
||||
backward compatibility.
|
||||
"""
|
||||
metrics: dict[str, Any]
|
||||
try:
|
||||
if getattr(state, 'conversation_stats', None):
|
||||
combined = state.conversation_stats.get_combined_metrics()
|
||||
metrics = combined.get()
|
||||
elif getattr(state, 'metrics', None):
|
||||
metrics = state.metrics.get()
|
||||
else:
|
||||
metrics = {}
|
||||
except Exception:
|
||||
metrics = state.metrics.get() if getattr(state, 'metrics', None) else {}
|
||||
|
||||
"""Extract metrics from the state."""
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
metrics['condenser'] = get_condensation_metadata(state)
|
||||
return metrics
|
||||
|
||||
|
||||
@@ -232,16 +232,13 @@ describe("RepositorySelectionForm", () => {
|
||||
renderForm();
|
||||
|
||||
const dropdown = await screen.findByTestId("repo-dropdown");
|
||||
const input = dropdown.querySelector(
|
||||
'input[type="text"]',
|
||||
) as HTMLInputElement;
|
||||
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
|
||||
expect(input).toBeInTheDocument();
|
||||
|
||||
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
|
||||
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
|
||||
"kubernetes/kubernetes",
|
||||
3,
|
||||
"github",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -271,16 +268,13 @@ describe("RepositorySelectionForm", () => {
|
||||
renderForm();
|
||||
|
||||
const dropdown = await screen.findByTestId("repo-dropdown");
|
||||
const input = dropdown.querySelector(
|
||||
'input[type="text"]',
|
||||
) as HTMLInputElement;
|
||||
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
|
||||
expect(input).toBeInTheDocument();
|
||||
|
||||
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
|
||||
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
|
||||
"kubernetes/kubernetes",
|
||||
3,
|
||||
"github",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
+12
-78
@@ -444,38 +444,28 @@ describe("MicroagentManagement", () => {
|
||||
expect(filePath2).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render add microagent button", async () => {
|
||||
it("should display add microagent button in repository accordion", async () => {
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Check that add microagent buttons are present
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
expect(addButtons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("should open modal when add button is clicked", async () => {
|
||||
it("should open add microagent modal when add button is clicked", async () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1302,18 +1292,11 @@ describe("MicroagentManagement", () => {
|
||||
it("should render add microagent button", async () => {
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Check that add microagent buttons are present
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
expect(addButtons.length).toBeGreaterThan(0);
|
||||
@@ -1323,18 +1306,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1385,18 +1361,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1416,18 +1385,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1446,18 +1408,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1486,18 +1441,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1520,18 +1468,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1553,18 +1494,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
|
||||
@@ -136,7 +136,7 @@ describe("Settings Screen", () => {
|
||||
"secrets",
|
||||
"api keys",
|
||||
];
|
||||
const sectionsToExclude = ["llm"];
|
||||
const sectionsToExclude = ["llm", "mcp"];
|
||||
|
||||
renderSettingsScreen();
|
||||
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.54.0",
|
||||
"version": "0.53.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.54.0",
|
||||
"version": "0.53.0",
|
||||
"dependencies": {
|
||||
"@heroui/react": "^2.8.2",
|
||||
"@heroui/use-infinite-scroll": "^2.2.10",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.54.0",
|
||||
"version": "0.53.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
import { useCallback, useMemo, useState } from "react";
|
||||
import { useCallback, useMemo, useRef } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Provider } from "../../types/settings";
|
||||
import { useGitRepositories } from "../../hooks/query/use-git-repositories";
|
||||
import { useSearchRepositories } from "../../hooks/query/use-search-repositories";
|
||||
import { useDebounce } from "../../hooks/use-debounce";
|
||||
import OpenHands from "../../api/open-hands";
|
||||
import { GitRepository } from "../../types/git";
|
||||
import {
|
||||
@@ -21,6 +19,10 @@ export interface GitRepositoryDropdownProps {
|
||||
onChange?: (repository?: GitRepository) => void;
|
||||
}
|
||||
|
||||
interface SearchCache {
|
||||
[key: string]: GitRepository[];
|
||||
}
|
||||
|
||||
export function GitRepositoryDropdown({
|
||||
provider,
|
||||
value,
|
||||
@@ -31,20 +33,6 @@ export function GitRepositoryDropdown({
|
||||
onChange,
|
||||
}: GitRepositoryDropdownProps) {
|
||||
const { t } = useTranslation();
|
||||
const [searchInput, setSearchInput] = useState("");
|
||||
const debouncedSearchInput = useDebounce(searchInput, 300);
|
||||
|
||||
// Process search input to handle URLs
|
||||
const processedSearchInput = useMemo(() => {
|
||||
if (debouncedSearchInput.startsWith("https://")) {
|
||||
const match = debouncedSearchInput.match(
|
||||
/https:\/\/[^/]+\/([^/]+\/[^/]+)/,
|
||||
);
|
||||
return match ? match[1] : debouncedSearchInput;
|
||||
}
|
||||
return debouncedSearchInput;
|
||||
}, [debouncedSearchInput]);
|
||||
|
||||
const {
|
||||
data,
|
||||
fetchNextPage,
|
||||
@@ -57,10 +45,6 @@ export function GitRepositoryDropdown({
|
||||
enabled: !disabled,
|
||||
});
|
||||
|
||||
// Search query for processed input (handles URLs)
|
||||
const { data: searchData, isLoading: isSearchLoading } =
|
||||
useSearchRepositories(processedSearchInput, provider);
|
||||
|
||||
const allOptions: AsyncSelectOption[] = useMemo(
|
||||
() =>
|
||||
data?.pages
|
||||
@@ -74,83 +58,90 @@ export function GitRepositoryDropdown({
|
||||
[data],
|
||||
);
|
||||
|
||||
const searchOptions: AsyncSelectOption[] = useMemo(
|
||||
() =>
|
||||
searchData
|
||||
? searchData.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}))
|
||||
: [],
|
||||
[searchData],
|
||||
);
|
||||
// Keep track of search results
|
||||
const searchCache = useRef<SearchCache>({});
|
||||
|
||||
const selectedOption = useMemo(() => {
|
||||
// First check in loaded pages
|
||||
const option = allOptions.find((opt) => opt.value === value);
|
||||
if (option) return option;
|
||||
|
||||
// If not found, check in search results
|
||||
const searchOption = searchOptions.find((opt) => opt.value === value);
|
||||
if (searchOption) return searchOption;
|
||||
// If not found, check in search cache
|
||||
const repo = Object.values(searchCache.current)
|
||||
.flat()
|
||||
.find((r) => r.id === value);
|
||||
|
||||
if (repo) {
|
||||
return {
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}, [allOptions, searchOptions, value]);
|
||||
}, [allOptions, value]);
|
||||
|
||||
const loadOptions = useCallback(
|
||||
async (inputValue: string): Promise<AsyncSelectOption[]> => {
|
||||
// Update search input to trigger debounced search
|
||||
setSearchInput(inputValue);
|
||||
|
||||
// If empty input, show all loaded options
|
||||
if (!inputValue.trim()) {
|
||||
return allOptions;
|
||||
}
|
||||
|
||||
// For very short inputs, do local filtering
|
||||
if (inputValue.length < 2) {
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(inputValue.toLowerCase()),
|
||||
);
|
||||
}
|
||||
|
||||
// Handle URL inputs by performing direct search
|
||||
// If it looks like a URL, pass the full URL to the API along with the provider
|
||||
if (inputValue.startsWith("https://")) {
|
||||
const match = inputValue.match(/https:\/\/[^/]+\/([^/]+\/[^/]+)/);
|
||||
if (match) {
|
||||
const repoName = match[1];
|
||||
try {
|
||||
// Perform direct search for URL-based inputs
|
||||
const repositories = await OpenHands.searchGitRepositories(
|
||||
try {
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
inputValue,
|
||||
3,
|
||||
provider,
|
||||
);
|
||||
// Cache by URL to preserve mapping
|
||||
searchCache.current[inputValue] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}));
|
||||
} catch (_) {
|
||||
// Fallback: attempt with extracted path if server doesn't support URL search
|
||||
const match = inputValue.match(/https:\/\/[^/]+\/([^/]+\/[^/]+)/);
|
||||
if (match) {
|
||||
const repoName = match[1];
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
repoName,
|
||||
3,
|
||||
provider,
|
||||
);
|
||||
return repositories.map((repo) => ({
|
||||
value: repo.full_name,
|
||||
searchCache.current[repoName] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
data: repo,
|
||||
}));
|
||||
} catch (error) {
|
||||
// Fall back to local filtering if search fails
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(repoName.toLowerCase()),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For regular text inputs, use hook-based search results if available
|
||||
if (searchOptions.length > 0 && processedSearchInput === inputValue) {
|
||||
return searchOptions;
|
||||
// For any other input, search via API for the selected provider
|
||||
if (inputValue.length >= 2) {
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
inputValue,
|
||||
10,
|
||||
provider,
|
||||
);
|
||||
// Cache the search results
|
||||
searchCache.current[inputValue] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}));
|
||||
}
|
||||
|
||||
// Fallback to local filtering while search is loading
|
||||
// For very short inputs, do local filtering
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(inputValue.toLowerCase()),
|
||||
);
|
||||
},
|
||||
[allOptions, searchOptions, processedSearchInput, provider],
|
||||
[allOptions, provider],
|
||||
);
|
||||
|
||||
const handleChange = (option: AsyncSelectOption | null) => {
|
||||
@@ -166,7 +157,9 @@ export function GitRepositoryDropdown({
|
||||
|
||||
// If not found, check in search results
|
||||
if (!repo) {
|
||||
repo = searchData?.find((r) => r.id === option.value);
|
||||
repo = Object.values(searchCache.current)
|
||||
.flat()
|
||||
.find((r) => r.id === option.value);
|
||||
}
|
||||
|
||||
onChange?.(repo);
|
||||
@@ -189,7 +182,7 @@ export function GitRepositoryDropdown({
|
||||
errorMessage={errorMessage}
|
||||
disabled={disabled}
|
||||
isClearable={false}
|
||||
isLoading={isLoading || isFetchingNextPage || isSearchLoading}
|
||||
isLoading={isLoading || isLoading || isFetchingNextPage}
|
||||
cacheOptions
|
||||
defaultOptions={allOptions}
|
||||
onChange={handleChange}
|
||||
|
||||
+1
-1
@@ -17,7 +17,7 @@ export function MicroagentManagementAccordionTitle({
|
||||
<TooltipButton
|
||||
tooltip={repository.full_name}
|
||||
ariaLabel={repository.full_name}
|
||||
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[200px] translate-y-[-1px]"
|
||||
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[232px]"
|
||||
testId="repository-name-tooltip"
|
||||
placement="bottom"
|
||||
>
|
||||
|
||||
+14
-11
@@ -7,6 +7,8 @@ import {
|
||||
} from "#/state/microagent-management-slice";
|
||||
import { RootState } from "#/store";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import PlusIcon from "#/icons/plus.svg?react";
|
||||
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
|
||||
|
||||
interface MicroagentManagementAddMicroagentButtonProps {
|
||||
repository: GitRepository;
|
||||
@@ -23,22 +25,23 @@ export function MicroagentManagementAddMicroagentButton({
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const handleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
|
||||
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
|
||||
e.stopPropagation();
|
||||
dispatch(setAddMicroagentModalVisible(!addMicroagentModalVisible));
|
||||
dispatch(setSelectedRepository(repository));
|
||||
};
|
||||
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleClick}
|
||||
className="translate-y-[-1px]"
|
||||
data-testid="add-microagent-button"
|
||||
>
|
||||
<span className="text-sm font-normal leading-5 text-[#8480FF] cursor-pointer hover:text-[#6C63FF] transition-colors duration-200">
|
||||
{t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
</span>
|
||||
</button>
|
||||
<div onClick={handleClick}>
|
||||
<TooltipButton
|
||||
tooltip={t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
ariaLabel={t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
className="p-0 min-w-0 h-6 w-6 flex items-center justify-center bg-transparent cursor-pointer"
|
||||
testId="add-microagent-button"
|
||||
placement="bottom"
|
||||
>
|
||||
<PlusIcon width={22} height={22} />
|
||||
</TooltipButton>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
-40
@@ -1,5 +1,4 @@
|
||||
import React, { useEffect, useState } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useDispatch, useSelector } from "react-redux";
|
||||
import { MicroagentManagementSidebar } from "./microagent-management-sidebar";
|
||||
import { MicroagentManagementMain } from "./microagent-management-main";
|
||||
@@ -26,12 +25,6 @@ import { GitRepository } from "#/types/git";
|
||||
import { queryClient } from "#/query-client-config";
|
||||
import { Provider } from "#/types/settings";
|
||||
import { MicroagentManagementLearnThisRepoModal } from "./microagent-management-learn-this-repo-modal";
|
||||
import {
|
||||
displaySuccessToast,
|
||||
displayErrorToast,
|
||||
} from "#/utils/custom-toast-handlers";
|
||||
import { getFirstPRUrl } from "#/utils/parse-pr-url";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
// Handle error events
|
||||
const isErrorEvent = (evt: unknown): evt is { error: true; message: string } =>
|
||||
@@ -119,8 +112,6 @@ export function MicroagentManagementContent() {
|
||||
learnThisRepoModalVisible,
|
||||
} = useSelector((state: RootState) => state.microagentManagement);
|
||||
|
||||
const { t } = useTranslation();
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { createConversationAndSubscribe, isPending } =
|
||||
@@ -168,37 +159,6 @@ export function MicroagentManagementContent() {
|
||||
? (selectedRepository as GitRepository).full_name
|
||||
: "";
|
||||
|
||||
// Check if agent is running and ready to work
|
||||
if (
|
||||
isOpenHandsEvent(socketEvent) &&
|
||||
isAgentStateChangeObservation(socketEvent) &&
|
||||
socketEvent.extras.agent_state === AgentState.RUNNING
|
||||
) {
|
||||
displaySuccessToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT),
|
||||
);
|
||||
}
|
||||
|
||||
// Check if agent has finished and we have a PR
|
||||
if (isOpenHandsEvent(socketEvent) && isFinishAction(socketEvent)) {
|
||||
const prUrl = getFirstPRUrl(socketEvent.args.final_thought || "");
|
||||
if (prUrl) {
|
||||
displaySuccessToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW),
|
||||
);
|
||||
} else {
|
||||
// Agent finished but no PR found
|
||||
displaySuccessToast(t(I18nKey.MICROAGENT_MANAGEMENT$PR_NOT_CREATED));
|
||||
}
|
||||
}
|
||||
|
||||
// Handle error events
|
||||
if (isErrorEvent(socketEvent) || isAgentStatusError(socketEvent)) {
|
||||
displayErrorToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT),
|
||||
);
|
||||
}
|
||||
|
||||
if (shouldInvalidateConversationsList(socketEvent)) {
|
||||
invalidateConversationsList(repositoryName);
|
||||
}
|
||||
|
||||
+2
-14
@@ -65,18 +65,6 @@ export function MicroagentManagementRepoMicroagents({
|
||||
}
|
||||
}, [conversations]);
|
||||
|
||||
useEffect(
|
||||
() => () => {
|
||||
dispatch(
|
||||
setSelectedMicroagentItem({
|
||||
microagent: null,
|
||||
conversation: null,
|
||||
}),
|
||||
);
|
||||
},
|
||||
[],
|
||||
);
|
||||
|
||||
// Show loading only when both queries are loading
|
||||
const isLoading = isLoadingMicroagents || isLoadingConversations;
|
||||
|
||||
@@ -94,7 +82,7 @@ export function MicroagentManagementRepoMicroagents({
|
||||
// If there's an error with microagents, show the learn this repo component
|
||||
if (isError) {
|
||||
return (
|
||||
<div>
|
||||
<div className="pb-4">
|
||||
<MicroagentManagementLearnThisRepo repository={repository} />
|
||||
</div>
|
||||
);
|
||||
@@ -105,7 +93,7 @@ export function MicroagentManagementRepoMicroagents({
|
||||
const totalItems = numberOfMicroagents + numberOfConversations;
|
||||
|
||||
return (
|
||||
<div>
|
||||
<div className="pb-4">
|
||||
{totalItems === 0 && (
|
||||
<MicroagentManagementLearnThisRepo repository={repository} />
|
||||
)}
|
||||
|
||||
+2
-4
@@ -97,10 +97,8 @@ export function MicroagentManagementRepositories({
|
||||
variant="splitted"
|
||||
className="w-full px-0 gap-3"
|
||||
itemClasses={{
|
||||
base: "shadow-none bg-transparent cursor-pointer px-0",
|
||||
trigger: "cursor-pointer gap-2 py-3",
|
||||
indicator:
|
||||
"flex items-center justify-center p-0.5 pr-[3px] text-white hover:bg-[#454545] rounded transition-colors duration-200 rotate-180",
|
||||
base: "shadow-none bg-transparent border border-[#ffffff40] rounded-[6px] cursor-pointer",
|
||||
trigger: "cursor-pointer gap-1",
|
||||
}}
|
||||
selectionMode="multiple"
|
||||
>
|
||||
|
||||
@@ -23,7 +23,7 @@ export function ModalBackdrop({ children, onClose }: ModalBackdropProps) {
|
||||
<div className="fixed inset-0 flex items-center justify-center z-20">
|
||||
<div
|
||||
onClick={handleClick}
|
||||
className="fixed inset-0 bg-black opacity-60"
|
||||
className="fixed inset-0 bg-black bg-opacity-80"
|
||||
/>
|
||||
<div className="relative">{children}</div>
|
||||
</div>
|
||||
|
||||
@@ -7,7 +7,12 @@ export const useRepositoryBranches = (repository: string | null) =>
|
||||
queryKey: ["repository", repository, "branches"],
|
||||
queryFn: async () => {
|
||||
if (!repository) return [];
|
||||
return OpenHands.getRepositoryBranches(repository);
|
||||
try {
|
||||
return await OpenHands.getRepositoryBranches(repository);
|
||||
} catch {
|
||||
// If we can't list branches (e.g., missing/invalid token), treat as no branches
|
||||
return [];
|
||||
}
|
||||
},
|
||||
enabled: !!repository,
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
|
||||
@@ -810,8 +810,4 @@ export enum I18nKey {
|
||||
PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION = "PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION",
|
||||
PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION = "PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION",
|
||||
SETTINGS = "SETTINGS",
|
||||
MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT = "MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT",
|
||||
MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW = "MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW",
|
||||
MICROAGENT_MANAGEMENT$PR_NOT_CREATED = "MICROAGENT_MANAGEMENT$PR_NOT_CREATED",
|
||||
MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT = "MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT",
|
||||
}
|
||||
|
||||
@@ -12958,69 +12958,5 @@
|
||||
"tr": "A server with this URL already exists for the selected type",
|
||||
"de": "A server with this URL already exists for the selected type",
|
||||
"uk": "A server with this URL already exists for the selected type"
|
||||
},
|
||||
"MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT": {
|
||||
"en": "Opening a PR to create the microagent for you...",
|
||||
"ja": "マイクロエージェントを作成するためのプルリクエストを作成しています...",
|
||||
"zh-CN": "正在为您创建微代理的拉取请求...",
|
||||
"zh-TW": "正在為您建立微代理的拉取請求...",
|
||||
"ko-KR": "마이크로에이전트를 생성하기 위한 PR을 열고 있습니다...",
|
||||
"no": "Åpner en PR for å opprette mikroagenten for deg...",
|
||||
"it": "Apertura di una PR per creare il microagente per te...",
|
||||
"pt": "Abrindo um PR para criar o microagente para você...",
|
||||
"es": "Abriendo un PR para crear el microagente para ti...",
|
||||
"ar": "يتم فتح طلب سحب لإنشاء الوكيل الدقيق من أجلك...",
|
||||
"fr": "Ouverture d'une PR pour créer le microagent pour vous...",
|
||||
"tr": "Sizin için mikro ajanı oluşturmak üzere bir PR açılıyor...",
|
||||
"de": "Es wird ein PR geöffnet, um den Microagent für Sie zu erstellen...",
|
||||
"uk": "Відкривається PR для створення мікроагента для вас..."
|
||||
},
|
||||
"MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW": {
|
||||
"en": "PR is ready for review! The microagent has been created successfully.",
|
||||
"ja": "PRのレビューが可能です!マイクロエージェントが正常に作成されました。",
|
||||
"zh-CN": "PR已准备好审核!微代理已成功创建。",
|
||||
"zh-TW": "PR 已準備好審查!微代理已成功建立。",
|
||||
"ko-KR": "PR이 검토를 위해 준비되었습니다! 마이크로에이전트가 성공적으로 생성되었습니다.",
|
||||
"no": "PR er klar for gjennomgang! Mikroagenten har blitt opprettet.",
|
||||
"it": "La PR è pronta per la revisione! Il microagente è stato creato con successo.",
|
||||
"pt": "PR pronto para revisão! O microagente foi criado com sucesso.",
|
||||
"es": "¡La PR está lista para revisión! El microagente se ha creado correctamente.",
|
||||
"ar": "طلب السحب جاهز للمراجعة! تم إنشاء الوكيل الدقيق بنجاح.",
|
||||
"fr": "La PR est prête pour révision ! Le microagent a été créé avec succès.",
|
||||
"tr": "PR incelemeye hazır! Mikro ajan başarıyla oluşturuldu.",
|
||||
"de": "PR ist bereit zur Überprüfung! Der Microagent wurde erfolgreich erstellt.",
|
||||
"uk": "PR готовий до перегляду! Мікроагента успішно створено."
|
||||
},
|
||||
"MICROAGENT_MANAGEMENT$PR_NOT_CREATED": {
|
||||
"en": "The agent has finished its task but was unable to create a PR.",
|
||||
"ja": "エージェントはタスクを完了しましたが、PRを作成できませんでした。",
|
||||
"zh-CN": "代理已完成任务,但无法创建 PR。",
|
||||
"zh-TW": "代理已完成任務,但無法建立 PR。",
|
||||
"ko-KR": "에이전트가 작업을 완료했지만 PR을 생성할 수 없었습니다.",
|
||||
"no": "Agenten har fullført oppgaven, men klarte ikke å opprette en PR.",
|
||||
"it": "L'agente ha terminato il suo compito ma non è riuscito a creare una PR.",
|
||||
"pt": "O agente concluiu sua tarefa, mas não conseguiu criar um PR.",
|
||||
"es": "El agente ha terminado su tarea pero no pudo crear un PR.",
|
||||
"ar": "أكمل الوكيل مهمته لكنه لم يتمكن من إنشاء طلب سحب (PR).",
|
||||
"fr": "L'agent a terminé sa tâche mais n'a pas pu créer de PR.",
|
||||
"tr": "Ajan görevini tamamladı ancak bir PR oluşturamadı.",
|
||||
"de": "Der Agent hat seine Aufgabe abgeschlossen, konnte aber keinen PR erstellen.",
|
||||
"uk": "Агент завершив завдання, але не зміг створити PR."
|
||||
},
|
||||
"MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT": {
|
||||
"en": "Something went wrong. Try initiating the microagent again.",
|
||||
"ja": "問題が発生しました。もう一度マイクロエージェントを開始してください。",
|
||||
"zh-CN": "出现了问题。请重试启动微代理。",
|
||||
"zh-TW": "發生錯誤。請再次嘗試啟動微代理。",
|
||||
"ko-KR": "문제가 발생했습니다. 마이크로에이전트를 다시 시작해 보세요.",
|
||||
"no": "Noe gikk galt. Prøv å starte mikroagenten på nytt.",
|
||||
"it": "Qualcosa è andato storto. Prova a iniziare di nuovo il microagente.",
|
||||
"pt": "Algo deu errado. Tente iniciar o microagente novamente.",
|
||||
"es": "Algo salió mal. Intenta iniciar el microagente de nuevo.",
|
||||
"ar": "حدث خطأ ما. حاول بدء تشغيل الوكيل الدقيق مرة أخرى.",
|
||||
"fr": "Une erreur s'est produite. Essayez de relancer le microagent.",
|
||||
"tr": "Bir şeyler ters gitti. Mikro ajanı tekrar başlatmayı deneyin.",
|
||||
"de": "Etwas ist schiefgelaufen. Versuchen Sie, den Microagenten erneut zu starten.",
|
||||
"uk": "Щось пішло не так. Спробуйте ініціювати мікроагента ще раз."
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ const SAAS_NAV_ITEMS = [
|
||||
{ to: "/settings/billing", text: "SETTINGS$NAV_CREDITS" },
|
||||
{ to: "/settings/secrets", text: "SETTINGS$NAV_SECRETS" },
|
||||
{ to: "/settings/api-keys", text: "SETTINGS$NAV_API_KEYS" },
|
||||
{ to: "/settings/mcp", text: "SETTINGS$NAV_MCP" },
|
||||
];
|
||||
|
||||
const OSS_NAV_ITEMS = [
|
||||
|
||||
@@ -83,7 +83,7 @@ from openhands.microagent.microagent import BaseMicroagent
|
||||
from openhands.runtime import get_runtime_cls
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.storage.settings.file_settings_store import FileSettingsStore
|
||||
from openhands.utils.utils import create_registry_and_conversation_stats
|
||||
from openhands.utils.utils import create_registry_and_convo_stats
|
||||
|
||||
|
||||
async def cleanup_session(
|
||||
@@ -148,7 +148,7 @@ async def run_session(
|
||||
None, display_initialization_animation, 'Initializing...', is_loaded
|
||||
)
|
||||
|
||||
llm_registry, conversation_stats, config = create_registry_and_conversation_stats(
|
||||
llm_registry, convo_stats, config = create_registry_and_convo_stats(
|
||||
config,
|
||||
sid,
|
||||
None,
|
||||
@@ -169,9 +169,7 @@ async def run_session(
|
||||
|
||||
runtime.subscribe_to_shell_stream(stream_to_console)
|
||||
|
||||
controller, initial_state = create_controller(
|
||||
agent, runtime, config, conversation_stats
|
||||
)
|
||||
controller, initial_state = create_controller(agent, runtime, config, convo_stats)
|
||||
|
||||
event_stream = runtime.event_stream
|
||||
|
||||
@@ -275,7 +273,7 @@ async def run_session(
|
||||
|
||||
if event.agent_state == AgentState.RUNNING:
|
||||
display_agent_running_message()
|
||||
start_pause_listener(loop, is_paused, event_stream, config)
|
||||
start_pause_listener(loop, is_paused, event_stream)
|
||||
|
||||
def on_event(event: Event) -> None:
|
||||
loop.create_task(on_event_async(event))
|
||||
|
||||
+8
-181
@@ -87,9 +87,6 @@ COMMANDS = {
|
||||
|
||||
print_lock = threading.Lock()
|
||||
|
||||
# Lock to debounce sending Ctrl+C interrupts to the running command
|
||||
_interrupt_lock: asyncio.Lock = asyncio.Lock()
|
||||
|
||||
pause_task: asyncio.Task | None = None # No more than one pause task
|
||||
|
||||
|
||||
@@ -662,15 +659,6 @@ def display_help() -> None:
|
||||
commands_html += f'<gold><b>{command}</b></gold> - <grey>{description}</grey>\n'
|
||||
print_formatted_text(HTML(commands_html))
|
||||
|
||||
# Keyboard shortcuts section
|
||||
print_formatted_text(HTML('\nKeyboard shortcuts:'))
|
||||
shortcuts_html = (
|
||||
'<gold><b>Ctrl+P</b></gold> - <grey>Pause the agent</grey>\n'
|
||||
'<gold><b>Ctrl+C</b></gold> - <grey>Pause the agent; press twice quickly to interrupt a running command</grey>\n'
|
||||
'<gold><b>Ctrl+D</b></gold> - <grey>Pause the agent</grey>\n'
|
||||
)
|
||||
print_formatted_text(HTML(shortcuts_html))
|
||||
|
||||
# Footer
|
||||
print_formatted_text(
|
||||
HTML(
|
||||
@@ -876,13 +864,12 @@ async def read_confirmation_input(config: OpenHandsConfig) -> str:
|
||||
def start_pause_listener(
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
done_event: asyncio.Event,
|
||||
event_stream: EventStream,
|
||||
config: OpenHandsConfig,
|
||||
event_stream,
|
||||
) -> None:
|
||||
global pause_task
|
||||
if pause_task is None or pause_task.done():
|
||||
pause_task = loop.create_task(
|
||||
process_agent_pause(done_event, event_stream, config)
|
||||
process_agent_pause(done_event, event_stream)
|
||||
) # Create a task to track agent pause requests from the user
|
||||
|
||||
|
||||
@@ -896,135 +883,16 @@ async def stop_pause_listener() -> None:
|
||||
pause_task = None
|
||||
|
||||
|
||||
def is_command_running(event_stream: EventStream) -> bool:
|
||||
"""Check if a shell command is currently running using bounded reverse search.
|
||||
|
||||
We look at the latest relevant event (CmdRunAction or CmdOutputObservation):
|
||||
- If it's a CmdOutputObservation with a finalized exit_code (>= 0), no command is running
|
||||
- If it's a CmdOutputObservation with exit_code == -1, the command is still running (streaming)
|
||||
- If it's a CmdRunAction (non-input), we assume a command has started and is running
|
||||
"""
|
||||
try:
|
||||
from openhands.events.event_filter import EventFilter
|
||||
|
||||
filt = EventFilter(include_types=(CmdRunAction, CmdOutputObservation))
|
||||
for ev in event_stream.search_events(reverse=True, filter=filt, limit=50):
|
||||
if isinstance(ev, CmdOutputObservation):
|
||||
return ev.metadata.exit_code == -1
|
||||
if isinstance(ev, CmdRunAction):
|
||||
if ev.is_input:
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
# If detection fails for any reason, default to no running command
|
||||
return False
|
||||
|
||||
|
||||
async def _handle_command_interrupt(
|
||||
event_stream: EventStream, config: OpenHandsConfig
|
||||
) -> bool:
|
||||
"""Handle command interruption with user confirmation.
|
||||
|
||||
Returns:
|
||||
bool: True if the interrupt was handled, False if the user wants to pause the agent
|
||||
"""
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Command is currently running.</gold>'))
|
||||
print_formatted_text('')
|
||||
|
||||
# Keep legacy behavior: single Ctrl+C pauses by default. Offer kill as opt-in.
|
||||
choices = [
|
||||
'Pause the agent (default)',
|
||||
'Continue waiting for command to complete',
|
||||
'Send interrupt to running command (Ctrl+C)',
|
||||
]
|
||||
|
||||
# Use the passed-in config so we honor CLI settings like VI mode. Run the blocking UI off the loop.
|
||||
selection = await asyncio.to_thread(
|
||||
cli_confirm, config, 'What would you like to do?', choices, 0
|
||||
)
|
||||
|
||||
if selection == 2: # Send interrupt to the running command
|
||||
print_formatted_text('')
|
||||
print_formatted_text(
|
||||
HTML('<gold>Sending interrupt signal to running command...</gold>')
|
||||
)
|
||||
# Debounce rapid interrupts to avoid multiple concurrent dialogs/interrupts
|
||||
if _interrupt_lock.locked():
|
||||
print_formatted_text(HTML('<grey>Interrupt already sent; waiting…</grey>'))
|
||||
return True
|
||||
async with _interrupt_lock:
|
||||
event_stream.add_event(
|
||||
CmdRunAction(command='C-c', is_input=True),
|
||||
EventSource.USER,
|
||||
)
|
||||
return True
|
||||
elif selection == 1: # Continue waiting
|
||||
print_formatted_text('')
|
||||
print_formatted_text(
|
||||
HTML('<gold>Continuing to wait for command completion...</gold>')
|
||||
)
|
||||
return True
|
||||
else: # Pause the agent (selection == 0)
|
||||
return False
|
||||
|
||||
|
||||
async def _handle_interrupt_async(
|
||||
event_stream: EventStream, done: asyncio.Event, config: OpenHandsConfig
|
||||
) -> None:
|
||||
"""Handle the interrupt asynchronously to avoid blocking the input handler."""
|
||||
try:
|
||||
handled = await _handle_command_interrupt(event_stream, config)
|
||||
if not handled:
|
||||
# User chose to pause the agent
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.PAUSED),
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
except Exception as e:
|
||||
# If something goes wrong, fall back to pausing the agent
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML(f'<ansired>Error handling interrupt: {e}</ansired>'))
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.PAUSED),
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
|
||||
|
||||
async def process_agent_pause(
|
||||
done: asyncio.Event, event_stream: EventStream, config: OpenHandsConfig
|
||||
) -> None:
|
||||
async def process_agent_pause(done: asyncio.Event, event_stream: EventStream) -> None:
|
||||
input = create_input()
|
||||
|
||||
# Double-press detection window for Ctrl+C to send interrupt to running command
|
||||
CTRL_C_WINDOW_SECONDS = 0.4
|
||||
ctrl_c_timer: asyncio.Task | None = None
|
||||
|
||||
async def pause_after_delay(delay: float) -> None:
|
||||
try:
|
||||
await asyncio.sleep(delay)
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.PAUSED),
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
except asyncio.CancelledError:
|
||||
# Timer canceled because a second Ctrl+C was detected; do nothing
|
||||
pass
|
||||
|
||||
def keys_ready() -> None:
|
||||
nonlocal ctrl_c_timer
|
||||
for key_press in input.read_keys():
|
||||
if key_press.key == Keys.ControlP or key_press.key == Keys.ControlD:
|
||||
# Immediate pause
|
||||
if (
|
||||
key_press.key == Keys.ControlP
|
||||
or key_press.key == Keys.ControlC
|
||||
or key_press.key == Keys.ControlD
|
||||
):
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
@@ -1032,47 +900,6 @@ async def process_agent_pause(
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
elif key_press.key == Keys.ControlC:
|
||||
if is_command_running(event_stream):
|
||||
# If a timer is already running, this is a double-press: send interrupt
|
||||
if ctrl_c_timer and not ctrl_c_timer.done():
|
||||
ctrl_c_timer.cancel()
|
||||
ctrl_c_timer = None
|
||||
if _interrupt_lock.locked():
|
||||
print_formatted_text(
|
||||
HTML('<grey>Interrupt already sent; waiting…</grey>')
|
||||
)
|
||||
continue
|
||||
|
||||
# Send Ctrl+C to the running command
|
||||
async def send_interrupt() -> None:
|
||||
async with _interrupt_lock:
|
||||
print_formatted_text('')
|
||||
print_formatted_text(
|
||||
HTML(
|
||||
'<gold>Sending interrupt signal to running command...</gold>'
|
||||
)
|
||||
)
|
||||
event_stream.add_event(
|
||||
CmdRunAction(command='C-c', is_input=True),
|
||||
EventSource.USER,
|
||||
)
|
||||
|
||||
asyncio.create_task(send_interrupt())
|
||||
else:
|
||||
# Start a short window; if no second press, pause
|
||||
ctrl_c_timer = asyncio.create_task(
|
||||
pause_after_delay(CTRL_C_WINDOW_SECONDS)
|
||||
)
|
||||
else:
|
||||
# No command running: default immediate pause
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.PAUSED),
|
||||
EventSource.USER,
|
||||
)
|
||||
done.set()
|
||||
|
||||
try:
|
||||
with input.raw_mode():
|
||||
|
||||
@@ -109,7 +109,7 @@ class AgentController:
|
||||
self,
|
||||
agent: Agent,
|
||||
event_stream: EventStream,
|
||||
conversation_stats: ConversationStats,
|
||||
convo_stats: ConversationStats,
|
||||
iteration_delta: int,
|
||||
budget_per_task_delta: float | None = None,
|
||||
agent_to_llm_config: dict[str, LLMConfig] | None = None,
|
||||
@@ -149,7 +149,7 @@ class AgentController:
|
||||
self.agent = agent
|
||||
self.headless_mode = headless_mode
|
||||
self.is_delegate = is_delegate
|
||||
self.conversation_stats = conversation_stats
|
||||
self.convo_stats = convo_stats
|
||||
|
||||
# the event stream must be set before maybe subscribing to it
|
||||
self.event_stream = event_stream
|
||||
@@ -165,7 +165,7 @@ class AgentController:
|
||||
# state from the previous session, state from a parent agent, or a fresh state
|
||||
self.set_initial_state(
|
||||
state=initial_state,
|
||||
conversation_stats=conversation_stats,
|
||||
convo_stats=convo_stats,
|
||||
max_iterations=iteration_delta,
|
||||
max_budget_per_task=budget_per_task_delta,
|
||||
confirmation_mode=confirmation_mode,
|
||||
@@ -687,7 +687,7 @@ class AgentController:
|
||||
user_id=self.user_id,
|
||||
agent=delegate_agent,
|
||||
event_stream=self.event_stream,
|
||||
conversation_stats=self.conversation_stats,
|
||||
convo_stats=self.convo_stats,
|
||||
iteration_delta=self._initial_max_iterations,
|
||||
budget_per_task_delta=self._initial_max_budget_per_task,
|
||||
agent_to_llm_config=self.agent_to_llm_config,
|
||||
@@ -951,7 +951,7 @@ class AgentController:
|
||||
def set_initial_state(
|
||||
self,
|
||||
state: State | None,
|
||||
conversation_stats: ConversationStats,
|
||||
convo_stats: ConversationStats,
|
||||
max_iterations: int,
|
||||
max_budget_per_task: float | None,
|
||||
confirmation_mode: bool = False,
|
||||
@@ -959,7 +959,7 @@ class AgentController:
|
||||
self.state_tracker.set_initial_state(
|
||||
self.id,
|
||||
state,
|
||||
conversation_stats,
|
||||
convo_stats,
|
||||
max_iterations,
|
||||
max_budget_per_task,
|
||||
confirmation_mode,
|
||||
@@ -1000,7 +1000,7 @@ class AgentController:
|
||||
action: The action to attach metrics to
|
||||
"""
|
||||
# Get metrics from agent LLM
|
||||
metrics = self.conversation_stats.get_combined_metrics()
|
||||
metrics = self.convo_stats.get_combined_metrics()
|
||||
|
||||
# Create a clean copy with only the fields we want to keep
|
||||
clean_metrics = Metrics()
|
||||
|
||||
@@ -85,7 +85,7 @@ class State:
|
||||
limit_increase_amount=100, current_value=0, max_value=100
|
||||
)
|
||||
)
|
||||
conversation_stats: ConversationStats | None = None
|
||||
convo_stats: ConversationStats | None = None
|
||||
budget_flag: BudgetControlFlag | None = None
|
||||
confirmation_mode: bool = False
|
||||
history: list[Event] = field(default_factory=list)
|
||||
@@ -122,8 +122,8 @@ class State:
|
||||
def save_to_session(
|
||||
self, sid: str, file_store: FileStore, user_id: str | None
|
||||
) -> None:
|
||||
conversation_stats = self.conversation_stats
|
||||
self.conversation_stats = None # Don't save conversation stats, handles itself
|
||||
convo_stats = self.convo_stats
|
||||
self.convo_stats = None # Don't save convo stats, handles itself
|
||||
|
||||
pickled = pickle.dumps(self)
|
||||
logger.debug(f'Saving state to session {sid}:{self.agent_state}')
|
||||
@@ -144,7 +144,7 @@ class State:
|
||||
logger.error(f'Failed to save state to session: {e}')
|
||||
raise e
|
||||
|
||||
self.conversation_stats = conversation_stats # restore reference
|
||||
self.convo_stats = convo_stats # restore reference
|
||||
|
||||
@staticmethod
|
||||
def restore_from_session(
|
||||
|
||||
@@ -51,7 +51,7 @@ class StateTracker:
|
||||
self,
|
||||
id: str,
|
||||
state: State | None,
|
||||
conversation_stats: ConversationStats,
|
||||
convo_stats: ConversationStats,
|
||||
max_iterations: int,
|
||||
max_budget_per_task: float | None,
|
||||
confirmation_mode: bool = False,
|
||||
@@ -74,7 +74,7 @@ class StateTracker:
|
||||
session_id=id.removesuffix('-delegate'),
|
||||
user_id=self.user_id,
|
||||
inputs={},
|
||||
conversation_stats=conversation_stats,
|
||||
convo_stats=convo_stats,
|
||||
iteration_flag=IterationControlFlag(
|
||||
limit_increase_amount=max_iterations,
|
||||
current_value=0,
|
||||
@@ -99,7 +99,7 @@ class StateTracker:
|
||||
if self.state.start_id <= -1:
|
||||
self.state.start_id = 0
|
||||
|
||||
state.conversation_stats = conversation_stats
|
||||
state.convo_stats = convo_stats
|
||||
|
||||
def _init_history(self, event_stream: EventStream) -> None:
|
||||
"""Initializes the agent's history from the event stream.
|
||||
@@ -248,8 +248,8 @@ class StateTracker:
|
||||
if self.sid and self.file_store:
|
||||
self.state.save_to_session(self.sid, self.file_store, self.user_id)
|
||||
|
||||
if self.state.conversation_stats:
|
||||
self.state.conversation_stats.save_metrics()
|
||||
if self.state.convo_stats:
|
||||
self.state.convo_stats.save_metrics()
|
||||
|
||||
def run_control_flags(self):
|
||||
"""Performs one step of the control flags"""
|
||||
@@ -262,7 +262,7 @@ class StateTracker:
|
||||
Budget flag will monitor for when budget is exceeded
|
||||
"""
|
||||
# Sync cost across all llm services from llm registry
|
||||
if self.state.budget_flag and self.state.conversation_stats:
|
||||
if self.state.budget_flag and self.state.convo_stats:
|
||||
self.state.budget_flag.current_value = (
|
||||
self.state.conversation_stats.get_combined_metrics().accumulated_cost
|
||||
self.state.convo_stats.get_combined_metrics().accumulated_cost
|
||||
)
|
||||
|
||||
@@ -172,6 +172,9 @@ class LLMConfig(BaseModel):
|
||||
|
||||
# Set reasoning_effort to 'high' by default for non-Gemini models
|
||||
# Gemini models use optimized thinking budget when reasoning_effort is None
|
||||
logger.debug(
|
||||
f'Setting reasoning_effort for model {self.model} with reasoning_effort {self.reasoning_effort}'
|
||||
)
|
||||
if self.reasoning_effort is None and 'gemini-2.5-pro' not in self.model:
|
||||
self.reasoning_effort = 'high'
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ class SandboxConfig(BaseModel):
|
||||
remote_runtime_enable_retries: Whether to enable retries (on recoverable errors like requests.ConnectionError) for the remote runtime API requests.
|
||||
enable_auto_lint: Whether to enable auto-lint.
|
||||
use_host_network: Whether to use the host network.
|
||||
additional_networks: A list of additional Docker networks to connect to
|
||||
runtime_binding_address: The binding address for the runtime ports. It specifies which network interface on the host machine Docker should bind the runtime ports to.
|
||||
initialize_plugins: Whether to initialize plugins.
|
||||
force_rebuild_runtime: Whether to force rebuild the runtime image.
|
||||
@@ -66,7 +65,6 @@ class SandboxConfig(BaseModel):
|
||||
default=False
|
||||
) # once enabled, OpenHands would lint files after editing
|
||||
use_host_network: bool = Field(default=False)
|
||||
additional_networks: list[str] = Field(default=[])
|
||||
runtime_binding_address: str = Field(default='0.0.0.0')
|
||||
runtime_extra_build_args: list[str] | None = Field(default=None)
|
||||
initialize_plugins: bool = Field(default=True)
|
||||
|
||||
@@ -36,7 +36,7 @@ from openhands.mcp import add_mcp_tools_to_agent
|
||||
from openhands.memory.memory import Memory
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
from openhands.utils.utils import create_registry_and_conversation_stats
|
||||
from openhands.utils.utils import create_registry_and_convo_stats
|
||||
|
||||
|
||||
class FakeUserResponseFunc(Protocol):
|
||||
@@ -96,7 +96,7 @@ async def run_controller(
|
||||
"""
|
||||
sid = sid or generate_sid(config)
|
||||
|
||||
llm_registry, conversation_stats, config = create_registry_and_conversation_stats(
|
||||
llm_registry, convo_stats, config = create_registry_and_convo_stats(
|
||||
config,
|
||||
sid,
|
||||
None,
|
||||
@@ -163,7 +163,7 @@ async def run_controller(
|
||||
)
|
||||
|
||||
controller, initial_state = create_controller(
|
||||
agent, runtime, config, conversation_stats, replay_events=replay_events
|
||||
agent, runtime, config, convo_stats, replay_events=replay_events
|
||||
)
|
||||
|
||||
assert isinstance(initial_user_action, Action), (
|
||||
|
||||
@@ -218,7 +218,7 @@ def create_controller(
|
||||
agent: Agent,
|
||||
runtime: Runtime,
|
||||
config: OpenHandsConfig,
|
||||
conversation_stats: ConversationStats,
|
||||
convo_stats: ConversationStats,
|
||||
headless_mode: bool = True,
|
||||
replay_events: list[Event] | None = None,
|
||||
) -> tuple[AgentController, State | None]:
|
||||
@@ -236,7 +236,7 @@ def create_controller(
|
||||
|
||||
controller = AgentController(
|
||||
agent=agent,
|
||||
conversation_stats=conversation_stats,
|
||||
convo_stats=convo_stats,
|
||||
iteration_delta=config.max_iterations,
|
||||
budget_per_task_delta=config.max_budget_per_task,
|
||||
agent_to_llm_config=config.get_agent_to_llm_config_map(),
|
||||
|
||||
@@ -321,36 +321,6 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
|
||||
installations = response.get('installations', [])
|
||||
return [str(i['id']) for i in installations]
|
||||
|
||||
async def get_user_organizations(self) -> list[str]:
|
||||
"""Get list of organization logins that the user is a member of."""
|
||||
url = f'{self.BASE_URL}/user/orgs'
|
||||
try:
|
||||
response, _ = await self._make_request(url)
|
||||
orgs = [org['login'] for org in response]
|
||||
return orgs
|
||||
except Exception as e:
|
||||
logger.warning(f'Failed to get user organizations: {e}')
|
||||
return []
|
||||
|
||||
def _fuzzy_match_org_name(self, query: str, org_name: str) -> bool:
|
||||
"""Check if query fuzzy matches organization name."""
|
||||
query_lower = query.lower().replace('-', '').replace('_', '').replace(' ', '')
|
||||
org_lower = org_name.lower().replace('-', '').replace('_', '').replace(' ', '')
|
||||
|
||||
# Exact match after normalization
|
||||
if query_lower == org_lower:
|
||||
return True
|
||||
|
||||
# Query is a substring of org name
|
||||
if query_lower in org_lower:
|
||||
return True
|
||||
|
||||
# Org name is a substring of query (less common but possible)
|
||||
if org_lower in query_lower:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def search_repositories(
|
||||
self, query: str, per_page: int, sort: str, order: str, public: bool
|
||||
) -> list[Repository]:
|
||||
@@ -371,68 +341,21 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
|
||||
# Add is:public to the query to ensure we only search for public repositories
|
||||
params['q'] = f'in:name {org}/{repo_name} is:public'
|
||||
|
||||
# Handle private repository searches
|
||||
# Perhaps we should go through all orgs and the search for repos under every org
|
||||
# Currently it will only search user repos, and org repos when '/' is in the name
|
||||
if not public and '/' in query:
|
||||
org, repo_query = query.split('/', 1)
|
||||
query_with_user = f'org:{org} in:name {repo_query}'
|
||||
params['q'] = query_with_user
|
||||
elif not public:
|
||||
# Expand search scope to include user's repositories and organizations they're a member of
|
||||
user = await self.get_user()
|
||||
user_orgs = await self.get_user_organizations()
|
||||
params['q'] = f'in:name {query} user:{user.login}'
|
||||
|
||||
# Search in user repos and org repos separately
|
||||
all_repos = []
|
||||
|
||||
# Search in user repositories
|
||||
user_query = f'{query} user:{user.login}'
|
||||
user_params = params.copy()
|
||||
user_params['q'] = user_query
|
||||
|
||||
try:
|
||||
user_response, _ = await self._make_request(url, user_params)
|
||||
user_items = user_response.get('items', [])
|
||||
all_repos.extend(user_items)
|
||||
except Exception as e:
|
||||
logger.warning(f'User search failed: {e}')
|
||||
|
||||
# Search for repos named "query" in each organization
|
||||
for org in user_orgs:
|
||||
org_query = f'{query} org:{org}'
|
||||
org_params = params.copy()
|
||||
org_params['q'] = org_query
|
||||
|
||||
try:
|
||||
org_response, _ = await self._make_request(url, org_params)
|
||||
org_items = org_response.get('items', [])
|
||||
all_repos.extend(org_items)
|
||||
except Exception as e:
|
||||
logger.warning(f'Org {org} search failed: {e}')
|
||||
|
||||
# Also search for top repos from orgs that match the query name
|
||||
for org in user_orgs:
|
||||
if self._fuzzy_match_org_name(query, org):
|
||||
org_repos_query = f'org:{org}'
|
||||
org_repos_params = params.copy()
|
||||
org_repos_params['q'] = org_repos_query
|
||||
org_repos_params['sort'] = 'stars'
|
||||
org_repos_params['per_page'] = 2 # Limit to first 2 repos
|
||||
|
||||
try:
|
||||
org_repos_response, _ = await self._make_request(
|
||||
url, org_repos_params
|
||||
)
|
||||
org_repo_items = org_repos_response.get('items', [])
|
||||
all_repos.extend(org_repo_items)
|
||||
except Exception as e:
|
||||
logger.warning(f'Org repos search for {org} failed: {e}')
|
||||
|
||||
return [self._parse_repository(repo) for repo in all_repos]
|
||||
|
||||
# Default case (public search or slash query)
|
||||
response, _ = await self._make_request(url, params)
|
||||
repo_items = response.get('items', [])
|
||||
return [self._parse_repository(repo) for repo in repo_items]
|
||||
repos = [self._parse_repository(repo) for repo in repo_items]
|
||||
|
||||
return repos
|
||||
|
||||
async def execute_graphql_query(
|
||||
self, query: str, variables: dict[str, Any]
|
||||
|
||||
@@ -313,8 +313,22 @@ class GitLabService(BaseGitService, GitService):
|
||||
if not repo_path:
|
||||
return [] # Invalid URL format
|
||||
|
||||
repository = await self.get_repository_details_from_repo_name(repo_path)
|
||||
return [repository]
|
||||
# First try authenticated request (if token present)
|
||||
try:
|
||||
repository = await self.get_repository_details_from_repo_name(repo_path)
|
||||
return [repository]
|
||||
except Exception:
|
||||
# Fall back to unauthenticated request for public repositories
|
||||
try:
|
||||
encoded_name = repo_path.replace('/', '%2F')
|
||||
url = f'{self.BASE_URL}/projects/{encoded_name}'
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(url)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return [self._parse_repository(data)]
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
return await self.get_paginated_repos(1, per_page, sort, None, query)
|
||||
|
||||
@@ -532,11 +546,17 @@ class GitLabService(BaseGitService, GitService):
|
||||
self, repository: str
|
||||
) -> Repository:
|
||||
encoded_name = repository.replace('/', '%2F')
|
||||
|
||||
url = f'{self.BASE_URL}/projects/{encoded_name}'
|
||||
repo, _ = await self._make_request(url)
|
||||
|
||||
return self._parse_repository(repo)
|
||||
try:
|
||||
repo, _ = await self._make_request(url)
|
||||
return self._parse_repository(repo)
|
||||
except Exception:
|
||||
# Fall back to unauthenticated request for public repositories
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(url)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return self._parse_repository(data)
|
||||
|
||||
async def get_branches(self, repository: str) -> list[Branch]:
|
||||
"""Get branches for a repository"""
|
||||
|
||||
@@ -603,8 +603,8 @@ class ProviderHandler:
|
||||
# Try to use token if available, otherwise use public URL
|
||||
if self.provider_tokens and provider in self.provider_tokens:
|
||||
git_token = self.provider_tokens[provider].token
|
||||
if git_token:
|
||||
token_value = git_token.get_secret_value()
|
||||
token_value = git_token.get_secret_value() if git_token else ''
|
||||
if token_value:
|
||||
if provider == ProviderType.GITLAB:
|
||||
remote_url = (
|
||||
f'https://oauth2:{token_value}@{domain}/{repo_name}.git'
|
||||
@@ -621,6 +621,7 @@ class ProviderHandler:
|
||||
# GitHub
|
||||
remote_url = f'https://{token_value}@{domain}/{repo_name}.git'
|
||||
else:
|
||||
# No token available or empty: use public HTTPS URL
|
||||
remote_url = f'https://{domain}/{repo_name}.git'
|
||||
else:
|
||||
remote_url = f'https://{domain}/{repo_name}.git'
|
||||
|
||||
+872
-782
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,8 @@
|
||||
import * as path from "path";
|
||||
import Mocha = require("mocha");
|
||||
import { glob } from "glob"; // Updated for glob v9+ API
|
||||
import Mocha = require("mocha"); // Changed import style
|
||||
import glob = require("glob"); // Changed import style
|
||||
|
||||
export async function run(): Promise<void> {
|
||||
export function run(): Promise<void> {
|
||||
// Create the mocha test
|
||||
const mocha = new Mocha({
|
||||
// This should now work with the changed import
|
||||
@@ -13,25 +13,33 @@ export async function run(): Promise<void> {
|
||||
|
||||
const testsRoot = path.resolve(__dirname, ".."); // Root of the /src/test folder (compiled to /out/test)
|
||||
|
||||
try {
|
||||
return new Promise((c, e) => {
|
||||
// Use glob to find all test files (ending with .test.js in the compiled output)
|
||||
const files = await glob("**/**.test.js", { cwd: testsRoot });
|
||||
|
||||
// Add files to the test suite
|
||||
files.forEach((f: string) => mocha.addFile(path.resolve(testsRoot, f)));
|
||||
|
||||
// Run the mocha test
|
||||
return await new Promise<void>((resolve, reject) => {
|
||||
mocha.run((failures: number) => {
|
||||
if (failures > 0) {
|
||||
reject(new Error(`${failures} tests failed.`));
|
||||
} else {
|
||||
resolve();
|
||||
glob(
|
||||
"**/**.test.js",
|
||||
{ cwd: testsRoot },
|
||||
(err: NodeJS.ErrnoException | null, files: string[]) => {
|
||||
if (err) {
|
||||
return e(err);
|
||||
}
|
||||
});
|
||||
});
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Add files to the test suite
|
||||
files.forEach((f: string) => mocha.addFile(path.resolve(testsRoot, f)));
|
||||
|
||||
try {
|
||||
// Run the mocha test
|
||||
mocha.run((failures: number) => {
|
||||
if (failures > 0) {
|
||||
e(new Error(`${failures} tests failed.`));
|
||||
} else {
|
||||
c();
|
||||
}
|
||||
});
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
e(err);
|
||||
}
|
||||
},
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -9,8 +9,8 @@ from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.llm.llm import (
|
||||
LLM,
|
||||
LLM_RETRY_EXCEPTIONS,
|
||||
REASONING_EFFORT_SUPPORTED_MODELS,
|
||||
)
|
||||
from openhands.llm.model_features import get_features
|
||||
from openhands.utils.shutdown_listener import should_continue
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ class AsyncLLM(LLM):
|
||||
messages = kwargs['messages']
|
||||
|
||||
# Set reasoning effort for models that support it
|
||||
if get_features(self.config.model).supports_reasoning_effort:
|
||||
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
|
||||
kwargs['reasoning_effort'] = self.config.reasoning_effort
|
||||
|
||||
# ensure we work with a list of messages
|
||||
|
||||
@@ -705,25 +705,6 @@ def _fix_stopword(content: str) -> str:
|
||||
return content
|
||||
|
||||
|
||||
def _normalize_parameter_tags(fn_body: str) -> str:
|
||||
"""Normalize malformed parameter tags to the canonical format.
|
||||
|
||||
Some models occasionally emit malformed parameter tags like:
|
||||
<parameter=command=str_replace</parameter>
|
||||
instead of the correct:
|
||||
<parameter=command>str_replace</parameter>
|
||||
|
||||
This function rewrites the malformed form into the correct one to allow
|
||||
downstream parsing to succeed.
|
||||
"""
|
||||
# Replace '<parameter=name=value</parameter>' with '<parameter=name>value</parameter>'
|
||||
return re.sub(
|
||||
r'<parameter=([a-zA-Z0-9_]+)=([^<]*)</parameter>',
|
||||
r'<parameter=\1>\2</parameter>',
|
||||
fn_body,
|
||||
)
|
||||
|
||||
|
||||
def convert_non_fncall_messages_to_fncall_messages(
|
||||
messages: list[dict],
|
||||
tools: list[ChatCompletionToolParam],
|
||||
@@ -871,7 +852,7 @@ def convert_non_fncall_messages_to_fncall_messages(
|
||||
|
||||
if fn_match:
|
||||
fn_name = fn_match.group(1)
|
||||
fn_body = _normalize_parameter_tags(fn_match.group(2))
|
||||
fn_body = fn_match.group(2)
|
||||
matching_tool = next(
|
||||
(
|
||||
tool['function']
|
||||
|
||||
+96
-26
@@ -9,7 +9,6 @@ import httpx
|
||||
|
||||
from openhands.core.config import LLMConfig
|
||||
from openhands.llm.metrics import Metrics
|
||||
from openhands.llm.model_features import get_features
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('ignore')
|
||||
@@ -50,6 +49,79 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
|
||||
LLMNoResponseError,
|
||||
)
|
||||
|
||||
# cache prompt supporting models
|
||||
# remove this when we gemini and deepseek are supported
|
||||
CACHE_PROMPT_SUPPORTED_MODELS = [
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-sonnet-3-7-latest',
|
||||
'claude-3.7-sonnet',
|
||||
'claude-3-5-sonnet-20241022',
|
||||
'claude-3-5-sonnet-20240620',
|
||||
'claude-3-5-haiku-20241022',
|
||||
'claude-3-haiku-20240307',
|
||||
'claude-3-opus-20240229',
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-sonnet-4',
|
||||
'claude-opus-4-20250514',
|
||||
'claude-opus-4-1-20250805',
|
||||
]
|
||||
|
||||
# function calling supporting models
|
||||
FUNCTION_CALLING_SUPPORTED_MODELS = [
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-sonnet-3-7-latest',
|
||||
'claude-3-5-sonnet',
|
||||
'claude-3-5-sonnet-20240620',
|
||||
'claude-3-5-sonnet-20241022',
|
||||
'claude-3.5-haiku',
|
||||
'claude-3-5-haiku-20241022',
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-sonnet-4',
|
||||
'claude-opus-4-20250514',
|
||||
'claude-opus-4-1-20250805',
|
||||
'gpt-4o-mini',
|
||||
'gpt-4o',
|
||||
'o1-2024-12-17',
|
||||
'o3-mini-2025-01-31',
|
||||
'o3-mini',
|
||||
'o3',
|
||||
'o3-2025-04-16',
|
||||
'o4-mini',
|
||||
'o4-mini-2025-04-16',
|
||||
'gemini-2.5-pro',
|
||||
'gpt-4.1',
|
||||
'kimi-k2-0711-preview',
|
||||
'kimi-k2-instruct',
|
||||
'Qwen3-Coder-480B-A35B-Instruct',
|
||||
'qwen3-coder', # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
|
||||
'gpt-5',
|
||||
'gpt-5-2025-08-07',
|
||||
]
|
||||
|
||||
REASONING_EFFORT_SUPPORTED_MODELS = [
|
||||
'o1-2024-12-17',
|
||||
'o1',
|
||||
'o3',
|
||||
'o3-2025-04-16',
|
||||
'o3-mini-2025-01-31',
|
||||
'o3-mini',
|
||||
'o4-mini',
|
||||
'o4-mini-2025-04-16',
|
||||
'gemini-2.5-flash',
|
||||
'gemini-2.5-pro',
|
||||
'gpt-5',
|
||||
'gpt-5-2025-08-07',
|
||||
'claude-opus-4-1-20250805', # we need to remove top_p for opus 4.1
|
||||
]
|
||||
|
||||
MODELS_WITHOUT_STOP_WORDS = [
|
||||
'o1-mini',
|
||||
'o1-preview',
|
||||
'o1',
|
||||
'o1-2024-12-17',
|
||||
'xai/grok-4-0709',
|
||||
]
|
||||
|
||||
|
||||
class LLM(RetryMixin, DebugMixin):
|
||||
"""The LLM class represents a Language Model instance.
|
||||
@@ -82,7 +154,6 @@ class LLM(RetryMixin, DebugMixin):
|
||||
)
|
||||
|
||||
self.model_info: ModelInfo | None = None
|
||||
self._function_calling_active: bool = False
|
||||
self.retry_listener = retry_listener
|
||||
if self.config.log_completions:
|
||||
if self.config.log_completions_folder is None:
|
||||
@@ -131,8 +202,10 @@ class LLM(RetryMixin, DebugMixin):
|
||||
f'Rewrote openhands/{model_name} to {self.config.model} with base URL {self.config.base_url}'
|
||||
)
|
||||
|
||||
features = get_features(self.config.model)
|
||||
if features.supports_reasoning_effort:
|
||||
if (
|
||||
self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
|
||||
or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
|
||||
):
|
||||
# For Gemini models, only map 'low' to optimized thinking budget
|
||||
# Let other reasoning_effort values pass through to API as-is
|
||||
if 'gemini-2.5-pro' in self.config.model:
|
||||
@@ -166,20 +239,6 @@ class LLM(RetryMixin, DebugMixin):
|
||||
elif 'gemini' in self.config.model.lower() and self.config.safety_settings:
|
||||
kwargs['safety_settings'] = self.config.safety_settings
|
||||
|
||||
# Explicitly disable Anthropic extended thinking for Opus 4.1 to avoid
|
||||
# requiring 'thinking' content blocks. See issue #10510.
|
||||
if 'claude-opus-4-1' in self.config.model.lower():
|
||||
kwargs['thinking'] = {'type': 'disabled'}
|
||||
|
||||
# Anthropic constraint: Opus models cannot accept both temperature and top_p
|
||||
# Prefer temperature (drop top_p) if both are specified.
|
||||
_model_lower = self.config.model.lower()
|
||||
# Limit to Opus 4.1 specifically to avoid changing behavior of other Anthropic models
|
||||
if ('claude-opus-4-1' in _model_lower) and (
|
||||
'temperature' in kwargs and 'top_p' in kwargs
|
||||
):
|
||||
kwargs.pop('top_p', None)
|
||||
|
||||
self._completion = partial(
|
||||
litellm_completion,
|
||||
model=self.config.model,
|
||||
@@ -253,7 +312,7 @@ class LLM(RetryMixin, DebugMixin):
|
||||
|
||||
# add stop words if the model supports it and stop words are not disabled
|
||||
if (
|
||||
get_features(self.config.model).supports_stop_words
|
||||
self.config.model not in MODELS_WITHOUT_STOP_WORDS
|
||||
and not self.config.disable_stop_word
|
||||
):
|
||||
kwargs['stop'] = STOP_WORDS
|
||||
@@ -497,10 +556,17 @@ class LLM(RetryMixin, DebugMixin):
|
||||
):
|
||||
self.config.max_output_tokens = self.model_info['max_tokens']
|
||||
|
||||
# Initialize function calling using centralized model features
|
||||
features = get_features(self.config.model)
|
||||
# Initialize function calling capability
|
||||
# Check if model name is in our supported list
|
||||
model_name_supported = (
|
||||
self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||
or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||
or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
|
||||
)
|
||||
|
||||
# Handle native_tool_calling user-defined configuration
|
||||
if self.config.native_tool_calling is None:
|
||||
self._function_calling_active = features.supports_function_calling
|
||||
self._function_calling_active = model_name_supported
|
||||
else:
|
||||
self._function_calling_active = self.config.native_tool_calling
|
||||
|
||||
@@ -535,10 +601,14 @@ class LLM(RetryMixin, DebugMixin):
|
||||
Returns:
|
||||
boolean: True if prompt caching is supported and enabled for the given model.
|
||||
"""
|
||||
if not self.config.caching_prompt:
|
||||
return False
|
||||
# We don't need to look-up model_info, because only Anthropic models need explicit caching breakpoints
|
||||
return get_features(self.config.model).supports_prompt_cache
|
||||
return (
|
||||
self.config.caching_prompt is True
|
||||
and (
|
||||
self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
|
||||
or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
|
||||
)
|
||||
# We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
|
||||
)
|
||||
|
||||
def is_function_calling_active(self) -> bool:
|
||||
"""Returns whether function calling is supported and enabled for this LLM instance.
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from fnmatch import fnmatch
|
||||
|
||||
|
||||
def normalize_model_name(model: str) -> str:
    """Reduce a model identifier to a canonical, comparable basename.

    Steps, in order:
    - Strip surrounding whitespace and lowercase the string (a falsy
      ``model`` is treated as the empty string).
    - When the string contains a '/', keep only the text after the last
      '/' (drops prefixes such as openrouter/, litellm_proxy/, anthropic/)
      and cut that basename at its first ':' (Ollama-style variant tag).
    - When there is no '/', the whole string is kept, including any ':';
      a provider:model form is not supported.
    - Remove one trailing "-gguf" suffix if present.

    Args:
        model: Raw model string, optionally provider-qualified.

    Returns:
        The normalized model name.
    """
    cleaned = (model or '').strip().lower()

    # rpartition yields an empty separator when no '/' exists, in which case
    # the third element is the entire cleaned string.
    _, slash, basename = cleaned.rpartition('/')
    if slash:
        # Only provider-qualified names get their variant tag stripped.
        basename = basename.partition(':')[0]

    gguf_suffix = '-gguf'
    if basename.endswith(gguf_suffix):
        return basename[: -len(gguf_suffix)]
    return basename
|
||||
|
||||
|
||||
def model_matches(model: str, patterns: list[str]) -> bool:
    """Tell whether ``model`` matches at least one glob pattern.

    Each pattern is lowercased before matching. A pattern that contains a
    '/' is provider-qualified and is compared with the full, stripped and
    lowercased model string (provider prefix included). Any other pattern
    is compared with the normalized basename of the model.

    Args:
        model: Model string, optionally provider-qualified.
        patterns: Glob patterns in ``fnmatch`` syntax.

    Returns:
        True when any pattern matches, False otherwise (including when
        ``patterns`` is empty).
    """
    full_name = (model or '').strip().lower()
    base_name = normalize_model_name(model)

    def _hits(pattern: str) -> bool:
        lowered = pattern.lower()
        # Provider-qualified patterns look at the whole string; bare
        # patterns look at the normalized basename only.
        subject = full_name if '/' in lowered else base_name
        return fnmatch(subject, lowered)

    return any(_hits(pattern) for pattern in patterns)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ModelFeatures:
    """Immutable set of capability flags for a single model."""

    # True when the model is treated as supporting function calling.
    supports_function_calling: bool
    # True when the model is treated as accepting a reasoning-effort setting.
    supports_reasoning_effort: bool
    # True when the model is treated as supporting prompt caching.
    supports_prompt_cache: bool
    # True when the model is treated as accepting stop words.
    supports_stop_words: bool
|
||||
|
||||
|
||||
# Pattern tables capturing current behavior. Keep patterns lowercase.
|
||||
# Glob patterns (lowercase) for the models listed as function-calling capable.
FUNCTION_CALLING_PATTERNS: list[str] = [
    # --- Anthropic ---
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet*',
    'claude-3.5-haiku*',
    'claude-3-5-haiku*',
    'claude-sonnet-4*',
    'claude-opus-4*',
    # --- OpenAI GPT ---
    'gpt-4o*',
    'gpt-4.1',
    'gpt-5*',
    # --- OpenAI o-series (o1 stays an exact match, as in the earlier list) ---
    'o1-2024-12-17',
    'o3*',
    'o4-mini*',
    # --- Google Gemini ---
    'gemini-2.5-pro*',
    # --- Other providers ---
    'kimi-k2-0711-preview',
    'kimi-k2-instruct',
    'qwen3-coder*',
    'qwen3-coder-480b-a35b-instruct',
]
|
||||
|
||||
# Patterns for models that take a reasoning-effort setting. Entries mirror the
# behavior on main exactly (no unintended expansion), plus DeepSeek support.
REASONING_EFFORT_PATTERNS: list[str] = [
    # --- OpenAI o-series ---
    'o1-2024-12-17',
    'o1',
    'o3',
    'o3-2025-04-16',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o4-mini',
    'o4-mini-2025-04-16',
    # --- Google Gemini ---
    'gemini-2.5-flash',
    'gemini-2.5-pro',
    # --- OpenAI GPT-5 ---
    'gpt-5',
    'gpt-5-2025-08-07',
    # --- DeepSeek reasoning family ---
    'deepseek-r1-0528*',
]
|
||||
|
||||
# Glob patterns (lowercase) for the models listed as prompt-cache capable.
PROMPT_CACHE_PATTERNS: list[str] = [
    # --- Claude 3.7 Sonnet ---
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    # --- Claude 3.5 ---
    'claude-3-5-sonnet*',
    'claude-3-5-haiku*',
    'claude-3.5-haiku*',
    # --- Claude 3 (exact dated names) ---
    'claude-3-haiku-20240307',
    'claude-3-opus-20240229',
    # --- Claude 4 ---
    'claude-sonnet-4*',
    'claude-opus-4*',
]
|
||||
|
||||
# Patterns for models listed as NOT supporting stop words.
SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
    'o1*',  # o1 family doesn't support stop words
    'grok-4-0709',  # grok-4 specific model name (basename only)
    'deepseek-r1-0528*',  # DeepSeek R1 family
]
|
||||
|
||||
|
||||
def get_features(model: str) -> ModelFeatures:
    """Build the capability flags for ``model`` from the pattern tables.

    Function calling, reasoning effort and prompt caching are enabled when
    the model matches the corresponding pattern table. Stop words are the
    inverse: they are supported unless the model matches
    ``SUPPORTS_STOP_WORDS_FALSE_PATTERNS``.

    Args:
        model: Model string, optionally provider-qualified.

    Returns:
        A ``ModelFeatures`` instance describing the model.
    """
    function_calling = model_matches(model, FUNCTION_CALLING_PATTERNS)
    reasoning_effort = model_matches(model, REASONING_EFFORT_PATTERNS)
    prompt_cache = model_matches(model, PROMPT_CACHE_PATTERNS)
    # This table lists the exceptions, so a match means "no stop words".
    stop_words_blocked = model_matches(model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS)

    return ModelFeatures(
        supports_function_calling=function_calling,
        supports_reasoning_effort=reasoning_effort,
        supports_prompt_cache=prompt_cache,
        supports_stop_words=not stop_words_blocked,
    )
|
||||
@@ -5,7 +5,7 @@ from typing import Any, Callable
|
||||
from openhands.core.exceptions import UserCancelledError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
|
||||
from openhands.llm.model_features import get_features
|
||||
from openhands.llm.llm import REASONING_EFFORT_SUPPORTED_MODELS
|
||||
|
||||
|
||||
class StreamingLLM(AsyncLLM):
|
||||
@@ -65,7 +65,7 @@ class StreamingLLM(AsyncLLM):
|
||||
)
|
||||
|
||||
# Set reasoning effort for models that support it
|
||||
if get_features(self.config.model).supports_reasoning_effort:
|
||||
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
|
||||
kwargs['reasoning_effort'] = self.config.reasoning_effort
|
||||
|
||||
self.log_prompt(messages)
|
||||
|
||||
+18
-31
@@ -67,7 +67,6 @@ from openhands.runtime.plugins import (
|
||||
from openhands.runtime.runtime_status import RuntimeStatus
|
||||
from openhands.runtime.utils.edit import FileEditRuntimeMixin
|
||||
from openhands.runtime.utils.git_handler import CommandResult, GitHandler
|
||||
from openhands.storage.locations import get_conversation_dir
|
||||
from openhands.utils.async_utils import (
|
||||
GENERAL_TIMEOUT,
|
||||
call_async_from_sync,
|
||||
@@ -877,14 +876,8 @@ fi
|
||||
if isinstance(action, AgentThinkAction):
|
||||
return AgentThinkObservation('Your thought has been logged.')
|
||||
elif isinstance(action, TaskTrackingAction):
|
||||
# Get the session-specific task file path
|
||||
conversation_dir = get_conversation_dir(
|
||||
self.sid, self.event_stream.user_id
|
||||
)
|
||||
task_file_path = f'{conversation_dir}TASKS.md'
|
||||
|
||||
# If `command` is `plan`, write the serialized task list to the file TASKS.md under `.openhands/`
|
||||
if action.command == 'plan':
|
||||
# Write the serialized task list to the session directory
|
||||
content = '# Task List\n\n'
|
||||
for i, task in enumerate(action.task_list, 1):
|
||||
status_icon = {
|
||||
@@ -893,39 +886,33 @@ fi
|
||||
'done': '✅',
|
||||
}.get(task.get('status', 'todo'), '⏳')
|
||||
content += f'{i}. {status_icon} {task.get("title", "")}\n{task.get("notes", "")}\n'
|
||||
|
||||
try:
|
||||
self.event_stream.file_store.write(task_file_path, content)
|
||||
return TaskTrackingObservation(
|
||||
content=f'Task list has been updated with {len(action.task_list)} items. Stored in session directory: {task_file_path}',
|
||||
command=action.command,
|
||||
task_list=action.task_list,
|
||||
)
|
||||
except Exception as e:
|
||||
write_obs = self.write(
|
||||
FileWriteAction(path='.openhands/TASKS.md', content=content)
|
||||
)
|
||||
if isinstance(write_obs, ErrorObservation):
|
||||
return ErrorObservation(
|
||||
f'Failed to write task list to session directory {task_file_path}: {str(e)}'
|
||||
f'Failed to write task list to .openhands/TASKS.md: {write_obs.content}'
|
||||
)
|
||||
|
||||
return TaskTrackingObservation(
|
||||
content=f'Task list has been updated with {len(action.task_list)} items.',
|
||||
command=action.command,
|
||||
task_list=action.task_list,
|
||||
)
|
||||
elif action.command == 'view':
|
||||
# Read the TASKS.md file from the session directory
|
||||
try:
|
||||
content = self.event_stream.file_store.read(task_file_path)
|
||||
# If `command` is `view`, read the TASKS.md file and return its content
|
||||
read_obs = self.read(FileReadAction(path='.openhands/TASKS.md'))
|
||||
if isinstance(read_obs, FileReadObservation):
|
||||
return TaskTrackingObservation(
|
||||
content=content,
|
||||
content=read_obs.content,
|
||||
command=action.command,
|
||||
task_list=[], # Empty for view command
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return TaskTrackingObservation(
|
||||
else:
|
||||
return TaskTrackingObservation( # Return observation if error occurs because file might not exist yet
|
||||
command=action.command,
|
||||
task_list=[],
|
||||
content='No task list found. Use the "plan" command to create one.',
|
||||
)
|
||||
except Exception as e:
|
||||
return TaskTrackingObservation(
|
||||
command=action.command,
|
||||
task_list=[],
|
||||
content=f'Failed to read the task list from session directory {task_file_path}. Error: {str(e)}',
|
||||
content=f'Failed to read the task list. Error: {read_obs.content}',
|
||||
)
|
||||
|
||||
return NullObservation('')
|
||||
|
||||
@@ -213,23 +213,6 @@ class DockerRuntime(ActionExecutionClient):
|
||||
self.set_runtime_status(RuntimeStatus.READY)
|
||||
self._runtime_initialized = True
|
||||
|
||||
for network_name in self.config.sandbox.additional_networks:
|
||||
try:
|
||||
network = self.docker_client.networks.get(network_name)
|
||||
if self.container is not None:
|
||||
network.connect(self.container)
|
||||
else:
|
||||
self.log(
|
||||
'warning',
|
||||
f'Container not available to connect to network {network_name}',
|
||||
)
|
||||
except Exception as e:
|
||||
self.log(
|
||||
'error',
|
||||
f'Error: Failed to connect instance {self.container_name} to network {network_name}',
|
||||
)
|
||||
self.log('error', str(e))
|
||||
|
||||
def maybe_build_runtime_container_image(self):
|
||||
if self.runtime_container_image is None:
|
||||
if self.base_container_image is None:
|
||||
|
||||
@@ -40,7 +40,7 @@ Two configuration options are required to use the Kubernetes runtime:
|
||||
2. **Runtime Container Image**: Specify the container image to use for the runtime environment
|
||||
```toml
|
||||
[sandbox]
|
||||
runtime_container_image = "docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik"
|
||||
runtime_container_image = "docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik"
|
||||
```
|
||||
|
||||
#### Additional Kubernetes Options
|
||||
|
||||
@@ -201,14 +201,8 @@ class LocalRuntime(ActionExecutionClient):
|
||||
|
||||
# If there is an API key in the environment we use this in requests to the runtime
|
||||
session_api_key = os.getenv('SESSION_API_KEY')
|
||||
self._session_api_key: str | None = None
|
||||
if session_api_key:
|
||||
self.session.headers['X-Session-API-Key'] = session_api_key
|
||||
self._session_api_key = session_api_key
|
||||
|
||||
@property
|
||||
def session_api_key(self) -> str | None:
|
||||
return self._session_api_key
|
||||
|
||||
@property
|
||||
def action_execution_server_url(self) -> str:
|
||||
|
||||
@@ -177,7 +177,9 @@ RUN \
|
||||
/openhands/micromamba/bin/micromamba run -n openhands poetry install --only main,runtime --no-interaction --no-root && \
|
||||
# Update and install additional tools
|
||||
# (There used to be an "apt-get update" here, hopefully we can skip it.)
|
||||
{% if enable_browser %}/openhands/micromamba/bin/micromamba run -n openhands poetry run playwright install --with-deps chromium && \{% endif %}
|
||||
{% if enable_browser %}
|
||||
/openhands/micromamba/bin/micromamba run -n openhands poetry run playwright install --with-deps chromium && \
|
||||
{% endif %}
|
||||
# Set environment variables
|
||||
/openhands/micromamba/bin/micromamba run -n openhands poetry run python -c "import sys; print('OH_INTERPRETER_PATH=' + sys.executable)" >> /etc/environment && \
|
||||
# Set permissions
|
||||
|
||||
@@ -42,7 +42,7 @@ from openhands.storage.files import FileStore
|
||||
from openhands.storage.locations import get_conversation_dir
|
||||
from openhands.utils.async_utils import call_sync_from_async
|
||||
from openhands.utils.import_utils import get_impl
|
||||
from openhands.utils.utils import create_registry_and_conversation_stats
|
||||
from openhands.utils.utils import create_registry_and_convo_stats
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -486,14 +486,14 @@ class DockerNestedConversationManager(ConversationManager):
|
||||
user_id, sid, self.config
|
||||
)
|
||||
|
||||
llm_registry, conversation_stats, config = (
|
||||
create_registry_and_conversation_stats(config, sid, user_id, settings)
|
||||
llm_registry, convo_stats, config = create_registry_and_convo_stats(
|
||||
config, sid, user_id, settings
|
||||
)
|
||||
|
||||
session = Session(
|
||||
sid=sid,
|
||||
llm_registry=llm_registry,
|
||||
conversation_stats=conversation_stats,
|
||||
convo_stats=convo_stats,
|
||||
file_store=self.file_store,
|
||||
config=config,
|
||||
sio=self.sio,
|
||||
|
||||
@@ -39,7 +39,7 @@ from openhands.utils.conversation_summary import (
|
||||
)
|
||||
from openhands.utils.import_utils import get_impl
|
||||
from openhands.utils.shutdown_listener import should_continue
|
||||
from openhands.utils.utils import create_registry_and_conversation_stats
|
||||
from openhands.utils.utils import create_registry_and_convo_stats
|
||||
|
||||
from .conversation_manager import ConversationManager
|
||||
|
||||
@@ -335,15 +335,15 @@ class StandaloneConversationManager(ConversationManager):
|
||||
)
|
||||
await self.close_session(oldest_conversation_id)
|
||||
|
||||
llm_registry, conversation_stats, config = (
|
||||
create_registry_and_conversation_stats(self.config, sid, user_id, settings)
|
||||
llm_registry, convo_stats, config = create_registry_and_convo_stats(
|
||||
self.config, sid, user_id, settings
|
||||
)
|
||||
session = Session(
|
||||
sid=sid,
|
||||
file_store=self.file_store,
|
||||
config=config,
|
||||
llm_registry=llm_registry,
|
||||
conversation_stats=conversation_stats,
|
||||
convo_stats=convo_stats,
|
||||
sio=self.sio,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
@@ -63,7 +63,7 @@ from openhands.server.user_auth import (
|
||||
)
|
||||
from openhands.server.user_auth.user_auth import AuthType
|
||||
from openhands.server.utils import get_conversation as get_conversation_metadata
|
||||
from openhands.server.utils import get_conversation_store, validate_conversation_id
|
||||
from openhands.server.utils import get_conversation_store
|
||||
from openhands.storage.conversation.conversation_store import ConversationStore
|
||||
from openhands.storage.data_models.conversation_metadata import (
|
||||
ConversationMetadata,
|
||||
@@ -297,7 +297,7 @@ async def search_conversations(
|
||||
|
||||
@app.get('/conversations/{conversation_id}')
|
||||
async def get_conversation(
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
conversation_id: str,
|
||||
conversation_store: ConversationStore = Depends(get_conversation_store),
|
||||
) -> ConversationInfo | None:
|
||||
try:
|
||||
@@ -319,7 +319,7 @@ async def get_conversation(
|
||||
|
||||
@app.delete('/conversations/{conversation_id}')
|
||||
async def delete_conversation(
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
conversation_id: str,
|
||||
user_id: str | None = Depends(get_user_id),
|
||||
) -> bool:
|
||||
conversation_store = await ConversationStoreImpl.get_instance(config, user_id)
|
||||
@@ -338,8 +338,8 @@ async def delete_conversation(
|
||||
|
||||
@app.get('/conversations/{conversation_id}/remember-prompt')
|
||||
async def get_prompt(
|
||||
conversation_id: str,
|
||||
event_id: int,
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
user_settings: SettingsStore = Depends(get_user_settings_store),
|
||||
metadata: ConversationMetadata = Depends(get_conversation_metadata),
|
||||
):
|
||||
@@ -440,8 +440,8 @@ async def _get_conversation_info(
|
||||
|
||||
@app.post('/conversations/{conversation_id}/start')
|
||||
async def start_conversation(
|
||||
conversation_id: str,
|
||||
providers_set: ProvidersSetModel,
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
user_id: str = Depends(get_user_id),
|
||||
settings: Settings = Depends(get_user_settings),
|
||||
conversation_store: ConversationStore = Depends(get_conversation_store),
|
||||
@@ -501,7 +501,7 @@ async def start_conversation(
|
||||
|
||||
@app.post('/conversations/{conversation_id}/stop')
|
||||
async def stop_conversation(
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
conversation_id: str,
|
||||
user_id: str = Depends(get_user_id),
|
||||
) -> ConversationResponse:
|
||||
"""Stop an agent loop for a conversation.
|
||||
@@ -606,8 +606,8 @@ class UpdateConversationRequest(BaseModel):
|
||||
|
||||
@app.patch('/conversations/{conversation_id}')
|
||||
async def update_conversation(
|
||||
conversation_id: str,
|
||||
data: UpdateConversationRequest,
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
user_id: str | None = Depends(get_user_id),
|
||||
conversation_store: ConversationStore = Depends(get_conversation_store),
|
||||
) -> bool:
|
||||
@@ -714,8 +714,7 @@ async def update_conversation(
|
||||
|
||||
@app.post('/conversations/{conversation_id}/exp-config')
|
||||
def add_experiment_config_for_conversation(
|
||||
exp_config: ExperimentConfig,
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
conversation_id: str, exp_config: ExperimentConfig
|
||||
) -> bool:
|
||||
exp_config_filepath = get_experiment_config_filename(conversation_id)
|
||||
exists = False
|
||||
|
||||
@@ -53,7 +53,7 @@ async def initialize_conversation(
|
||||
conversation_title = get_default_conversation_title(conversation_id)
|
||||
|
||||
logger.info(f'Saving metadata for conversation {conversation_id}')
|
||||
conversation_metadata = ConversationMetadata(
|
||||
convo_metadata = ConversationMetadata(
|
||||
trigger=conversation_trigger,
|
||||
conversation_id=conversation_id,
|
||||
title=conversation_title,
|
||||
@@ -63,12 +63,12 @@ async def initialize_conversation(
|
||||
git_provider=git_provider,
|
||||
)
|
||||
|
||||
await conversation_store.save_metadata(conversation_metadata)
|
||||
return conversation_metadata
|
||||
await conversation_store.save_metadata(convo_metadata)
|
||||
return convo_metadata
|
||||
|
||||
try:
|
||||
conversation_metadata = await conversation_store.get_metadata(conversation_id)
|
||||
return conversation_metadata
|
||||
convo_metadata = await conversation_store.get_metadata(conversation_id)
|
||||
return convo_metadata
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -83,7 +83,7 @@ async def start_conversation(
|
||||
image_urls: list[str] | None,
|
||||
replay_json: str | None,
|
||||
conversation_id: str,
|
||||
conversation_metadata: ConversationMetadata,
|
||||
convo_metadata: ConversationMetadata,
|
||||
conversation_instructions: str | None,
|
||||
mcp_config: MCPConfig | None = None,
|
||||
) -> AgentLoopInfo:
|
||||
@@ -92,7 +92,7 @@ async def start_conversation(
|
||||
extra={
|
||||
'signal': 'create_conversation',
|
||||
'user_id': user_id,
|
||||
'trigger': conversation_metadata.trigger,
|
||||
'trigger': convo_metadata.trigger,
|
||||
},
|
||||
)
|
||||
logger.info('Loading settings')
|
||||
@@ -119,10 +119,10 @@ async def start_conversation(
|
||||
raise MissingSettingsError('Settings not found')
|
||||
|
||||
session_init_args['git_provider_tokens'] = git_provider_tokens
|
||||
session_init_args['selected_repository'] = conversation_metadata.selected_repository
|
||||
session_init_args['selected_repository'] = convo_metadata.selected_repository
|
||||
session_init_args['custom_secrets'] = custom_secrets
|
||||
session_init_args['selected_branch'] = conversation_metadata.selected_branch
|
||||
session_init_args['git_provider'] = conversation_metadata.git_provider
|
||||
session_init_args['selected_branch'] = convo_metadata.selected_branch
|
||||
session_init_args['git_provider'] = convo_metadata.git_provider
|
||||
session_init_args['conversation_instructions'] = conversation_instructions
|
||||
if mcp_config:
|
||||
session_init_args['mcp_config'] = mcp_config
|
||||
|
||||
@@ -6,9 +6,7 @@ from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.llm.llm_registry import RegistryEvent
|
||||
from openhands.llm.metrics import Metrics
|
||||
from openhands.storage.files import FileStore
|
||||
from openhands.storage.locations import (
|
||||
get_conversation_stats_filename,
|
||||
)
|
||||
from openhands.storage.locations import get_conversation_stats_filename
|
||||
|
||||
|
||||
class ConversationStats:
|
||||
@@ -39,10 +37,6 @@ class ConversationStats:
|
||||
pickled = pickle.dumps(self.service_to_metrics)
|
||||
serialized_metrics = base64.b64encode(pickled).decode('utf-8')
|
||||
self.file_store.write(self.metrics_path, serialized_metrics)
|
||||
logger.info(
|
||||
'Saved converation stats',
|
||||
extra={'conversation_id': self.conversation_id},
|
||||
)
|
||||
|
||||
def maybe_restore_metrics(self):
|
||||
if not self.file_store or not self.conversation_id:
|
||||
@@ -60,6 +54,9 @@ class ConversationStats:
|
||||
total_metrics = Metrics()
|
||||
for metrics in self.service_to_metrics.values():
|
||||
total_metrics.merge(metrics)
|
||||
|
||||
logger.info(f'metrics by all services: {self.service_to_metrics}')
|
||||
logger.info(f'combined metrics\n\n{total_metrics}')
|
||||
return total_metrics
|
||||
|
||||
def get_metrics_for_service(self, service_id: str) -> Metrics:
|
||||
|
||||
@@ -67,7 +67,7 @@ class AgentSession:
|
||||
sid: str,
|
||||
file_store: FileStore,
|
||||
llm_registry: LLMRegistry,
|
||||
conversation_stats: ConversationStats,
|
||||
convo_stats: ConversationStats,
|
||||
status_callback: Callable | None = None,
|
||||
user_id: str | None = None,
|
||||
) -> None:
|
||||
@@ -86,7 +86,7 @@ class AgentSession:
|
||||
extra={'session_id': sid, 'user_id': user_id}
|
||||
)
|
||||
self.llm_registry = llm_registry
|
||||
self.conversation_stats = conversation_stats
|
||||
self.convo_stats = convo_stats
|
||||
|
||||
async def start(
|
||||
self,
|
||||
@@ -450,7 +450,7 @@ class AgentSession:
|
||||
user_id=self.user_id,
|
||||
file_store=self.file_store,
|
||||
event_stream=self.event_stream,
|
||||
conversation_stats=self.conversation_stats,
|
||||
convo_stats=self.convo_stats,
|
||||
agent=agent,
|
||||
iteration_delta=int(max_iterations),
|
||||
budget_per_task_delta=max_budget_per_task,
|
||||
|
||||
@@ -55,7 +55,7 @@ class Session:
|
||||
sid: str,
|
||||
config: OpenHandsConfig,
|
||||
llm_registry: LLMRegistry,
|
||||
conversation_stats: ConversationStats,
|
||||
convo_stats: ConversationStats,
|
||||
file_store: FileStore,
|
||||
sio: socketio.AsyncServer | None,
|
||||
user_id: str | None = None,
|
||||
@@ -66,12 +66,12 @@ class Session:
|
||||
self.file_store = file_store
|
||||
self.logger = OpenHandsLoggerAdapter(extra={'session_id': sid})
|
||||
self.llm_registry = llm_registry
|
||||
self.conversation_stats = conversation_stats
|
||||
self.convo_stats = convo_stats
|
||||
self.agent_session = AgentSession(
|
||||
sid,
|
||||
file_store,
|
||||
llm_registry=self.llm_registry,
|
||||
conversation_stats=conversation_stats,
|
||||
convo_stats=convo_stats,
|
||||
status_callback=self.queue_status_message,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
@@ -13,50 +13,6 @@ from openhands.storage.conversation.conversation_store import ConversationStore
|
||||
from openhands.storage.data_models.conversation_metadata import ConversationMetadata
|
||||
|
||||
|
||||
def validate_conversation_id(conversation_id: str) -> str:
    """Check a conversation ID's length and characters, then return it.

    The checks run in a fixed order and the first one that fails decides
    the error message:
    1. more than 100 characters;
    2. contains a null byte;
    3. contains '..', '/' or a backslash (path traversal attempts);
    4. contains any character with a code point below 32 (control
       characters and newlines).

    Args:
        conversation_id: The conversation ID to validate.

    Returns:
        The same conversation ID, unchanged, when every check passes.

    Raises:
        HTTPException: With status 400 when any check fails.
    """
    # UUID hex is 32 characters; the limit allows some flexibility but not
    # excessive length.
    rejection_rules = (
        (
            lambda cid: len(cid) > 100,
            'Conversation ID is too long',
        ),
        (
            lambda cid: '\x00' in cid,
            'Conversation ID contains invalid characters',
        ),
        (
            lambda cid: any(token in cid for token in ('..', '/', '\\')),
            'Conversation ID contains invalid path characters',
        ),
        (
            lambda cid: any(ord(ch) < 32 for ch in cid),
            'Conversation ID contains control characters',
        ),
    )

    for is_rejected, detail in rejection_rules:
        if is_rejected(conversation_id):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=detail,
            )

    return conversation_id
|
||||
|
||||
|
||||
async def get_conversation_store(request: Request) -> ConversationStore | None:
|
||||
conversation_store: ConversationStore | None = getattr(
|
||||
request.state, 'conversation_store', None
|
||||
|
||||
@@ -51,7 +51,7 @@ async def generate_conversation_title(
|
||||
]
|
||||
|
||||
title = llm_registry.request_extraneous_completion(
|
||||
'conversation_title_creator', llm_config, messages
|
||||
'convo_title_creator', llm_config, messages
|
||||
)
|
||||
|
||||
# Ensure the title isn't too long
|
||||
|
||||
@@ -19,7 +19,7 @@ def setup_llm_config(config: OpenHandsConfig, settings: Settings) -> OpenHandsCo
|
||||
return config
|
||||
|
||||
|
||||
def create_registry_and_conversation_stats(
|
||||
def create_registry_and_convo_stats(
|
||||
config: OpenHandsConfig,
|
||||
sid: str,
|
||||
user_id: str | None,
|
||||
@@ -31,13 +31,7 @@ def create_registry_and_conversation_stats(
|
||||
|
||||
agent_cls = user_settings.agent if user_settings else None
|
||||
llm_registry = LLMRegistry(user_config, agent_cls)
|
||||
file_store = get_file_store(
|
||||
file_store_type=config.file_store,
|
||||
file_store_path=config.file_store_path,
|
||||
file_store_web_hook_url=config.file_store_web_hook_url,
|
||||
file_store_web_hook_headers=config.file_store_web_hook_headers,
|
||||
file_store_web_hook_batch=config.file_store_web_hook_batch,
|
||||
)
|
||||
conversation_stats = ConversationStats(file_store, sid, user_id)
|
||||
llm_registry.subscribe(conversation_stats.register_llm)
|
||||
return llm_registry, conversation_stats, user_config
|
||||
file_store = get_file_store(user_config.file_store, user_config.file_store_path)
|
||||
convo_stats = ConversationStats(file_store, sid, user_id)
|
||||
llm_registry.subscribe(convo_stats.register_llm)
|
||||
return llm_registry, convo_stats, user_config
|
||||
|
||||
Generated
+33
-34
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiofiles"
|
||||
@@ -208,7 +208,7 @@ version = "0.7.0"
|
||||
description = "Reusable constraint types to use with typing.Annotated"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
|
||||
{file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
|
||||
@@ -247,7 +247,7 @@ version = "4.9.0"
|
||||
description = "High level compatibility layer for multiple asynchronous event loop implementations"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "evaluation", "runtime"]
|
||||
groups = ["main", "evaluation", "runtime", "test"]
|
||||
files = [
|
||||
{file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"},
|
||||
{file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"},
|
||||
@@ -404,7 +404,7 @@ description = "LTS Port of Python audioop"
|
||||
optional = false
|
||||
python-versions = ">=3.13"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.13\""
|
||||
markers = "python_version == \"3.13\""
|
||||
files = [
|
||||
{file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd1345ae99e17e6910f47ce7d52673c6a1a70820d78b67de1b7abb3af29c426a"},
|
||||
{file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:e175350da05d2087e12cea8e72a70a1a8b14a17e92ed2022952a4419689ede5e"},
|
||||
@@ -1592,7 +1592,7 @@ files = [
|
||||
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||
]
|
||||
markers = {main = "platform_system == \"Windows\" or os_name == \"nt\" or sys_platform == \"win32\"", dev = "os_name == \"nt\" or sys_platform == \"win32\"", runtime = "sys_platform == \"win32\"", test = "sys_platform == \"win32\""}
|
||||
markers = {main = "platform_system == \"Windows\" or os_name == \"nt\" or sys_platform == \"win32\"", dev = "os_name == \"nt\" or sys_platform == \"win32\"", runtime = "sys_platform == \"win32\"", test = "platform_system == \"Windows\" or sys_platform == \"win32\""}
|
||||
|
||||
[[package]]
|
||||
name = "comm"
|
||||
@@ -2135,7 +2135,7 @@ version = "1.9.0"
|
||||
description = "Distro - an OS platform information API"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
|
||||
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
|
||||
@@ -2997,8 +2997,8 @@ files = [
|
||||
google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
|
||||
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev"
|
||||
proto-plus = [
|
||||
{version = ">=1.22.3,<2.0.0dev"},
|
||||
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=1.22.3,<2.0.0dev"},
|
||||
]
|
||||
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev"
|
||||
|
||||
@@ -3020,8 +3020,8 @@ googleapis-common-protos = ">=1.56.2,<2.0.0"
|
||||
grpcio = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
|
||||
grpcio-status = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
|
||||
proto-plus = [
|
||||
{version = ">=1.22.3,<2.0.0"},
|
||||
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=1.22.3,<2.0.0"},
|
||||
]
|
||||
protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
|
||||
requests = ">=2.18.0,<3.0.0"
|
||||
@@ -3239,8 +3239,8 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras
|
||||
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0"
|
||||
grpc-google-iam-v1 = ">=0.14.0,<1.0.0"
|
||||
proto-plus = [
|
||||
{version = ">=1.22.3,<2.0.0"},
|
||||
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=1.22.3,<2.0.0"},
|
||||
]
|
||||
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
|
||||
|
||||
@@ -3637,7 +3637,7 @@ version = "0.16.0"
|
||||
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "evaluation", "runtime"]
|
||||
groups = ["main", "evaluation", "runtime", "test"]
|
||||
files = [
|
||||
{file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"},
|
||||
{file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"},
|
||||
@@ -3713,7 +3713,7 @@ version = "1.0.9"
|
||||
description = "A minimal low-level HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "evaluation", "runtime"]
|
||||
groups = ["main", "evaluation", "runtime", "test"]
|
||||
files = [
|
||||
{file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"},
|
||||
{file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"},
|
||||
@@ -3750,7 +3750,7 @@ version = "0.28.1"
|
||||
description = "The next generation HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "evaluation", "runtime"]
|
||||
groups = ["main", "evaluation", "runtime", "test"]
|
||||
files = [
|
||||
{file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
|
||||
{file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
|
||||
@@ -4206,7 +4206,7 @@ version = "0.10.0"
|
||||
description = "Fast iterable JSON parser."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303"},
|
||||
{file = "jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e"},
|
||||
@@ -6515,14 +6515,14 @@ typing-extensions = {version = "*", markers = "python_full_version < \"3.13.0\""
|
||||
|
||||
[[package]]
|
||||
name = "openai"
|
||||
version = "1.99.9"
|
||||
version = "1.97.1"
|
||||
description = "The official Python library for the openai API"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a"},
|
||||
{file = "openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92"},
|
||||
{file = "openai-1.97.1-py3-none-any.whl", hash = "sha256:4e96bbdf672ec3d44968c9ea39d2c375891db1acc1794668d8149d5fa6000606"},
|
||||
{file = "openai-1.97.1.tar.gz", hash = "sha256:a744b27ae624e3d4135225da9b1c89c107a2a7e5bc4c93e5b7b5214772ce7a4e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -6558,14 +6558,14 @@ pydantic = ">=1.8"
|
||||
|
||||
[[package]]
|
||||
name = "openhands-aci"
|
||||
version = "0.3.2"
|
||||
version = "0.3.1"
|
||||
description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.12"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openhands_aci-0.3.2-py3-none-any.whl", hash = "sha256:a3ff6fe3dd50124598b8bc3aff8d9742d6e75f933f7e7635a9d0b37d45eb826e"},
|
||||
{file = "openhands_aci-0.3.2.tar.gz", hash = "sha256:df7b64df6acb70b45b23e88c13508e7af8f27725bed30c3e88691a0f3d1f7a44"},
|
||||
{file = "openhands_aci-0.3.1-py3-none-any.whl", hash = "sha256:d1d9d5379388bc0119c6722b8dacf63f7c747788ac5b6c26263601b2001d11c3"},
|
||||
{file = "openhands_aci-0.3.1.tar.gz", hash = "sha256:125c4773b3fd2729ec0c74d005095dad21aa0f7a1e8733e5f33f3f71466f6df9"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -6663,8 +6663,8 @@ files = [
|
||||
[package.dependencies]
|
||||
googleapis-common-protos = ">=1.52,<2.0"
|
||||
grpcio = [
|
||||
{version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
|
||||
{version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
|
||||
{version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
|
||||
]
|
||||
opentelemetry-api = ">=1.15,<2.0"
|
||||
opentelemetry-exporter-otlp-proto-common = "1.34.1"
|
||||
@@ -7546,7 +7546,7 @@ version = "2.11.5"
|
||||
description = "Data validation using Python type hints"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "pydantic-2.11.5-py3-none-any.whl", hash = "sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7"},
|
||||
{file = "pydantic-2.11.5.tar.gz", hash = "sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a"},
|
||||
@@ -7568,7 +7568,7 @@ version = "2.33.2"
|
||||
description = "Core functionality for Pydantic validation and serialization"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8"},
|
||||
{file = "pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d"},
|
||||
@@ -9438,7 +9438,6 @@ files = [
|
||||
{file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
|
||||
{file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
|
||||
]
|
||||
markers = {evaluation = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
|
||||
|
||||
[package.extras]
|
||||
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""]
|
||||
@@ -9588,7 +9587,7 @@ version = "1.3.1"
|
||||
description = "Sniff out which async library your code is running under"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main", "evaluation", "runtime"]
|
||||
groups = ["main", "evaluation", "runtime", "test"]
|
||||
files = [
|
||||
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
|
||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
||||
@@ -9682,7 +9681,7 @@ description = "Standard library aifc redistribution. \"dead battery\"."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.13\""
|
||||
markers = "python_version == \"3.13\""
|
||||
files = [
|
||||
{file = "standard_aifc-3.13.0-py3-none-any.whl", hash = "sha256:f7ae09cc57de1224a0dd8e3eb8f73830be7c3d0bc485de4c1f82b4a7f645ac66"},
|
||||
{file = "standard_aifc-3.13.0.tar.gz", hash = "sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43"},
|
||||
@@ -9699,7 +9698,7 @@ description = "Standard library chunk redistribution. \"dead battery\"."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.13\""
|
||||
markers = "python_version == \"3.13\""
|
||||
files = [
|
||||
{file = "standard_chunk-3.13.0-py3-none-any.whl", hash = "sha256:17880a26c285189c644bd5bd8f8ed2bdb795d216e3293e6dbe55bbd848e2982c"},
|
||||
{file = "standard_chunk-3.13.0.tar.gz", hash = "sha256:4ac345d37d7e686d2755e01836b8d98eda0d1a3ee90375e597ae43aaf064d654"},
|
||||
@@ -10215,7 +10214,7 @@ version = "4.67.1"
|
||||
description = "Fast, Extensible Progress Meter"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
|
||||
{file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
|
||||
@@ -10671,7 +10670,7 @@ version = "0.4.1"
|
||||
description = "Runtime typing introspection tools"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "evaluation"]
|
||||
groups = ["main", "evaluation", "test"]
|
||||
files = [
|
||||
{file = "typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51"},
|
||||
{file = "typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28"},
|
||||
@@ -11388,14 +11387,14 @@ test = ["pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "xlsxwriter"
|
||||
version = "3.2.5"
|
||||
version = "3.2.3"
|
||||
description = "A Python module for creating Excel XLSX files."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "xlsxwriter-3.2.5-py3-none-any.whl", hash = "sha256:4f4824234e1eaf9d95df9a8fe974585ff91d0f5e3d3f12ace5b71e443c1c6abd"},
|
||||
{file = "xlsxwriter-3.2.5.tar.gz", hash = "sha256:7e88469d607cdc920151c0ab3ce9cf1a83992d4b7bc730c5ffdd1a12115a7dbe"},
|
||||
{file = "XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d"},
|
||||
{file = "xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -11879,4 +11878,4 @@ third-party-runtimes = ["daytona", "e2b", "modal", "runloop-api-client"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12,<3.14"
|
||||
content-hash = "469b54a3f7f5d104f68503fc70a89c016cbb7d9b7dc019226ed62e93ee928b98"
|
||||
content-hash = "dbcab8224ee537e465f51c5170d8c19e749236c7ba01268f459140c95266afd7"
|
||||
|
||||
+3
-3
@@ -6,7 +6,7 @@ requires = [
|
||||
|
||||
[tool.poetry]
|
||||
name = "openhands-ai"
|
||||
version = "0.54.0"
|
||||
version = "0.53.0"
|
||||
description = "OpenHands: Code Less, Make More"
|
||||
authors = [ "OpenHands" ]
|
||||
license = "MIT"
|
||||
@@ -27,7 +27,6 @@ build = "build_vscode.py" # Build VSCode extension during Poetry build
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.12,<3.14"
|
||||
litellm = "^1.74.3, !=1.64.4, !=1.67.*" # avoid 1.64.4 (known bug) & 1.67.* (known bug #10272)
|
||||
openai = "1.99.9" # Pin due to litellm incompatibility with >=1.100.0 (BerriAI/litellm#13711)
|
||||
aiohttp = ">=3.9.0,!=3.11.13" # Pin to avoid yanked version 3.11.13
|
||||
google-generativeai = "*" # To use litellm with Gemini Pro API
|
||||
google-api-python-client = "^2.164.0" # For Google Sheets API
|
||||
@@ -64,7 +63,7 @@ opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
|
||||
libtmux = ">=0.37,<0.40"
|
||||
pygithub = "^2.5.0"
|
||||
joblib = "*"
|
||||
openhands-aci = "0.3.2"
|
||||
openhands-aci = "0.3.1"
|
||||
python-socketio = "^5.11.4"
|
||||
sse-starlette = "^2.1.3"
|
||||
psutil = "*"
|
||||
@@ -129,6 +128,7 @@ pytest-forked = "*"
|
||||
pytest-xdist = "*"
|
||||
pytest-playwright = "^0.7.0"
|
||||
pytest-timeout = "^2.4.0"
|
||||
openai = "*"
|
||||
pandas = "*"
|
||||
reportlab = "*"
|
||||
gevent = ">=24.2.1,<26.0.0"
|
||||
|
||||
@@ -1,226 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Update OpenHands OpenAPI documentation.
|
||||
|
||||
Generates the OpenAPI specification from the FastAPI application and writes it
|
||||
to docs/openapi.json.
|
||||
|
||||
Usage:
|
||||
python scripts/update_openapi.py
|
||||
|
||||
Behavior:
|
||||
- Uses openhands.server.app.app.openapi() to build the spec.
|
||||
- Preserves existing "servers" from docs/openapi.json if present; otherwise
|
||||
writes sensible defaults.
|
||||
- Sets info.version to openhands.__version__.
|
||||
- Sanitizes endpoint descriptions to remove code blocks and internal-only sections.
|
||||
- Excludes operational, experimental, and UI-only convenience endpoints:
- /api/conversations/{conversation_id}/exp-config
|
||||
- /server_info
|
||||
- /api/conversations/{conversation_id}/vscode-url
|
||||
- /api/conversations/{conversation_id}/web-hosts
|
||||
- Creates a backup docs/openapi.json.backup before overwriting.
|
||||
|
||||
Output:
|
||||
- Prints OpenAPI and API versions, endpoint count, servers count, and sample endpoints.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
# Suppress warnings and logs during import
|
||||
logging.getLogger().setLevel(logging.CRITICAL)
|
||||
warnings.filterwarnings('ignore')
|
||||
os.environ['OPENHANDS_LOG_LEVEL'] = 'CRITICAL'
|
||||
|
||||
# Add the project root to the Python path
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
try:
|
||||
from openhands import __version__
|
||||
from openhands.server.app import app
|
||||
except ImportError as e:
|
||||
print(f'Error importing OpenHands modules: {e}')
|
||||
print(
|
||||
"Make sure you're running this script from the project root and dependencies are installed."
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _sanitize_description(text: str) -> str:
|
||||
"""Remove internal, code-centric, or redundant sections from endpoint descriptions.
|
||||
|
||||
- Strip fenced code blocks
|
||||
- Remove Args/Returns/Raises/Example/Examples/Notes sections
|
||||
- Remove inline curl examples
|
||||
- Avoid provider-implementation specifics like LiteLLM/Bedrock
|
||||
"""
|
||||
import re
|
||||
|
||||
if not text:
|
||||
return text
|
||||
|
||||
# Remove fenced code blocks
|
||||
text = re.sub(r'```[\s\S]*?```', '', text, flags=re.MULTILINE)
|
||||
|
||||
# Remove common docstring sections (until next blank line or end)
|
||||
for header in [
|
||||
r'Args?:',
|
||||
r'Returns?:',
|
||||
r'Raises?:',
|
||||
r'Example[s]?:',
|
||||
r'Notes?:',
|
||||
]:
|
||||
text = re.sub(rf'(?ms)^\s*{header}.*?(?:\n\s*\n|\Z)', '', text)
|
||||
|
||||
# Remove lines that contain curl examples
|
||||
text = re.sub(r'(?im)^.*\bcurl\b.*$', '', text)
|
||||
|
||||
# Generalize provider-implementation specifics
|
||||
text = re.sub(r'\bLiteLLM\b', 'configured model providers', text)
|
||||
text = re.sub(r'\blitellm\b', 'configured providers', text)
|
||||
text = re.sub(r'\bBedrock\b', '', text)
|
||||
|
||||
# Collapse excessive blank lines and trim
|
||||
text = re.sub(r'\n{3,}', '\n\n', text).strip()
|
||||
return text
|
||||
|
||||
|
||||
def _sanitize_spec(spec: dict) -> dict:
|
||||
"""Sanitize descriptions and summaries to be public-API friendly."""
|
||||
path_summary_overrides = {
|
||||
'/api/options/models': 'List Supported Models',
|
||||
'/api/options/agents': 'List Agents',
|
||||
'/api/options/security-analyzers': 'List Security Analyzers',
|
||||
'/api/conversations/{conversation_id}/list-files': 'List Workspace Files',
|
||||
'/api/conversations/{conversation_id}/select-file': 'Get File Content',
|
||||
'/api/conversations/{conversation_id}/zip-directory': 'Download Workspace Archive',
|
||||
}
|
||||
path_description_overrides = {
|
||||
'/api/options/models': 'List model identifiers available on this server based on configured providers.',
|
||||
'/api/options/agents': 'List available agent types supported by this server.',
|
||||
'/api/options/security-analyzers': 'List supported security analyzers.',
|
||||
'/api/conversations/{conversation_id}/list-files': 'List workspace files visible to the conversation runtime. Applies .gitignore and internal ignore rules.',
|
||||
'/api/conversations/{conversation_id}/select-file': 'Return the content of the given file from the conversation workspace.',
|
||||
'/api/conversations/{conversation_id}/zip-directory': 'Return a ZIP archive of the current conversation workspace.',
|
||||
}
|
||||
|
||||
for path, methods in list(spec.get('paths', {}).items()):
|
||||
for method, meta in list(methods.items()):
|
||||
if not isinstance(meta, dict):
|
||||
continue
|
||||
# Override overly specific summaries where helpful
|
||||
if path in path_summary_overrides:
|
||||
meta['summary'] = path_summary_overrides[path]
|
||||
# Override description if provided; otherwise sanitize
|
||||
if path in path_description_overrides:
|
||||
meta['description'] = path_description_overrides[path]
|
||||
elif 'description' in meta and isinstance(meta['description'], str):
|
||||
meta['description'] = _sanitize_description(meta['description'])
|
||||
|
||||
return spec
|
||||
|
||||
|
||||
def generate_openapi_spec():
    """Build the OpenAPI document from the FastAPI app.

    Endpoints on the exclusion list are dropped from ``paths`` (each removal is
    reported on stdout) and the remaining spec is passed through
    ``_sanitize_spec`` before being returned.
    """
    # Endpoints that are operational, experimental, or UI-only convenience
    excluded_endpoints = (
        '/api/conversations/{conversation_id}/exp-config',  # Internal experimentation endpoint
        '/server_info',  # Operational/system diagnostics
        '/api/conversations/{conversation_id}/vscode-url',  # UI/runtime convenience
        '/api/conversations/{conversation_id}/web-hosts',  # UI/runtime convenience
    )

    spec = app.openapi()

    if 'paths' in spec:
        documented_paths = spec['paths']
        for endpoint in excluded_endpoints:
            if endpoint not in documented_paths:
                continue
            del documented_paths[endpoint]
            print(f'Excluded endpoint: {endpoint}')

    # Clean up descriptions and summaries on whatever is left
    return _sanitize_spec(spec)
|
||||
|
||||
|
||||
def load_current_spec(spec_path):
    """Load the current OpenAPI specification if it exists.

    Args:
        spec_path: Path of the JSON specification file.

    Returns:
        The parsed JSON document, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        # Open directly instead of checking existence first (no check-then-use
        # race), and read as UTF-8 rather than the platform's default encoding.
        with open(spec_path, encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return {}
|
||||
|
||||
|
||||
def update_openapi_spec(spec_path, backup=True):
    """Update the OpenAPI specification file.

    Generates a fresh spec, carries over the ``servers`` entry from the file
    currently on disk (or installs defaults when there is none), stamps
    ``info.version`` with the package version, and writes the result.

    Args:
        spec_path: ``pathlib.Path`` of the specification file to (over)write.
        backup: When true and ``spec_path`` exists, the current file is moved
            to the same path with its final suffix replaced by
            ``.json.backup`` before the new spec is written.

    Returns:
        The newly generated specification dict that was written to disk.
    """
    # Generate new spec
    new_spec = generate_openapi_spec()

    # Load current spec for server information
    current_spec = load_current_spec(spec_path)

    # Preserve server information from current spec if it exists
    if 'servers' in current_spec:
        new_spec['servers'] = current_spec['servers']
    else:
        # Default servers if none exist
        new_spec['servers'] = [
            {'url': 'https://app.all-hands.dev', 'description': 'Production server'},
            {'url': 'http://localhost:3000', 'description': 'Local server'},
        ]

    # Update version to match the package version
    new_spec['info']['version'] = __version__

    # Backup current file if requested
    if backup and spec_path.exists():
        backup_path = spec_path.with_suffix('.json.backup')
        # replace() overwrites a backup left by an earlier run on every
        # platform; rename() raises FileExistsError on Windows in that case.
        spec_path.replace(backup_path)
        print(f'Backed up current spec to {backup_path}')

    # Write new spec (explicit encoding: do not depend on the platform default)
    with open(spec_path, 'w', encoding='utf-8') as f:
        json.dump(new_spec, f, indent=2)

    return new_spec
|
||||
|
||||
|
||||
def main():
    """Regenerate docs/openapi.json and print a short summary of the result.

    Any exception raised while updating or summarizing is reported on stdout
    and the process exits with status 1.
    """
    spec_path = project_root / 'docs' / 'openapi.json'

    print('Updating OpenAPI specification...')
    print(f'Target file: {spec_path}')

    try:
        new_spec = update_openapi_spec(spec_path)

        print('✅ Successfully updated OpenAPI specification!')
        print(f' OpenAPI version: {new_spec.get("openapi", "N/A")}')
        print(f' API version: {new_spec.get("info", {}).get("version", "N/A")}')
        print(f' Total endpoints: {len(new_spec.get("paths", {}))}')
        print(f' Servers: {len(new_spec.get("servers", []))}')

        # Show the first few endpoints (alphabetically) as a sanity check
        endpoint_paths = list(new_spec.get('paths', {}).keys())
        if endpoint_paths:
            print(' Sample endpoints:')
            for sample in sorted(endpoint_paths)[:5]:
                http_methods = list(new_spec['paths'][sample].keys())
                print(f' {sample}: {http_methods}')
            not_shown = len(endpoint_paths) - 5
            if not_shown > 0:
                print(f' ... and {not_shown} more')

    except Exception as exc:
        print(f'❌ Error updating OpenAPI specification: {exc}')
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
+19
-1
@@ -22,6 +22,7 @@ The following environment variables are required:
|
||||
Optional environment variables:
|
||||
|
||||
- `LLM_BASE_URL`: The base URL for the LLM API (if using a custom endpoint)
|
||||
- `GITLAB_TOKEN`: A GitLab token for testing GitLab integration (required for GitLab tests)
|
||||
|
||||
### Configuration Options
|
||||
|
||||
@@ -73,9 +74,11 @@ poetry run pytest test_settings.py::test_github_token_configuration -v
|
||||
# Run the conversation start test
|
||||
poetry run pytest test_conversation.py::test_conversation_start -v
|
||||
|
||||
# Run the GitLab integration test
|
||||
poetry run pytest test_gitlab_integration.py::test_gitlab_repository_cloning -v
|
||||
|
||||
# Run individual tests with custom base URL
|
||||
poetry run pytest test_settings.py::test_github_token_configuration -v --base-url=https://my-instance.com
|
||||
|
||||
```
|
||||
|
||||
### Running with Visible Browser
|
||||
@@ -86,6 +89,7 @@ To run the tests with a visible browser (non-headless mode) so you can watch the
|
||||
cd tests/e2e
|
||||
poetry run pytest test_settings.py::test_github_token_configuration -v --no-headless --slow-mo=50
|
||||
poetry run pytest test_conversation.py::test_conversation_start -v --no-headless --slow-mo=50
|
||||
poetry run pytest test_gitlab_integration.py::test_gitlab_repository_cloning -v --no-headless --slow-mo=50
|
||||
|
||||
# Combine with custom base URL
|
||||
poetry run pytest test_settings.py::test_github_token_configuration -v --no-headless --slow-mo=50 --base-url=https://my-instance.com
|
||||
@@ -122,6 +126,20 @@ The conversation start test (`test_conversation_start`) performs the following s
|
||||
6. Asks "How many lines are there in the main README.md file?"
|
||||
7. Waits for and verifies the agent's response
|
||||
|
||||
### GitLab Integration Test
|
||||
|
||||
The GitLab integration test (`test_gitlab_repository_cloning`) performs the following steps:
|
||||
|
||||
1. Navigates to the OpenHands application
|
||||
2. Configures GitLab token if needed (from `GITLAB_TOKEN` environment variable)
|
||||
3. Selects GitLab as the provider
|
||||
4. Selects a public GitLab repository (gitlab-org/gitlab-foss)
|
||||
5. Clicks the "Launch" button
|
||||
6. Waits for the conversation interface to load
|
||||
7. Waits for the agent to initialize
|
||||
8. Asks the agent to list workspace contents to verify repository cloning
|
||||
9. Verifies that the GitLab repository was successfully cloned into the workspace
|
||||
|
||||
|
||||
|
||||
### Simple Browser Navigation Test
|
||||
|
||||
@@ -0,0 +1,874 @@
|
||||
"""
|
||||
E2E: GitLab integration test
|
||||
|
||||
This test verifies that OpenHands can successfully integrate with GitLab
|
||||
repositories by configuring a GitLab token, cloning a repository, and
|
||||
performing actual work with the cloned repository.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
from playwright.sync_api import Page, expect
|
||||
|
||||
|
||||
def test_gitlab_repository_cloning(page: Page):
|
||||
"""
|
||||
Test GitLab repository integration with GitLab token configuration:
|
||||
1. Navigate to OpenHands and configure GitLab token in settings
|
||||
2. Select a GitLab repository (gitlab-org/gitlab-foss)
|
||||
3. Launch the repository and wait for agent initialization
|
||||
4. Ask the agent to count lines in README.md to verify repository access
|
||||
5. Verify the agent can successfully work with the cloned GitLab repository
|
||||
|
||||
This test verifies that OpenHands can properly clone and work with GitLab repositories.
|
||||
"""
|
||||
# Create test-results directory if it doesn't exist
|
||||
os.makedirs('test-results', exist_ok=True)
|
||||
|
||||
# Navigate to the OpenHands application
|
||||
print('Step 1: Navigating to OpenHands application...')
|
||||
page.goto('http://localhost:12000')
|
||||
page.wait_for_load_state('networkidle', timeout=30000)
|
||||
|
||||
# Take initial screenshot
|
||||
page.screenshot(path='test-results/gitlab_01_initial_load.png')
|
||||
print('Screenshot saved: gitlab_01_initial_load.png')
|
||||
|
||||
# Step 1.5: Handle any initial modals (LLM API key configuration)
|
||||
try:
|
||||
# Check for AI Provider Configuration modal
|
||||
config_modal = page.locator('text=AI Provider Configuration')
|
||||
if config_modal.is_visible(timeout=5000):
|
||||
print('AI Provider Configuration modal detected')
|
||||
|
||||
# Fill in the LLM API key if available
|
||||
llm_api_key_input = page.locator('[data-testid="llm-api-key-input"]')
|
||||
if llm_api_key_input.is_visible(timeout=3000):
|
||||
llm_api_key = os.getenv('LLM_API_KEY', 'test-key')
|
||||
llm_api_key_input.fill(llm_api_key)
|
||||
print(f'Filled LLM API key (length: {len(llm_api_key)})')
|
||||
|
||||
# Click the Save button
|
||||
save_button = page.locator('button:has-text("Save")')
|
||||
if save_button.is_visible(timeout=3000):
|
||||
save_button.click()
|
||||
page.wait_for_timeout(2000)
|
||||
print('Saved LLM API key configuration')
|
||||
|
||||
# Check for Privacy Preferences modal
|
||||
privacy_modal = page.locator('text=Your Privacy Preferences')
|
||||
if privacy_modal.is_visible(timeout=5000):
|
||||
print('Privacy Preferences modal detected')
|
||||
confirm_button = page.locator('button:has-text("Confirm Preferences")')
|
||||
if confirm_button.is_visible(timeout=3000):
|
||||
confirm_button.click()
|
||||
page.wait_for_timeout(2000)
|
||||
print('Confirmed privacy preferences')
|
||||
except Exception as e:
|
||||
print(f'Error handling initial modals: {e}')
|
||||
|
||||
# Step 2: Configure GitLab token in settings
|
||||
print('Step 2: Configuring GitLab token in settings...')
|
||||
|
||||
# Check if we need to configure GitLab token
|
||||
try:
|
||||
# Look for settings navigation button
|
||||
navigate_to_settings_button = page.locator(
|
||||
'[data-testid="navigate-to-settings-button"]'
|
||||
)
|
||||
settings_button = page.locator('button:has-text("Settings")')
|
||||
|
||||
if navigate_to_settings_button.is_visible(timeout=3000):
|
||||
navigate_to_settings_button.click()
|
||||
elif settings_button.is_visible(timeout=3000):
|
||||
settings_button.click()
|
||||
else:
|
||||
# Navigate directly to settings
|
||||
page.goto('http://localhost:12000/settings/integrations')
|
||||
|
||||
page.wait_for_load_state('networkidle', timeout=10000)
|
||||
page.wait_for_timeout(3000)
|
||||
|
||||
# Make sure we're on the Integrations tab
|
||||
integrations_tab = page.locator('text=Integrations')
|
||||
if integrations_tab.is_visible(timeout=3000):
|
||||
if not page.url.endswith('/settings/integrations'):
|
||||
integrations_tab.click()
|
||||
page.wait_for_load_state('networkidle')
|
||||
page.wait_for_timeout(2000)
|
||||
|
||||
# Configure GitLab token
|
||||
gitlab_token = os.getenv('GITLAB_TOKEN', '')
|
||||
if gitlab_token:
|
||||
gitlab_token_input = page.locator('[data-testid="gitlab-token-input"]')
|
||||
if gitlab_token_input.is_visible(timeout=5000):
|
||||
gitlab_token_input.clear()
|
||||
gitlab_token_input.fill(gitlab_token)
|
||||
print(f'Filled GitLab token (length: {len(gitlab_token)})')
|
||||
|
||||
# Save the configuration
|
||||
save_button = page.locator('[data-testid="submit-button"]')
|
||||
if (
|
||||
save_button.is_visible(timeout=3000)
|
||||
and not save_button.is_disabled()
|
||||
):
|
||||
save_button.click()
|
||||
page.wait_for_timeout(3000)
|
||||
print('GitLab token saved')
|
||||
|
||||
# Navigate back to home page
|
||||
page.goto('http://localhost:12000')
|
||||
page.wait_for_load_state('networkidle')
|
||||
page.wait_for_timeout(5000)
|
||||
else:
|
||||
print('GitLab token input field not found')
|
||||
else:
|
||||
print('No GitLab token found in environment variables')
|
||||
# Navigate back to home anyway
|
||||
page.goto('http://localhost:12000')
|
||||
page.wait_for_load_state('networkidle')
|
||||
|
||||
except Exception as e:
|
||||
print(f'Error configuring GitLab token: {e}')
|
||||
page.goto('http://localhost:12000')
|
||||
page.wait_for_load_state('networkidle')
|
||||
|
||||
page.screenshot(path='test-results/gitlab_03_after_settings.png')
|
||||
print('Screenshot saved: gitlab_03_after_settings.png')
|
||||
|
||||
# Step 3: Select GitLab repository
|
||||
print('Step 3: Selecting GitLab repository...')
|
||||
|
||||
# Wait for home screen to load
|
||||
home_screen = page.locator('[data-testid="home-screen"]')
|
||||
expect(home_screen).to_be_visible(timeout=15000)
|
||||
print('Home screen is visible')
|
||||
|
||||
# Step 4: Check if provider selection is needed (GitLab)
|
||||
print('Step 4: Checking provider selection after returning from settings...')
|
||||
|
||||
# After returning from settings, the provider selection might have been reset
|
||||
# Check if provider dropdown exists and select GitLab if needed
|
||||
provider_dropdown_exists = page.evaluate("""
|
||||
() => {
|
||||
// Look for "Select Provider" text which indicates the dropdown exists
|
||||
const selectProviderElements = Array.from(document.querySelectorAll('*')).filter(el =>
|
||||
el.textContent && el.textContent.includes('Select Provider')
|
||||
);
|
||||
return selectProviderElements.length > 0;
|
||||
}
|
||||
""")
|
||||
|
||||
print(f'Provider dropdown exists: {provider_dropdown_exists}')
|
||||
|
||||
if provider_dropdown_exists:
|
||||
print(
|
||||
'Provider dropdown detected (likely reset after settings navigation), selecting GitLab...'
|
||||
)
|
||||
|
||||
# Try to click the provider dropdown robustly by targeting the react-select control
|
||||
provider_clicked = False
|
||||
try:
|
||||
provider_scope = page.locator('div:has-text("Select Provider")').first
|
||||
control = provider_scope.locator(
|
||||
'.select__control, .react-select__control'
|
||||
).first
|
||||
if control.is_visible(timeout=2000):
|
||||
control.click(force=True)
|
||||
provider_clicked = True
|
||||
print('Clicked provider dropdown via react-select control')
|
||||
except Exception as e:
|
||||
print(f'Primary provider control click failed: {e}')
|
||||
|
||||
if not provider_clicked:
|
||||
# Fallback approaches to find and click the provider dropdown
|
||||
provider_selectors = [
|
||||
'div:has-text("Select Provider")',
|
||||
'[placeholder="Select Provider"]',
|
||||
'div[class*="select"]:has-text("Select Provider")',
|
||||
'.react-select__control:has-text("Select Provider")',
|
||||
'[class*="select"][class*="control"]',
|
||||
]
|
||||
for selector in provider_selectors:
|
||||
try:
|
||||
element = page.locator(selector).first
|
||||
if element.is_visible(timeout=3000):
|
||||
element.click(force=True)
|
||||
print(f'Clicked provider dropdown with selector: {selector}')
|
||||
provider_clicked = True
|
||||
break
|
||||
except Exception as e:
|
||||
print(f'Failed with selector {selector}: {e}')
|
||||
continue
|
||||
|
||||
if not provider_clicked:
|
||||
# Try JavaScript-based clicking as fallback
|
||||
js_result = page.evaluate("""
|
||||
() => {
|
||||
const els = Array.from(document.querySelectorAll('*')).filter(el =>
|
||||
el.textContent && el.textContent.includes('Select Provider')
|
||||
);
|
||||
for (const el of els) {
|
||||
let target = el;
|
||||
while (target && target !== document.body) {
|
||||
const cls = (target.className || '').toString();
|
||||
if (target.click && (cls.includes('select') || target.getAttribute('role') === 'combobox')) {
|
||||
target.click();
|
||||
return 'clicked_' + target.tagName;
|
||||
}
|
||||
target = target.parentElement;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
""")
|
||||
if js_result:
|
||||
print(f'Clicked provider dropdown with JavaScript: {js_result}')
|
||||
provider_clicked = True
|
||||
else:
|
||||
raise Exception('Could not click provider dropdown with any method')
|
||||
|
||||
page.wait_for_timeout(1000)
|
||||
|
||||
# Select GitLab from provider options (account for label capitalization: Gitlab vs GitLab)
|
||||
gitlab_selectors = [
|
||||
'[role="option"]:has-text("GitLab")',
|
||||
'div:has-text("GitLab")',
|
||||
'.react-select__option:has-text("GitLab")',
|
||||
'[class*="option"]:has-text("GitLab")',
|
||||
]
|
||||
|
||||
gitlab_selected = False
|
||||
for selector in gitlab_selectors:
|
||||
try:
|
||||
gitlab_option = page.locator(selector).first
|
||||
if gitlab_option.is_visible(timeout=3000):
|
||||
gitlab_option.click()
|
||||
print(f'Selected GitLab provider with selector: {selector}')
|
||||
gitlab_selected = True
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if not gitlab_selected:
|
||||
print('GitLab provider option not found, trying keyboard navigation')
|
||||
page.keyboard.press('ArrowDown')
|
||||
page.keyboard.press('ArrowDown') # Assuming GitLab is second option
|
||||
page.keyboard.press('Enter')
|
||||
print('Used keyboard navigation to select GitLab')
|
||||
|
||||
page.wait_for_timeout(2000)
|
||||
else:
|
||||
print('No provider dropdown found, GitLab should be auto-selected')
|
||||
page.wait_for_timeout(1000)
|
||||
|
||||
# Step 5: Search for repository
|
||||
print('Step 5: Searching for GitLab repository...')
|
||||
|
||||
# Prefer robust selection by test id for repo dropdown
|
||||
dropdown = None
|
||||
try:
|
||||
repo_dropdown = page.locator('[data-testid="repo-dropdown"]').first
|
||||
if repo_dropdown.is_visible(timeout=5000):
|
||||
dropdown = repo_dropdown
|
||||
print('Found repository dropdown via [data-testid="repo-dropdown"]')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if dropdown is None:
|
||||
# Fallback: try multiple selectors for the repository search dropdown
|
||||
repo_selectors = [
|
||||
'text=Search repositories...',
|
||||
'[placeholder="Search repositories..."]',
|
||||
'div:has-text("Search repositories...")',
|
||||
'.react-select__placeholder:has-text("Search repositories...")',
|
||||
]
|
||||
for selector in repo_selectors:
|
||||
try:
|
||||
element = page.locator(selector).first
|
||||
if element.is_visible(timeout=3000):
|
||||
dropdown = element
|
||||
print(f'Found repository search with selector: {selector}')
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if dropdown is None:
|
||||
print(
|
||||
'Could not find repository search, trying second dropdown in Connect section'
|
||||
)
|
||||
# Try to find the second dropdown in the Connect to a Repository section
|
||||
connect_section = page.locator('div:has-text("Connect to a Repository")').first
|
||||
dropdowns = connect_section.locator('div[class*="select"]')
|
||||
if dropdowns.count() >= 2:
|
||||
dropdown = dropdowns.nth(1)
|
||||
print('Using second dropdown in Connect section')
|
||||
|
||||
expect(dropdown).to_be_visible(timeout=10000)
|
||||
# Ensure the menu opens (react-select requires focusing the control)
|
||||
try:
|
||||
control = dropdown.locator('.select__control, .react-select__control').first
|
||||
if control and control.is_visible(timeout=1000):
|
||||
control.click()
|
||||
else:
|
||||
dropdown.click()
|
||||
except Exception:
|
||||
dropdown.click()
|
||||
page.wait_for_timeout(300)
|
||||
# Focus the input for typing
|
||||
try:
|
||||
input_el = dropdown.locator('input').first
|
||||
input_el.click()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Try multiple GitLab repositories (prefer small public examples)
|
||||
gitlab_repo_candidates = [
|
||||
'https://gitlab.com/gitlab-examples/ci-hello-world',
|
||||
'https://gitlab.com/gitlab-examples/hello-world',
|
||||
'gitlab-examples/ci-hello-world', # fallback to plain label
|
||||
'gitlab-examples/hello-world',
|
||||
]
|
||||
|
||||
option_found = False
|
||||
selected_repo = None
|
||||
|
||||
for gitlab_repo in gitlab_repo_candidates:
|
||||
print(f'Trying repository: {gitlab_repo}')
|
||||
|
||||
# Determine the label we expect in the dropdown options (owner/repo)
|
||||
if gitlab_repo.startswith('http'):
|
||||
# Extract path after domain, e.g., gitlab-examples/ci-hello-world
|
||||
try:
|
||||
search_label = (
|
||||
gitlab_repo.split('://', 1)[1].split('/', 1)[1].strip('/')
|
||||
)
|
||||
except Exception:
|
||||
search_label = (
|
||||
gitlab_repo.rsplit('/', 2)[-2]
|
||||
+ '/'
|
||||
+ gitlab_repo.rsplit('/', 1)[-1]
|
||||
)
|
||||
else:
|
||||
search_label = gitlab_repo
|
||||
|
||||
# Clear the search field and type into the dropdown input
|
||||
try:
|
||||
input_el = dropdown.locator('input').first
|
||||
if input_el.is_visible(timeout=1000):
|
||||
input_el.press('Control+a')
|
||||
input_el.press('Delete')
|
||||
input_el.type(gitlab_repo)
|
||||
else:
|
||||
# Fallback to page-level keyboard events
|
||||
page.keyboard.press('Control+a')
|
||||
page.keyboard.press('Delete')
|
||||
page.keyboard.type(gitlab_repo)
|
||||
print(
|
||||
f'Typed repository query: {gitlab_repo} (expect option label: {search_label})'
|
||||
)
|
||||
except Exception as e:
|
||||
print(f'Keyboard/input failed: {e}')
|
||||
continue
|
||||
|
||||
page.wait_for_timeout(3000) # Wait for search results
|
||||
|
||||
# Try to find and click the repository option matching the expected label
|
||||
option_selectors = [
|
||||
f'[data-testid="repo-dropdown"] [role="option"]:has-text("{search_label}")',
|
||||
f'[role="option"]:has-text("{search_label}")',
|
||||
f'div:has-text("{search_label}"):not([id="aria-results"])',
|
||||
'[role="option"]', # Any option as fallback
|
||||
]
|
||||
|
||||
for selector in option_selectors:
|
||||
try:
|
||||
option = page.locator(selector).first
|
||||
if option.is_visible(timeout=3000):
|
||||
print(f'Found repository option with selector: {selector}')
|
||||
try:
|
||||
option.click()
|
||||
print(
|
||||
f'Successfully clicked repository option for {search_label}'
|
||||
)
|
||||
option_found = True
|
||||
selected_repo = search_label
|
||||
page.wait_for_timeout(2000)
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if option_found:
|
||||
break
|
||||
|
||||
if not option_found:
|
||||
print(
|
||||
'Could not find any GitLab repository options, checking if any repositories are available'
|
||||
)
|
||||
|
||||
# Check if there are any options at all
|
||||
all_options = page.locator('[role="option"]')
|
||||
option_count = all_options.count()
|
||||
print(f'Found {option_count} repository options total')
|
||||
|
||||
if option_count > 0:
|
||||
print('Selecting first available repository as fallback')
|
||||
all_options.first.click()
|
||||
selected_repo = 'first-available'
|
||||
option_found = True
|
||||
else:
|
||||
print(
|
||||
'No repository options found - this may indicate GitLab API access issues'
|
||||
)
|
||||
# Try keyboard navigation as last resort
|
||||
page.keyboard.press('ArrowDown')
|
||||
page.wait_for_timeout(500)
|
||||
page.keyboard.press('Enter')
|
||||
print('Used keyboard navigation to select repository')
|
||||
selected_repo = 'keyboard-selected'
|
||||
option_found = True
|
||||
|
||||
page.wait_for_timeout(2000)
|
||||
|
||||
# Step 6: Select branch (prefer main/master, otherwise first)
|
||||
print('Step 6: Selecting branch...')
|
||||
|
||||
# Try multiple selectors for the branch dropdown
|
||||
branch_selectors = [
|
||||
'text=Select branch...',
|
||||
'[placeholder="Select branch..."]',
|
||||
'div:has-text("Select branch...")',
|
||||
'.react-select__placeholder:has-text("Select branch...")',
|
||||
'[data-testid*="branch"] >> text=Select branch...',
|
||||
]
|
||||
|
||||
branch_dropdown = None
|
||||
for selector in branch_selectors:
|
||||
try:
|
||||
element = page.locator(selector).first
|
||||
if element.is_visible(timeout=3000):
|
||||
branch_dropdown = element
|
||||
print(f'Found branch dropdown with selector: {selector}')
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if branch_dropdown is None:
|
||||
print('Could not find branch dropdown; branch may auto-select. Proceeding...')
|
||||
else:
|
||||
# Try to open the branch dropdown robustly
|
||||
try:
|
||||
# Prefer clicking the react-select control rather than the placeholder
|
||||
branch_scope = page.locator('div:has-text("Select branch...")').first
|
||||
control = branch_scope.locator(
|
||||
'.select__control, .react-select__control'
|
||||
).first
|
||||
if control.is_visible(timeout=2000):
|
||||
control.click(force=True)
|
||||
else:
|
||||
# Fallback to clicking the placeholder text with force to avoid overlay interception
|
||||
placeholder = branch_scope.locator('text=Select branch...').first
|
||||
if placeholder.is_visible(timeout=1000):
|
||||
placeholder.click(force=True)
|
||||
else:
|
||||
branch_scope.click(force=True)
|
||||
page.wait_for_timeout(500)
|
||||
except Exception as e:
|
||||
print(f'Primary click on branch dropdown failed: {e}')
|
||||
try:
|
||||
# Fallback to previous approach
|
||||
branch_dropdown.click(force=True)
|
||||
page.wait_for_timeout(500)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# First try main/master explicitly; if not found, pick the first option
|
||||
selected_branch = None
|
||||
try:
|
||||
for text in ['main', 'master']:
|
||||
try:
|
||||
opt = page.locator(f'[role="option"]:has-text("{text}")').first
|
||||
if opt.is_visible(timeout=1000):
|
||||
opt.click()
|
||||
selected_branch = text
|
||||
print(f'Selected {text} branch explicitly')
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
if not selected_branch:
|
||||
first_opt = page.locator('[role="option"]').first
|
||||
if first_opt.is_visible(timeout=1000):
|
||||
first_opt.click()
|
||||
selected_branch = 'first-available'
|
||||
print('Selected first available branch')
|
||||
except Exception:
|
||||
print(
|
||||
'Could not select branch explicitly; proceeding with auto-selected branch'
|
||||
)
|
||||
|
||||
page.screenshot(path='test-results/gitlab_04_repo_selected.png')
|
||||
print('Screenshot saved: gitlab_04_repo_selected.png')
|
||||
|
||||
# Step 7: Launch the repository
|
||||
print('Step 7: Launching GitLab repository...')
|
||||
|
||||
launch_button = page.locator('[data-testid="repo-launch-button"]')
|
||||
expect(launch_button).to_be_visible(timeout=10000)
|
||||
|
||||
# Wait for the button to be enabled
|
||||
max_wait_attempts = 30
|
||||
button_enabled = False
|
||||
for attempt in range(max_wait_attempts):
|
||||
try:
|
||||
is_disabled = launch_button.is_disabled()
|
||||
if not is_disabled:
|
||||
print(f'Launch button is now enabled (attempt {attempt + 1})')
|
||||
button_enabled = True
|
||||
break
|
||||
else:
|
||||
print(
|
||||
f'Launch button still disabled, waiting... (attempt {attempt + 1}/{max_wait_attempts})'
|
||||
)
|
||||
page.wait_for_timeout(2000)
|
||||
except Exception as e:
|
||||
print(f'Error checking button state (attempt {attempt + 1}): {e}')
|
||||
page.wait_for_timeout(2000)
|
||||
|
||||
try:
|
||||
if button_enabled:
|
||||
launch_button.click()
|
||||
print('Launch button clicked normally')
|
||||
else:
|
||||
print('Launch button still disabled, trying JavaScript force click...')
|
||||
result = page.evaluate("""() => {
|
||||
const button = document.querySelector('[data-testid="repo-launch-button"]');
|
||||
if (button) {
|
||||
button.removeAttribute('disabled');
|
||||
button.click();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}""")
|
||||
if result:
|
||||
print('Successfully force-clicked Launch button with JavaScript')
|
||||
else:
|
||||
print('JavaScript could not find the Launch button')
|
||||
except Exception as e:
|
||||
print(f'Error clicking Launch button: {e}')
|
||||
page.screenshot(path='test-results/gitlab_05_launch_error.png')
|
||||
print('Screenshot saved: gitlab_05_launch_error.png')
|
||||
raise
|
||||
|
||||
# Step 8: Wait for conversation interface to load
|
||||
print('Step 8: Waiting for conversation interface to load...')
|
||||
|
||||
navigation_timeout = 300000 # 5 minutes
|
||||
check_interval = 10000 # 10 seconds
|
||||
|
||||
page.screenshot(path='test-results/gitlab_06_after_launch.png')
|
||||
print('Screenshot saved: gitlab_06_after_launch.png')
|
||||
|
||||
# Prefer URL-based navigation check first
|
||||
try:
|
||||
page.wait_for_url('**/conversations/*', timeout=180000)
|
||||
print(f'Navigated to conversations page: {page.url}')
|
||||
except Exception as e:
|
||||
try:
|
||||
current_url = page.url
|
||||
print(f'Current URL after launch: {current_url} (error: {e})')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Wait for loading to complete
|
||||
loading_selectors = [
|
||||
'[data-testid="loading-indicator"]',
|
||||
'[data-testid="loading-spinner"]',
|
||||
'.loading-spinner',
|
||||
'.spinner',
|
||||
'div:has-text("Loading...")',
|
||||
'div:has-text("Initializing...")',
|
||||
'div:has-text("Please wait...")',
|
||||
]
|
||||
|
||||
for selector in loading_selectors:
|
||||
try:
|
||||
loading = page.locator(selector)
|
||||
if loading.is_visible(timeout=5000):
|
||||
print(f'Found loading indicator with selector: {selector}')
|
||||
print('Waiting for loading to complete...')
|
||||
expect(loading).not_to_be_visible(timeout=120000)
|
||||
print('Loading completed')
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Wait for conversation interface
|
||||
start_time = time.time()
|
||||
conversation_loaded = False
|
||||
while time.time() - start_time < navigation_timeout / 1000:
|
||||
try:
|
||||
selectors = [
|
||||
'.scrollbar.flex.flex-col.grow',
|
||||
'[data-testid="chat-input"]',
|
||||
'[data-testid="app-route"]',
|
||||
'[data-testid="conversation-screen"]',
|
||||
'[data-testid="message-input"]',
|
||||
'.conversation-container',
|
||||
'.chat-container',
|
||||
'textarea',
|
||||
'form textarea',
|
||||
'div[role="main"]',
|
||||
'main',
|
||||
]
|
||||
|
||||
for selector in selectors:
|
||||
try:
|
||||
element = page.locator(selector)
|
||||
if element.is_visible(timeout=2000):
|
||||
print(
|
||||
f'Found conversation interface element with selector: {selector}'
|
||||
)
|
||||
conversation_loaded = True
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if conversation_loaded:
|
||||
break
|
||||
|
||||
if (time.time() - start_time) % (check_interval / 1000) < 1:
|
||||
elapsed = int(time.time() - start_time)
|
||||
page.screenshot(path=f'test-results/gitlab_waiting_{elapsed}s.png')
|
||||
print(f'Screenshot saved: gitlab_waiting_{elapsed}s.png')
|
||||
|
||||
page.wait_for_timeout(5000)
|
||||
except Exception as e:
|
||||
print(f'Error checking for conversation interface: {e}')
|
||||
page.wait_for_timeout(5000)
|
||||
|
||||
if not conversation_loaded:
|
||||
print('Timed out waiting for conversation interface to load')
|
||||
page.screenshot(path='test-results/gitlab_07_timeout.png')
|
||||
print('Screenshot saved: gitlab_07_timeout.png')
|
||||
raise TimeoutError('Timed out waiting for conversation interface to load')
|
||||
|
||||
# Step 9: Wait for agent to be ready
|
||||
print('Step 9: Waiting for agent to be ready for input...')
|
||||
|
||||
max_wait_time = 480 # 8 minutes
|
||||
start_time = time.time()
|
||||
agent_ready = False
|
||||
print(f'Waiting up to {max_wait_time} seconds for agent to be ready...')
|
||||
|
||||
while time.time() - start_time < max_wait_time:
|
||||
elapsed = int(time.time() - start_time)
|
||||
if elapsed % 30 == 0 and elapsed > 0:
|
||||
page.screenshot(path=f'test-results/gitlab_waiting_{elapsed}s.png')
|
||||
print(
|
||||
f'Screenshot saved: gitlab_waiting_{elapsed}s.png (waiting {elapsed}s)'
|
||||
)
|
||||
|
||||
try:
|
||||
# Check if input field and submit button are ready
|
||||
input_ready = False
|
||||
submit_ready = False
|
||||
try:
|
||||
input_field = page.locator('[data-testid="chat-input"] textarea')
|
||||
submit_button = page.locator(
|
||||
'[data-testid="chat-input"] button[type="submit"]'
|
||||
)
|
||||
if (
|
||||
input_field.is_visible(timeout=2000)
|
||||
and input_field.is_enabled(timeout=2000)
|
||||
and submit_button.is_visible(timeout=2000)
|
||||
and submit_button.is_enabled(timeout=2000)
|
||||
):
|
||||
print(
|
||||
'Chat input field and submit button are both visible and enabled'
|
||||
)
|
||||
input_ready = True
|
||||
submit_ready = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check for ready indicators
|
||||
ready_indicators = [
|
||||
'div:has-text("Agent is ready")',
|
||||
'div:has-text("Waiting for user input")',
|
||||
'div:has-text("Awaiting input")',
|
||||
'div:has-text("Task completed")',
|
||||
'div:has-text("Agent has finished")',
|
||||
]
|
||||
|
||||
for indicator in ready_indicators:
|
||||
try:
|
||||
element = page.locator(indicator)
|
||||
if element.is_visible(timeout=2000):
|
||||
print(f'Agent appears ready (found: {indicator})')
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if input_ready and submit_ready:
|
||||
print(
|
||||
'✅ Agent is ready for user input - input field and submit button are enabled'
|
||||
)
|
||||
agent_ready = True
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print(f'Error checking agent ready state: {e}')
|
||||
|
||||
page.wait_for_timeout(2000)
|
||||
|
||||
if not agent_ready:
|
||||
page.screenshot(path='test-results/gitlab_timeout_waiting_for_agent.png')
|
||||
raise AssertionError(
|
||||
f'Agent did not become ready for input within {max_wait_time} seconds'
|
||||
)
|
||||
|
||||
page.screenshot(path='test-results/gitlab_08_agent_ready.png')
|
||||
print('Screenshot saved: gitlab_08_agent_ready.png')
|
||||
|
||||
# Step 10: Ask the agent to verify repository access
|
||||
print('Step 10: Asking agent to verify repository access...')
|
||||
|
||||
# Find the message input
|
||||
message_input = page.locator('[data-testid="chat-input"] textarea')
|
||||
expect(message_input).to_be_visible(timeout=10000)
|
||||
|
||||
# Type the question - adapt based on which repository was selected
|
||||
if selected_repo and selected_repo.startswith('gitlab-org/'):
|
||||
question = 'Please count how many lines are in the README.md file and tell me the exact number.'
|
||||
print(f'Using GitLab-specific question for repository: {selected_repo}')
|
||||
else:
|
||||
question = 'Please list the files in the current directory and tell me what repository this is.'
|
||||
print(f'Using generic question for repository: {selected_repo}')
|
||||
|
||||
message_input.fill(question)
|
||||
print(f'Typed question: {question}')
|
||||
|
||||
# Submit the message
|
||||
submit_button = page.locator('[data-testid="chat-input"] button[type="submit"]')
|
||||
expect(submit_button).to_be_visible(timeout=5000)
|
||||
submit_button.click()
|
||||
print('Submitted question to agent')
|
||||
|
||||
page.screenshot(path='test-results/gitlab_09_question_sent.png')
|
||||
print('Screenshot saved: gitlab_09_question_sent.png')
|
||||
|
||||
# Step 11: Wait for agent response
|
||||
print('Step 11: Waiting for agent response...')
|
||||
|
||||
response_timeout = 300 # 5 minutes
|
||||
start_time = time.time()
|
||||
response_received = False
|
||||
|
||||
while time.time() - start_time < response_timeout:
|
||||
elapsed = int(time.time() - start_time)
|
||||
if elapsed % 30 == 0 and elapsed > 0:
|
||||
page.screenshot(path=f'test-results/gitlab_response_waiting_{elapsed}s.png')
|
||||
print(
|
||||
f'Screenshot saved: gitlab_response_waiting_{elapsed}s.png (waiting {elapsed}s)'
|
||||
)
|
||||
|
||||
try:
|
||||
# Look for agent response - adapt based on question asked
|
||||
if selected_repo and selected_repo.startswith('gitlab-org/'):
|
||||
# Look for line count information
|
||||
response_selectors = [
|
||||
'div:has-text("lines")',
|
||||
'div:has-text("README.md")',
|
||||
'div:has-text("file has")',
|
||||
'div:has-text("contains")',
|
||||
'div:has-text("total")',
|
||||
]
|
||||
expected_words = ['lines', 'readme', 'file']
|
||||
else:
|
||||
# Look for file listing or repository information
|
||||
response_selectors = [
|
||||
'div:has-text("files")',
|
||||
'div:has-text("directory")',
|
||||
'div:has-text("repository")',
|
||||
'div:has-text("README")',
|
||||
'div:has-text("ls")',
|
||||
]
|
||||
expected_words = ['files', 'directory', 'repository', 'readme']
|
||||
|
||||
for selector in response_selectors:
|
||||
try:
|
||||
response_element = page.locator(selector)
|
||||
if response_element.is_visible(timeout=2000):
|
||||
response_text = response_element.text_content()
|
||||
if response_text and any(
|
||||
word in response_text.lower() for word in expected_words
|
||||
):
|
||||
print(f'Found agent response: {response_text[:200]}...')
|
||||
response_received = True
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if response_received:
|
||||
break
|
||||
|
||||
# Check if agent is still working
|
||||
working_indicators = [
|
||||
'div:has-text("Working...")',
|
||||
'div:has-text("Thinking...")',
|
||||
'div:has-text("Processing...")',
|
||||
'.loading-spinner',
|
||||
]
|
||||
|
||||
still_working = False
|
||||
for indicator in working_indicators:
|
||||
try:
|
||||
element = page.locator(indicator)
|
||||
if element.is_visible(timeout=1000):
|
||||
still_working = True
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if not still_working and elapsed > 60:
|
||||
# Check if there's any new content in the conversation
|
||||
try:
|
||||
conversation_content = page.locator(
|
||||
'[data-testid="conversation-screen"]'
|
||||
).text_content()
|
||||
if conversation_content and len(conversation_content) > 100:
|
||||
print('Agent appears to have responded, checking content...')
|
||||
response_received = True
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
print(f'Error checking for agent response: {e}')
|
||||
|
||||
page.wait_for_timeout(5000)
|
||||
|
||||
if not response_received:
|
||||
page.screenshot(path='test-results/gitlab_10_no_response.png')
|
||||
print('Screenshot saved: gitlab_10_no_response.png')
|
||||
raise AssertionError(f'Agent did not respond within {response_timeout} seconds')
|
||||
|
||||
# Final screenshot
|
||||
page.screenshot(path='test-results/gitlab_11_success.png')
|
||||
print('Screenshot saved: gitlab_11_success.png')
|
||||
|
||||
print('✅ GitLab repository integration test completed successfully!')
|
||||
if selected_repo and selected_repo.startswith('gitlab-org/'):
|
||||
print(
|
||||
f'The agent was able to access and work with the GitLab repository: {selected_repo}'
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f'The agent was able to access and work with the repository: {selected_repo}'
|
||||
)
|
||||
print(
|
||||
'Note: GitLab provider was selected but a different repository was used due to access limitations.'
|
||||
)
|
||||
@@ -94,7 +94,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
||||
# Verify the server is actually stopped by trying to start another one
|
||||
# on the same port (regardless of OS)
|
||||
action = CmdRunAction(command='ls')
|
||||
action.set_hard_timeout(3)
|
||||
action.set_hard_timeout(1)
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
|
||||
@@ -44,7 +44,7 @@ def sse_mcp_docker_server():
|
||||
|
||||
container_command_args = [
|
||||
'--stdio',
|
||||
'npx -y @modelcontextprotocol/server-filesystem@2025.8.18 /',
|
||||
'npx -y @modelcontextprotocol/server-filesystem /',
|
||||
'--port',
|
||||
str(container_internal_port), # MCP server inside container listens on this
|
||||
'--baseUrl',
|
||||
@@ -292,7 +292,7 @@ async def test_microagent_and_one_stdio_mcp_in_config(
|
||||
name='filesystem',
|
||||
command='npx',
|
||||
args=[
|
||||
'@modelcontextprotocol/server-filesystem@2025.8.18',
|
||||
'@modelcontextprotocol/server-filesystem',
|
||||
'/',
|
||||
],
|
||||
)
|
||||
|
||||
+5
-160
@@ -12,10 +12,8 @@ from litellm.exceptions import (
|
||||
from openhands.core.config import LLMConfig
|
||||
from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.llm.async_llm import AsyncLLM
|
||||
from openhands.llm.llm import LLM
|
||||
from openhands.llm.metrics import Metrics, TokenUsage
|
||||
from openhands.llm.streaming_llm import StreamingLLM
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
@@ -254,7 +252,7 @@ def test_response_latency_tracking(mock_time, mock_litellm_completion):
|
||||
|
||||
@patch('openhands.llm.llm.litellm.get_model_info')
|
||||
def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
|
||||
default_config.model = 'openrouter/gpt-4o-mini'
|
||||
default_config.model = 'openrouter:gpt-4o-mini'
|
||||
mock_get_model_info.return_value = {
|
||||
'max_input_tokens': 7000,
|
||||
'max_output_tokens': 1500,
|
||||
@@ -263,7 +261,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
|
||||
llm.init_model_info()
|
||||
assert llm.config.max_input_tokens == 7000
|
||||
assert llm.config.max_output_tokens == 1500
|
||||
mock_get_model_info.assert_called_once_with('openrouter/gpt-4o-mini')
|
||||
mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm_completion')
|
||||
@@ -1203,108 +1201,6 @@ def test_gemini_medium_reasoning_effort_passes_through(mock_completion):
|
||||
assert call_kwargs.get('reasoning_effort') == 'medium'
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm_completion')
def test_opus_41_keeps_temperature_top_p(mock_completion):
    """Check sampling parameters sent for the Opus 4.1 model.

    Despite the test name, the assertions verify that ``temperature`` is
    forwarded to the patched completion call while ``top_p`` is absent
    from the keyword arguments.
    """
    mock_completion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    opus_config = LLMConfig(
        model='anthropic/claude-opus-4-1-20250805',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
    )
    opus_llm = LLM(opus_config, service_id='svc')
    opus_llm.completion(messages=[{'role': 'user', 'content': 'hi'}])

    # Keyword arguments of the most recent call to the patched completion.
    sent_kwargs = mock_completion.call_args.kwargs
    assert sent_kwargs.get('temperature') == 0.7
    # Anthropic rejects both temperature and top_p together on Opus; we keep temperature and drop top_p
    assert 'top_p' not in sent_kwargs
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm_completion')
def test_opus_4_keeps_temperature_top_p(mock_completion):
    """Check that Opus 4 requests forward both ``temperature`` and ``top_p``."""
    mock_completion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    opus_config = LLMConfig(
        model='anthropic/claude-opus-4-20250514',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
    )
    opus_llm = LLM(opus_config, service_id='svc')
    opus_llm.completion(messages=[{'role': 'user', 'content': 'hi'}])

    # Both sampling parameters must reach the patched completion unchanged.
    sent_kwargs = mock_completion.call_args.kwargs
    assert sent_kwargs.get('temperature') == 0.7
    assert sent_kwargs.get('top_p') == 0.9
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm_completion')
def test_opus_41_disables_thinking(mock_completion):
    """Check that Opus 4.1 requests carry ``thinking={'type': 'disabled'}``."""
    mock_completion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    opus_config = LLMConfig(
        model='anthropic/claude-opus-4-1-20250805',
        api_key='k',
    )
    opus_llm = LLM(opus_config, service_id='svc')
    opus_llm.completion(messages=[{'role': 'user', 'content': 'hi'}])

    # Inspect the keyword arguments of the patched completion call.
    sent_kwargs = mock_completion.call_args.kwargs
    assert sent_kwargs.get('thinking') == {'type': 'disabled'}
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm.get_model_info')
def test_is_caching_prompt_active_anthropic_prefixed(mock_get_model_info):
    """Check prompt caching is reported active for an ``anthropic/``-prefixed model.

    The model-info lookup is patched to raise, so the result must not rely
    on that lookup succeeding.
    """
    # Avoid external calls, but behavior shouldn't depend on model info
    mock_get_model_info.side_effect = Exception('skip')

    sonnet_config = LLMConfig(
        model='anthropic/claude-3-7-sonnet',
        api_key='k',
        caching_prompt=True,
    )
    sonnet_llm = LLM(sonnet_config, service_id='svc')

    assert sonnet_llm.is_caching_prompt_active() is True
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.httpx.get')
@patch('openhands.llm.llm.litellm.get_model_info')
def test_openhands_provider_rewrite_and_caching_prompt(
    mock_get_model_info, mock_httpx_get
):
    """Check the ``openhands/`` model prefix rewrite and prompt caching.

    ``httpx.get`` is patched to return a stub whose ``json()`` yields a
    single model-info entry, and ``litellm.get_model_info`` is patched to
    return fixed token limits.
    """

    class Resp:
        # Stand-in for the HTTP response object; only ``json`` is provided.
        def json(self=None):
            return {
                'data': [
                    {
                        'model_name': 'claude-3.7-sonnet',
                        'model_info': {
                            'max_input_tokens': 200000,
                            'max_output_tokens': 64000,
                            'supports_vision': True,
                        },
                    }
                ]
            }

    # Mock LiteLLM proxy /v1/model/info response
    mock_httpx_get.return_value = Resp()
    mock_get_model_info.return_value = {
        'max_input_tokens': 200000,
        'max_output_tokens': 64000,
    }

    proxy_config = LLMConfig(
        model='openhands/claude-3.7-sonnet',
        api_key='k',
        caching_prompt=True,
    )
    proxy_llm = LLM(proxy_config, service_id='svc')

    # Model should be rewritten to litellm_proxy/...
    assert proxy_llm.config.model.startswith('litellm_proxy/claude-3.7-sonnet')
    # Caching prompt should be active for Claude
    assert proxy_llm.is_caching_prompt_active() is True
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm_completion')
|
||||
def test_gemini_high_reasoning_effort_passes_through(mock_completion):
|
||||
"""Test that Gemini with reasoning_effort='high' passes through to litellm."""
|
||||
@@ -1343,61 +1239,10 @@ def test_non_gemini_uses_reasoning_effort(mock_completion):
|
||||
sample_messages = [{'role': 'user', 'content': 'Hello, how are you?'}]
|
||||
llm.completion(messages=sample_messages)
|
||||
|
||||
|
||||
@patch('openhands.llm.async_llm.litellm_acompletion')
@pytest.mark.asyncio
async def test_async_reasoning_effort_passthrough(mock_acompletion):
    """AsyncLLM forwards reasoning_effort, temperature and top_p unchanged."""
    mock_acompletion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    async_llm = AsyncLLM(
        LLMConfig(
            model='o3',
            api_key='k',
            temperature=0.7,
            top_p=0.9,
            reasoning_effort='low',
        ),
        service_id='svc',
    )
    await async_llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])

    forwarded = mock_acompletion.call_args.kwargs
    assert forwarded.get('reasoning_effort') == 'low'
    # Async path does not pop temperature/top_p (parity with main)
    assert forwarded.get('temperature') == 0.7
    assert forwarded.get('top_p') == 0.9
|
||||
|
||||
|
||||
@patch('openhands.llm.streaming_llm.AsyncLLM._call_acompletion')
@pytest.mark.asyncio
async def test_streaming_reasoning_effort_passthrough(mock_call):
    """StreamingLLM forwards reasoning_effort, temperature and top_p unchanged."""

    class Dummy:
        """Async-iterable stub that yields a single streamed chunk."""

        async def __aiter__(self):
            yield {'choices': [{'delta': {'content': 'x'}}]}

    async def fake_stream(*args, **kwargs):
        return Dummy()

    mock_call.side_effect = fake_stream

    streaming_config = LLMConfig(
        model='o3',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
        reasoning_effort='low',
    )
    sllm = StreamingLLM(streaming_config, service_id='svc')

    # Pull one chunk only; that is enough to trigger the underlying call.
    chunk_stream = sllm.async_streaming_completion(
        messages=[{'role': 'user', 'content': 'hi'}]
    )
    async for _chunk in chunk_stream:
        break

    forwarded = mock_call.call_args.kwargs
    assert forwarded.get('reasoning_effort') == 'low'
    assert forwarded.get('temperature') == 0.7
    assert forwarded.get('top_p') == 0.9
|
||||
|
||||
|
||||
@patch('openhands.llm.async_llm.litellm_acompletion')
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_streaming_no_thinking_for_gemini(mock_acompletion):
|
||||
mock_acompletion.return_value = {
|
||||
'choices': [{'message': {'content': 'ok'}}],
|
||||
}
|
||||
config = LLMConfig(model='gemini-2.5-pro', api_key='k', reasoning_effort='low')
|
||||
llm = AsyncLLM(config, service_id='svc')
|
||||
await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
|
||||
call_kwargs = mock_acompletion.call_args[1]
|
||||
# Verify that reasoning_effort was used and thinking budget was not set
|
||||
call_kwargs = mock_completion.call_args[1]
|
||||
assert 'thinking' not in call_kwargs
|
||||
assert call_kwargs.get('reasoning_effort') == 'high'
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm_completion')
|
||||
|
||||
@@ -96,52 +96,6 @@ FNCALL_TOOLS: list[ChatCompletionToolParam] = [
|
||||
]
|
||||
|
||||
|
||||
def test_malformed_parameter_parsing_recovery():
    """Recover a tool call when a parameter tag is malformed.

    The assistant output below calls ``str_replace_editor`` but writes the
    'command' parameter as ``<parameter=command=str_replace</parameter>``
    instead of the usual ``<parameter=name>value</parameter>`` form.
    """
    from openhands.llm.fn_call_converter import (
        convert_non_fncall_messages_to_fncall_messages,
    )

    malformed_call = '\n'.join(
        [
            '<function=str_replace_editor>',
            '<parameter=command=str_replace</parameter>',  # malformed form
            '<parameter=path>/repo/app.py</parameter>',
            '<parameter=old_str>foo</parameter>',
            '<parameter=new_str>bar</parameter>',
            '</function>',
        ]
    )
    conversation = [
        {'role': 'system', 'content': 'test'},
        {'role': 'user', 'content': 'do edit'},
        {'role': 'assistant', 'content': malformed_call},
    ]

    converted = convert_non_fncall_messages_to_fncall_messages(
        conversation, FNCALL_TOOLS
    )

    # The final message must be the assistant turn carrying one parsed call.
    final_message = converted[-1]
    assert final_message['role'] == 'assistant'
    assert 'tool_calls' in final_message and len(final_message['tool_calls']) == 1

    parsed_call = final_message['tool_calls'][0]
    assert parsed_call['type'] == 'function'
    assert parsed_call['function']['name'] == 'str_replace_editor'

    # Arguments must decode as JSON with the recovered command and parameters.
    arguments = json.loads(parsed_call['function']['arguments'])
    assert arguments['command'] == 'str_replace'
    assert arguments['path'] == '/repo/app.py'
    assert arguments['old_str'] == 'foo'
    assert arguments['new_str'] == 'bar'
|
||||
|
||||
|
||||
def test_convert_tools_to_description():
|
||||
formatted_tools = convert_tools_to_description(FNCALL_TOOLS)
|
||||
print(formatted_tools)
|
||||
|
||||
@@ -1,291 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from openhands.llm.model_features import (
|
||||
ModelFeatures,
|
||||
get_features,
|
||||
model_matches,
|
||||
normalize_model_name,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('raw', 'expected'),
    [
        ('  OPENAI/gpt-4o  ', 'gpt-4o'),
        ('anthropic/claude-3-7-sonnet', 'claude-3-7-sonnet'),
        ('litellm_proxy/gemini-2.5-pro', 'gemini-2.5-pro'),
        ('qwen3-coder-480b-a35b-instruct', 'qwen3-coder-480b-a35b-instruct'),
        ('gpt-5', 'gpt-5'),
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', 'deepseek-r1-0528'),
        ('openai/GLM-4.5-GGUF', 'glm-4.5'),
        ('openrouter/gpt-4o-mini', 'gpt-4o-mini'),
        (
            'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0',
            'anthropic.claude-3-5-sonnet-20241022-v2',
        ),
        ('', ''),
        (None, ''),  # type: ignore[arg-type]
    ],
)
def test_normalize_model_name(raw, expected):
    """Each raw model string normalizes to the expected canonical name."""
    normalized = normalize_model_name(raw)
    assert normalized == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('name', 'pattern', 'expected'),
    [
        ('gpt-4o', 'gpt-4o*', True),
        ('openai/gpt-4o', 'gpt-4o*', True),
        ('litellm_proxy/gpt-4o-mini', 'gpt-4o*', True),
        ('claude-3-7-sonnet-20250219', 'claude-3-7-sonnet*', True),
        ('o1-2024-12-17', 'o1*', True),
        ('grok-4-0709', 'grok-4-0709', True),
        ('grok-4-0801', 'grok-4-0709', False),
    ],
)
def test_model_matches(name, pattern, expected):
    """A single basename pattern matches (or rejects) the given model name."""
    matched = model_matches(name, [pattern])
    assert matched is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('name', 'pattern', 'expected'),
    [
        ('openai/gpt-4o', 'openai/gpt-4o*', True),
        ('openrouter/gpt-4o', 'openai/gpt-4o*', False),
        ('litellm_proxy/gpt-4o-mini', 'litellm_proxy/gpt-4o*', True),
        # basename alone should not match provider-qualified
        ('gpt-4o', 'openai/gpt-4o*', False),
        ('unknown-model', 'gpt-5*', False),
    ],
)
def test_model_matches_provider_qualified(name, pattern, expected):
    """Provider-qualified patterns are checked against the given model name."""
    matched = model_matches(name, [pattern])
    assert matched is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('model', 'expect'),
    [
        (
            'gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'gpt-5',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o3-mini',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o1-2024-12-17',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'xai/grok-4-0709',
            ModelFeatures(
                supports_function_calling=False,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'anthropic/claude-3-7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'litellm_proxy/claude-3.7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'gemini-2.5-pro',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        # provider-qualified still matches basename patterns
        (
            'openai/gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
    ],
)
def test_get_features(model, expect):
    """``get_features`` returns the full expected feature set for each model."""
    resolved = get_features(model)
    assert resolved == expect
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'model',
    [
        # Anthropic families
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3.5-haiku',
        'claude-3-5-haiku-20241022',
        'claude-sonnet-4-latest',
        'claude-opus-4-1-20250805',
        # OpenAI families
        'gpt-4o',
        'gpt-4.1',
        'gpt-5',
        # o-series
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        # Google Gemini
        'gemini-2.5-pro',
        # Others
        'kimi-k2-0711-preview',
        'kimi-k2-instruct',
        'qwen3-coder',
        'qwen3-coder-480b-a35b-instruct',
    ],
)
def test_function_calling_models(model):
    """Every listed model is reported as supporting function calling."""
    assert get_features(model).supports_function_calling is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'model',
    [
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        'gemini-2.5-flash',
        'gemini-2.5-pro',
        'gpt-5',
    ],
)
def test_reasoning_effort_models(model):
    """Every listed model is reported as supporting reasoning_effort."""
    assert get_features(model).supports_reasoning_effort is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'model',
    ['deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', 'DeepSeek-R1-0528'],
)
def test_deepseek_reasoning_effort_models(model):
    """DeepSeek-R1 names, with or without provider/tag, support reasoning_effort."""
    assert get_features(model).supports_reasoning_effort is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'model',
    [
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3-5-haiku-20241022',
        'claude-3-haiku-20240307',
        'claude-3-opus-20240229',
        'claude-sonnet-4-latest',
    ],
)
def test_prompt_cache_models(model):
    """Every listed Claude model is reported as supporting prompt caching."""
    assert get_features(model).supports_prompt_cache is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('model', 'expected'),
    [
        # Positive cases: exactly those supported on main
        ('o1', True),
        ('o1-2024-12-17', True),
        ('o3', True),
        ('o3-2025-04-16', True),
        ('o3-mini', True),
        ('o3-mini-2025-01-31', True),
        ('o4-mini', True),
        ('o4-mini-2025-04-16', True),
        ('gemini-2.5-flash', True),
        ('gemini-2.5-pro', True),
        ('gpt-5', True),
        ('gpt-5-2025-08-07', True),
        ('claude-opus-4-1-20250805', False),
        # DeepSeek
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', True),
        ('DeepSeek-R1-0528', True),
        # Negative cases: ensure we didn't unintentionally expand
        ('o1-mini', False),
        ('o1-preview', False),
        ('gemini-1.0-pro', False),
    ],
)
def test_reasoning_effort_parity_with_main(model, expected):
    """reasoning_effort support matches the expected flag for each model."""
    supported = get_features(model).supports_reasoning_effort
    assert supported is expected
|
||||
|
||||
|
||||
def test_prompt_cache_haiku_variants():
    """Both the dashed and dotted 3.5-haiku spellings support prompt caching."""
    for haiku_name in ('claude-3-5-haiku-20241022', 'claude-3.5-haiku-20241022'):
        assert get_features(haiku_name).supports_prompt_cache is True
|
||||
|
||||
|
||||
def test_stop_words_grok_provider_prefixed():
    """grok-4-0709 lacks stop-word support with or without the xai/ prefix."""
    for grok_name in ('xai/grok-4-0709', 'grok-4-0709'):
        assert get_features(grok_name).supports_stop_words is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'model',
    [
        'o1-mini',
        'o1-2024-12-17',
        'xai/grok-4-0709',
        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
        'DeepSeek-R1-0528',
    ],
)
def test_supports_stop_words_false_models(model):
    """Every listed model is reported as not supporting stop words."""
    assert get_features(model).supports_stop_words is False
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user