mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
8 Commits
fix-github
...
uv-migrati
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8a5d8f7006 | ||
|
|
64b1e68d2a | ||
|
|
f2a3a0da56 | ||
|
|
50487f2a9c | ||
|
|
3e6c1f0d27 | ||
|
|
72b200d5a5 | ||
|
|
8968e1f691 | ||
|
|
86374d139d |
3
.github/workflows/e2e-tests.yml
vendored
3
.github/workflows/e2e-tests.yml
vendored
@@ -26,6 +26,9 @@ jobs:
|
||||
with:
|
||||
poetry-version: 2.1.3
|
||||
|
||||
- name: Install UV
|
||||
run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
|
||||
2
.github/workflows/ghcr-build.yml
vendored
2
.github/workflows/ghcr-build.yml
vendored
@@ -116,6 +116,8 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install UV
|
||||
run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
- name: Set up Python
|
||||
uses: useblacksmith/setup-python@v6
|
||||
with:
|
||||
|
||||
4
.github/workflows/py-tests.yml
vendored
4
.github/workflows/py-tests.yml
vendored
@@ -42,6 +42,8 @@ jobs:
|
||||
node-version: "22.x"
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install UV
|
||||
run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
- name: Set up Python
|
||||
uses: useblacksmith/setup-python@v6
|
||||
with:
|
||||
@@ -81,6 +83,8 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install UV
|
||||
run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
- name: Set up Python
|
||||
uses: useblacksmith/setup-python@v6
|
||||
with:
|
||||
|
||||
2
.github/workflows/pypi-release.yml
vendored
2
.github/workflows/pypi-release.yml
vendored
@@ -32,6 +32,8 @@ jobs:
|
||||
with:
|
||||
virtualenvs-in-project: true
|
||||
virtualenvs-path: ~/.virtualenvs
|
||||
- name: Install UV
|
||||
run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||
- name: Install Poetry Dependencies
|
||||
run: poetry install --no-interaction --no-root
|
||||
- name: Build poetry project
|
||||
|
||||
74
Makefile
74
Makefile
@@ -14,6 +14,23 @@ PRE_COMMIT_CONFIG_PATH = "./dev_config/python/.pre-commit-config.yaml"
|
||||
PYTHON_VERSION = 3.12
|
||||
KIND_CLUSTER_NAME = "local-hands"
|
||||
|
||||
# Package manager selection: "uv" or "poetry" (default: poetry for backward compatibility)
|
||||
# Set USE_UV=1 to use UV instead of Poetry
|
||||
USE_UV ?= 0
|
||||
ifeq ($(USE_UV),1)
|
||||
PKG_MANAGER = uv
|
||||
PKG_RUN = uv run
|
||||
PKG_INSTALL = uv sync
|
||||
PKG_INSTALL_GROUPS = --group dev --group test --group runtime
|
||||
PKG_INSTALL_ONLY_PREFIX = --only-group
|
||||
else
|
||||
PKG_MANAGER = poetry
|
||||
PKG_RUN = poetry run
|
||||
PKG_INSTALL = poetry install
|
||||
PKG_INSTALL_GROUPS = --with dev,test,runtime
|
||||
PKG_INSTALL_ONLY_PREFIX = --only
|
||||
endif
|
||||
|
||||
# ANSI color codes
|
||||
GREEN=$(shell tput -Txterm setaf 2)
|
||||
YELLOW=$(shell tput -Txterm setaf 3)
|
||||
@@ -40,7 +57,7 @@ check-dependencies:
|
||||
ifeq ($(INSTALL_DOCKER),)
|
||||
@$(MAKE) -s check-docker
|
||||
endif
|
||||
@$(MAKE) -s check-poetry
|
||||
@$(MAKE) -s check-pkg-manager
|
||||
@$(MAKE) -s check-tmux
|
||||
@echo "$(GREEN)Dependencies checked successfully.$(RESET)"
|
||||
|
||||
@@ -116,13 +133,24 @@ check-tmux:
|
||||
echo "$(YELLOW)╚════════════════════════════════════════════════════════════════════════════╝$(RESET)"; \
|
||||
fi
|
||||
|
||||
check-poetry:
|
||||
check-pkg-manager:
|
||||
ifeq ($(USE_UV),1)
|
||||
@echo "$(YELLOW)Checking UV installation...$(RESET)"
|
||||
@if command -v uv > /dev/null; then \
|
||||
echo "$(BLUE)$$(uv --version) is already installed.$(RESET)"; \
|
||||
else \
|
||||
echo "$(RED)UV is not installed. You can install UV by running:"; \
|
||||
echo "$(RED) curl -LsSf https://astral.sh/uv/install.sh | sh$(RESET)"; \
|
||||
echo "$(RED)More detail here: https://docs.astral.sh/uv/getting-started/installation/$(RESET)"; \
|
||||
exit 1; \
|
||||
fi
|
||||
else
|
||||
@echo "$(YELLOW)Checking Poetry installation...$(RESET)"
|
||||
@if command -v poetry > /dev/null; then \
|
||||
POETRY_VERSION=$(shell poetry --version 2>&1 | sed -E 's/Poetry \(version ([0-9]+\.[0-9]+\.[0-9]+)\)/\1/'); \
|
||||
POETRY_VERSION=$$(poetry --version 2>&1 | sed -E 's/Poetry \(version ([0-9]+\.[0-9]+\.[0-9]+)\)/\1/'); \
|
||||
IFS='.' read -r -a POETRY_VERSION_ARRAY <<< "$$POETRY_VERSION"; \
|
||||
if [ $${POETRY_VERSION_ARRAY[0]} -gt 1 ] || ([ $${POETRY_VERSION_ARRAY[0]} -eq 1 ] && [ $${POETRY_VERSION_ARRAY[1]} -ge 8 ]); then \
|
||||
echo "$(BLUE)$(shell poetry --version) is already installed.$(RESET)"; \
|
||||
echo "$(BLUE)$$(poetry --version) is already installed.$(RESET)"; \
|
||||
else \
|
||||
echo "$(RED)Poetry 1.8 or later is required. You can install poetry by running the following command, then adding Poetry to your PATH:"; \
|
||||
echo "$(RED) curl -sSL https://install.python-poetry.org | python$(PYTHON_VERSION) -$(RESET)"; \
|
||||
@@ -135,6 +163,10 @@ check-poetry:
|
||||
echo "$(RED)More detail here: https://python-poetry.org/docs/#installing-with-the-official-installer$(RESET)"; \
|
||||
exit 1; \
|
||||
fi
|
||||
endif
|
||||
|
||||
# Legacy alias for backward compatibility
|
||||
check-poetry: check-pkg-manager
|
||||
|
||||
install-python-dependencies:
|
||||
@echo "$(GREEN)Installing Python dependencies...$(RESET)"
|
||||
@@ -142,6 +174,21 @@ install-python-dependencies:
|
||||
echo "Defaulting TZ (timezone) to UTC"; \
|
||||
export TZ="UTC"; \
|
||||
fi
|
||||
ifeq ($(USE_UV),1)
|
||||
@echo "$(BLUE)Using UV for dependency management$(RESET)"
|
||||
@if [ "$(shell uname)" = "Darwin" ]; then \
|
||||
echo "$(BLUE)Installing chroma-hnswlib...$(RESET)"; \
|
||||
export HNSWLIB_NO_NATIVE=1; \
|
||||
uv pip install chroma-hnswlib; \
|
||||
fi
|
||||
@if [ -n "${DEP_GROUP}" ]; then \
|
||||
echo "Installing only DEP_GROUP=${DEP_GROUP}"; \
|
||||
uv sync --only-group $${DEP_GROUP}; \
|
||||
else \
|
||||
uv sync --group dev --group test --group runtime; \
|
||||
fi
|
||||
else
|
||||
@echo "$(BLUE)Using Poetry for dependency management$(RESET)"
|
||||
poetry env use python$(PYTHON_VERSION)
|
||||
@if [ "$(shell uname)" = "Darwin" ]; then \
|
||||
echo "$(BLUE)Installing chroma-hnswlib...$(RESET)"; \
|
||||
@@ -154,15 +201,16 @@ install-python-dependencies:
|
||||
else \
|
||||
poetry install --with dev,test,runtime; \
|
||||
fi
|
||||
endif
|
||||
@if [ "${INSTALL_PLAYWRIGHT}" != "false" ] && [ "${INSTALL_PLAYWRIGHT}" != "0" ]; then \
|
||||
if [ -f "/etc/manjaro-release" ]; then \
|
||||
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
|
||||
poetry run pip install playwright; \
|
||||
poetry run playwright install chromium; \
|
||||
$(if $(filter 1,$(USE_UV)),uv pip install,poetry run pip install) playwright; \
|
||||
$(PKG_RUN) playwright install chromium; \
|
||||
else \
|
||||
if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
|
||||
echo "Running playwright install --with-deps chromium..."; \
|
||||
poetry run playwright install --with-deps chromium; \
|
||||
$(PKG_RUN) playwright install --with-deps chromium; \
|
||||
mkdir -p cache; \
|
||||
touch cache/playwright_chromium_is_installed.txt; \
|
||||
else \
|
||||
@@ -182,15 +230,15 @@ install-frontend-dependencies: check-npm check-nodejs
|
||||
@cd frontend && npm install
|
||||
@echo "$(GREEN)Frontend dependencies installed successfully.$(RESET)"
|
||||
|
||||
install-pre-commit-hooks: check-python check-poetry install-python-dependencies
|
||||
install-pre-commit-hooks: check-python check-pkg-manager install-python-dependencies
|
||||
@echo "$(YELLOW)Installing pre-commit hooks...$(RESET)"
|
||||
@git config --unset-all core.hooksPath || true
|
||||
@poetry run pre-commit install --config $(PRE_COMMIT_CONFIG_PATH)
|
||||
@$(PKG_RUN) pre-commit install --config $(PRE_COMMIT_CONFIG_PATH)
|
||||
@echo "$(GREEN)Pre-commit hooks installed successfully.$(RESET)"
|
||||
|
||||
lint-backend: install-pre-commit-hooks
|
||||
@echo "$(YELLOW)Running linters...$(RESET)"
|
||||
@poetry run pre-commit run --all-files --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
|
||||
@$(PKG_RUN) pre-commit run --all-files --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
|
||||
|
||||
lint-frontend: install-frontend-dependencies
|
||||
@echo "$(YELLOW)Running linters for frontend...$(RESET)"
|
||||
@@ -248,7 +296,7 @@ build-frontend:
|
||||
# Start backend
|
||||
start-backend:
|
||||
@echo "$(YELLOW)Starting backend...$(RESET)"
|
||||
@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "./workspace"
|
||||
@$(PKG_RUN) uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "./workspace"
|
||||
|
||||
# Start frontend
|
||||
start-frontend:
|
||||
@@ -270,7 +318,7 @@ _run_setup:
|
||||
fi
|
||||
@mkdir -p logs
|
||||
@echo "$(YELLOW)Starting backend server...$(RESET)"
|
||||
@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) &
|
||||
@$(PKG_RUN) uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) &
|
||||
@echo "$(YELLOW)Waiting for the backend to start...$(RESET)"
|
||||
@until nc -z localhost $(BACKEND_PORT); do sleep 0.1; done
|
||||
@echo "$(GREEN)Backend started successfully.$(RESET)"
|
||||
@@ -367,5 +415,5 @@ help:
|
||||
@echo " $(GREEN)help$(RESET) - Display this help message, providing information on available targets."
|
||||
|
||||
# Phony targets
|
||||
.PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help
|
||||
.PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-pkg-manager check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help
|
||||
.PHONY: kind
|
||||
|
||||
@@ -15,6 +15,9 @@ FROM base AS backend-builder
|
||||
WORKDIR /app
|
||||
ENV PYTHONPATH='/app'
|
||||
|
||||
# Package manager selection: set USE_UV=1 to use UV instead of Poetry
|
||||
ARG USE_UV=0
|
||||
|
||||
ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_VIRTUALENVS_IN_PROJECT=1 \
|
||||
POETRY_VIRTUALENVS_CREATE=1 \
|
||||
@@ -22,11 +25,21 @@ ENV POETRY_NO_INTERACTION=1 \
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& apt-get install -y curl make git build-essential jq gettext \
|
||||
&& python3 -m pip install poetry --break-system-packages
|
||||
&& python3 -m pip install poetry --break-system-packages \
|
||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
# Copy both lock files for flexibility
|
||||
COPY pyproject.toml poetry.lock uv.lock ./
|
||||
RUN touch README.md
|
||||
RUN export POETRY_CACHE_DIR && poetry install --no-root && rm -rf $POETRY_CACHE_DIR
|
||||
|
||||
# Install dependencies using selected package manager
|
||||
RUN if [ "$USE_UV" = "1" ]; then \
|
||||
echo "Installing dependencies with UV..." && \
|
||||
# Only dependency manifests are present at this stage, so skip installing the
# project itself (mirrors `poetry install --no-root` in the other branch).
/root/.local/bin/uv sync --no-dev --no-install-project; \
|
||||
else \
|
||||
echo "Installing dependencies with Poetry..." && \
|
||||
export POETRY_CACHE_DIR && poetry install --no-root && rm -rf $POETRY_CACHE_DIR; \
|
||||
fi
|
||||
|
||||
FROM base AS openhands-app
|
||||
|
||||
@@ -76,7 +89,7 @@ COPY --chown=openhands:openhands --chmod=770 --from=backend-builder ${VIRTUAL_EN
|
||||
COPY --chown=openhands:openhands --chmod=770 ./skills ./skills
|
||||
COPY --chown=openhands:openhands --chmod=770 ./openhands ./openhands
|
||||
COPY --chown=openhands:openhands --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
|
||||
COPY --chown=openhands:openhands pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./
|
||||
COPY --chown=openhands:openhands pyproject.toml poetry.lock uv.lock README.md MANIFEST.in LICENSE ./
|
||||
|
||||
# This is run as "openhands" user, and will create __pycache__ with openhands:openhands ownership
|
||||
RUN python openhands/core/download.py # No-op to download assets
|
||||
|
||||
@@ -69,6 +69,10 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
|
||||
RUN curl -fsSL https://install.python-poetry.org | python3.12 - \
|
||||
&& ln -s ~/.local/bin/poetry /usr/local/bin/poetry
|
||||
|
||||
# UV (alternative package manager)
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||
&& ln -s ~/.local/bin/uv /usr/local/bin/uv
|
||||
|
||||
#
|
||||
RUN <<EOF
|
||||
#!/bin/bash
|
||||
@@ -80,9 +84,10 @@ gh --version | head -n 1
|
||||
git --version
|
||||
#
|
||||
python --version
|
||||
echo node `node --version`
|
||||
echo npm `npm --version`
|
||||
echo node \`node --version\`
|
||||
echo npm \`npm --version\`
|
||||
poetry --version
|
||||
uv --version
|
||||
netcat -h 2>&1 | head -n 1
|
||||
" > /version.sh
|
||||
chmod a+x /version.sh
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -40,7 +43,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/EDA/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/EDA/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--dataset $DATASET \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -26,7 +29,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && poetry run python evaluation/benchmarks/agent_bench/run_infer.py \
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && $PKG_RUN python evaluation/benchmarks/agent_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 30 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -39,7 +42,7 @@ if [ "$USE_UNIT_TESTS" = true ]; then
|
||||
EVAL_NOTE=$EVAL_NOTE-w-test
|
||||
fi
|
||||
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/aider_bench:\$PYTHONPATH && poetry run python evaluation/benchmarks/aider_bench/run_infer.py \
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/aider_bench:\$PYTHONPATH && $PKG_RUN python evaluation/benchmarks/aider_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 30 \
|
||||
|
||||
@@ -2,10 +2,13 @@
|
||||
set -eo pipefail
|
||||
|
||||
# Generate the tasks
|
||||
poetry run python evaluation/benchmarks/algotune/adapter/run_adapter.py --output-path evaluation/benchmarks/algotune/tasks
|
||||
# PKG_RUN is only assigned further down (after version_control.sh is sourced),
# so resolve the runner here: keep a pre-set PKG_RUN, else choose from USE_UV.
if [ -z "${PKG_RUN:-}" ]; then
  if [ "${USE_UV:-0}" = "1" ]; then PKG_RUN="uv run"; else PKG_RUN="poetry run"; fi
fi
$PKG_RUN python evaluation/benchmarks/algotune/adapter/run_adapter.py --output-path evaluation/benchmarks/algotune/tasks
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -59,7 +62,7 @@ fi
|
||||
echo "ENABLE_VOLUMES: $ENABLE_VOLUMES"
|
||||
|
||||
# Construct the command
|
||||
COMMAND="poetry run python evaluation/benchmarks/algotune/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/algotune/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--optim_task $OPTIM_TASK \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -28,7 +31,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/biocoder/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/biocoder/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -26,7 +29,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/bird/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/bird/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 5 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -28,7 +31,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/browsing_delegation/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/browsing_delegation/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 1 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
REPO_SPLIT=$1
|
||||
MODEL_CONFIG=$2
|
||||
COMMIT_HASH=$3
|
||||
@@ -84,7 +87,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
COMMAND="poetry run python evaluation/benchmarks/commit0/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/commit0/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -29,7 +32,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/discoverybench/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/discoverybench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -36,7 +39,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "LEVELS: $LEVELS"
|
||||
|
||||
COMMAND="poetry run python ./evaluation/benchmarks/gaia/run_infer.py \
|
||||
COMMAND="$PKG_RUN python ./evaluation/benchmarks/gaia/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 60 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -33,7 +36,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "HUBS: $HUBS"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/gorilla/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/gorilla/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 30 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
EVAL_LIMIT=$3
|
||||
@@ -33,7 +36,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/gpqa/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/gpqa/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -64,7 +67,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/humanevalfix/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/humanevalfix/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
PROCESS_FILEPATH=$1
|
||||
if [ -z "$PROCESS_FILEPATH" ]; then
|
||||
echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
|
||||
@@ -21,7 +24,7 @@ if [ -n "$EXP_NAME" ]; then
|
||||
fi
|
||||
|
||||
function run_eval() {
|
||||
COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
|
||||
COMMAND="$PKG_RUN python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
|
||||
--predictions-path $PROCESS_FILEPATH "
|
||||
|
||||
echo "RUNNING: $COMMAND"
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
|
||||
get_openhands_version
|
||||
@@ -16,7 +19,7 @@ if [ -n "$EXP_NAME" ]; then
|
||||
fi
|
||||
|
||||
function run_eval() {
|
||||
COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
|
||||
COMMAND="$PKG_RUN python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG "
|
||||
|
||||
# Run the command
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
DATASET=$2
|
||||
COMMIT_HASH=$3
|
||||
@@ -34,7 +37,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/logic_reasoning/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/logic_reasoning/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--dataset $DATASET \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
# configure browsing agent
|
||||
export USE_NAV="false"
|
||||
export USE_CONCISE_ANSWER="true"
|
||||
@@ -33,7 +36,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="${OPENHANDS_VERSION}_${NOTE}"
|
||||
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/miniwob:\$PYTHONPATH && poetry run python evaluation/benchmarks/miniwob/run_infer.py \
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/miniwob:\$PYTHONPATH && $PKG_RUN python evaluation/benchmarks/miniwob/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
SUBSET=$3
|
||||
@@ -25,7 +28,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
|
||||
export PYTHONPATH=$(pwd)
|
||||
|
||||
COMMAND="poetry run python ./evaluation/mint/run_infer.py \
|
||||
COMMAND="$PKG_RUN python ./evaluation/mint/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 5 \
|
||||
--max-propose-solution 2 \
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
# Package manager runner - uses UV if USE_UV=1, otherwise Poetry
|
||||
# Keep a PKG_RUN supplied by the caller; otherwise select the runner from
# USE_UV ("1" -> uv run, anything else -> poetry run).
if [ -z "${PKG_RUN:-}" ]; then
  if [ "${USE_UV:-0}" = "1" ]; then PKG_RUN="uv run"; else PKG_RUN="poetry run"; fi
fi
|
||||
RESULT_FILE=$1
|
||||
MODEL_CONFIG=$2
|
||||
|
||||
@@ -17,7 +20,7 @@ fi
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "RESULT_FILE: $RESULT_FILE"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_analysis.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/ml_bench/run_analysis.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--json_file_path $RESULT_FILE"
|
||||
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
SPLIT=$3
|
||||
@@ -32,7 +35,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/ml_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
|
||||
@@ -29,7 +29,7 @@ DATASET="${EVAL_DATASET%.jsonl}_with_runtime_.jsonl" # path to converted datase
|
||||
|
||||
# Create the converted dataset file
|
||||
echo "Creating converted dataset at: $DATASET"
|
||||
poetry run python ./evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py --input "$EVAL_DATASET" --output "$DATASET"
|
||||
# PKG_RUN is only assigned later in this script (in the "Run inference"
# section), so resolve the runner here: keep a pre-set PKG_RUN, else choose
# from USE_UV.
if [ -z "${PKG_RUN:-}" ]; then
  if [ "${USE_UV:-0}" = "1" ]; then PKG_RUN="uv run"; else PKG_RUN="poetry run"; fi
fi
$PKG_RUN python ./evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py --input "$EVAL_DATASET" --output "$DATASET"
|
||||
|
||||
SPLIT="train"
|
||||
export LANGUAGE=java
|
||||
@@ -45,6 +45,9 @@ fi
|
||||
|
||||
# ===== Run inference =====
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
get_openhands_version
|
||||
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
@@ -64,7 +67,7 @@ function run_eval() {
|
||||
export LANGUAGE=java
|
||||
echo "About to run command"
|
||||
COMMAND="EVAL_DOCKER_IMAGE_PREFIX=$EVAL_DOCKER_IMAGE_PREFIX; LANGUAGE=java;
|
||||
poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py \
|
||||
$PKG_RUN python evaluation/benchmarks/multi_swe_bench/run_infer.py \
|
||||
--agent-cls CodeActAgent \
|
||||
--llm-config $MODEL \
|
||||
--max-iterations $MAX_ITER \
|
||||
@@ -90,7 +93,7 @@ function run_eval() {
|
||||
for run_idx in $(seq 1 $N_RUNS); do
|
||||
if [ -n "$SKIP_IDS_THRESHOLD" ]; then
|
||||
echo "Computing SKIP_IDS for run $run_idx..."
|
||||
SKIP_CMD="poetry run python evaluation/benchmarks/multi_swe_bench/compute_skip_ids.py $SKIP_IDS_THRESHOLD"
|
||||
SKIP_CMD="$PKG_RUN python evaluation/benchmarks/multi_swe_bench/compute_skip_ids.py $SKIP_IDS_THRESHOLD"
|
||||
if [ -n "$SKIP_IDS_PATTERN" ]; then
|
||||
SKIP_CMD="$SKIP_CMD --pattern \"$SKIP_IDS_PATTERN\""
|
||||
fi
|
||||
@@ -150,8 +153,8 @@ for run_idx in $(seq 1 $N_RUNS); do
|
||||
echo "### Evaluating on $OUTPUT_FILE ... ###"
|
||||
OUTPUT_CONFIG_FILE="${OUTPUT_FILE%.jsonl}_config.json"
|
||||
export EVAL_SKIP_BUILD_ERRORS=true
|
||||
COMMAND="poetry run python ./evaluation/benchmarks/multi_swe_bench/scripts/eval/update_multi_swe_bench_config.py --input $OUTPUT_FILE --output $OUTPUT_CONFIG_FILE --dataset $EVAL_DATASET;
|
||||
poetry run python -m multi_swe_bench.harness.run_evaluation --config $OUTPUT_CONFIG_FILE
|
||||
COMMAND="$PKG_RUN python ./evaluation/benchmarks/multi_swe_bench/scripts/eval/update_multi_swe_bench_config.py --input $OUTPUT_FILE --output $OUTPUT_CONFIG_FILE --dataset $EVAL_DATASET;
|
||||
$PKG_RUN python -m multi_swe_bench.harness.run_evaluation --config $OUTPUT_CONFIG_FILE
|
||||
"
|
||||
|
||||
echo "Running command: $COMMAND"
|
||||
@@ -170,10 +173,10 @@ for run_idx in $(seq 1 $N_RUNS); do
|
||||
|
||||
# update the output with evaluation results
|
||||
echo "### Updating the output with evaluation results... ###"
|
||||
poetry run python evaluation/benchmarks/multi_swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE
|
||||
$PKG_RUN python evaluation/benchmarks/multi_swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE
|
||||
|
||||
echo "### Combining the final completions... ###"
|
||||
poetry run python evaluation/benchmarks/multi_swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE
|
||||
$PKG_RUN python evaluation/benchmarks/multi_swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE
|
||||
|
||||
echo "### DONE for run $run_idx! ###"
|
||||
echo "You can find the final output at $(dirname $OUTPUT_FILE)/$FINAL_OUTPUT_FILE"
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -115,7 +118,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
COMMAND="poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/multi_swe_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -103,7 +106,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note="${1}"
|
||||
COMMAND="poetry run python evaluation/benchmarks/nocode_bench/run_infer_nc.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/nocode_bench/run_infer_nc.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
USE_KNOWLEDGE=$3
|
||||
@@ -32,7 +35,7 @@ echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/scienceagentbench/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/scienceagentbench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--use-knowledge $USE_KNOWLEDGE \
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
# Package manager runner - honors a PKG_RUN override from the environment (e.g. "uv run"), otherwise defaults to "poetry run"
|
||||
PKG_RUN=${PKG_RUN:-poetry run}
|
||||
FOLDER_PATH=$1
|
||||
NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
|
||||
mkdir -p $NEW_FOLDER_PATH
|
||||
|
||||
# Build all_preds.jsonl
|
||||
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
|
||||
$PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
|
||||
mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl
|
||||
|
||||
# Build trajs/
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
# Package manager runner - honors a PKG_RUN override from the environment (e.g. "uv run"), otherwise defaults to "poetry run"
|
||||
PKG_RUN=${PKG_RUN:-poetry run}
|
||||
PROCESS_FILEPATH=$1
|
||||
if [ -z "$PROCESS_FILEPATH" ]; then
|
||||
echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
|
||||
@@ -66,7 +69,7 @@ else
|
||||
|
||||
# ==== Convert OH format to SWE-bench format ====
|
||||
echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
|
||||
poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
|
||||
$PKG_RUN python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
|
||||
# replace .jsonl with .swebench.jsonl in filename
|
||||
SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
|
||||
echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
|
||||
@@ -97,7 +100,7 @@ if [ -z "$INSTANCE_ID" ]; then
|
||||
# Default to SWE-Bench-lite
|
||||
# change `--dataset_name` and `--split` to alter dataset
|
||||
|
||||
poetry run python -m swebench.harness.run_evaluation \
|
||||
$PKG_RUN python -m swebench.harness.run_evaluation \
|
||||
--dataset_name "$DATASET_NAME" \
|
||||
--split "$SPLIT" \
|
||||
--predictions_path $SWEBENCH_FORMAT_JSONL \
|
||||
@@ -140,11 +143,11 @@ if [ -z "$INSTANCE_ID" ]; then
|
||||
mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json
|
||||
fi
|
||||
|
||||
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
|
||||
$PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
|
||||
|
||||
else
|
||||
echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
|
||||
poetry run python -m swebench.harness.run_evaluation \
|
||||
$PKG_RUN python -m swebench.harness.run_evaluation \
|
||||
--dataset_name "$DATASET_NAME" \
|
||||
--split "$SPLIT" \
|
||||
--predictions_path $SWEBENCH_FORMAT_JSONL \
|
||||
|
||||
@@ -35,6 +35,9 @@ MAX_ITER=100
|
||||
|
||||
# ===== Run inference =====
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
get_openhands_version
|
||||
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
@@ -51,7 +54,7 @@ EVAL_NOTE="$OPENHANDS_VERSION-no-hint-$EXP_NAME"
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_infer.py \
|
||||
--agent-cls CodeActAgent \
|
||||
--llm-config $MODEL \
|
||||
--max-iterations $MAX_ITER \
|
||||
@@ -97,7 +100,7 @@ for run_idx in $(seq 1 $N_RUNS); do
|
||||
|
||||
while true; do
|
||||
echo "### Evaluating on $OUTPUT_FILE ... ###"
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/eval_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/eval_infer.py \
|
||||
--eval-num-workers $((N_WORKERS * 2)) \
|
||||
--input-file $OUTPUT_FILE \
|
||||
--dataset $DATASET \
|
||||
@@ -123,10 +126,10 @@ for run_idx in $(seq 1 $N_RUNS); do
|
||||
|
||||
# update the output with evaluation results
|
||||
echo "### Updating the output with evaluation results... ###"
|
||||
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE
|
||||
$PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE
|
||||
|
||||
echo "### Combining the final completions... ###"
|
||||
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE
|
||||
$PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE
|
||||
|
||||
echo "### DONE for run $run_idx! ###"
|
||||
echo "You can find the final output at $(dirname $OUTPUT_FILE)/$FINAL_OUTPUT_FILE"
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -103,7 +106,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note="${1}"
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -91,7 +94,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note="${1}"
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer_interact.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_infer_interact.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -77,7 +80,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_localize.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_localize.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -103,7 +106,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note="${1}"
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_perf/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/swe_perf/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -107,7 +110,7 @@ export NO_CHANGE_TIMEOUT_SECONDS=900 # 15 minutes
|
||||
|
||||
function run_eval() {
|
||||
local eval_note="${1}"
|
||||
COMMAND="poetry run python evaluation/benchmarks/swefficiency/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/swefficiency/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
# Package manager runner - honors a PKG_RUN override from the environment (e.g. "uv run"), otherwise defaults to "poetry run"
|
||||
PKG_RUN=${PKG_RUN:-poetry run}
|
||||
FOLDER_PATH=$1
|
||||
NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
|
||||
mkdir -p $NEW_FOLDER_PATH
|
||||
|
||||
# Build all_preds.jsonl
|
||||
poetry run python evaluation/testgeneval/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
|
||||
$PKG_RUN python evaluation/testgeneval/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
|
||||
mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl
|
||||
|
||||
# Build trajs/
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
|
||||
# Package manager runner - honors a PKG_RUN override from the environment (e.g. "uv run"), otherwise defaults to "poetry run"
|
||||
PKG_RUN=${PKG_RUN:-poetry run}
|
||||
|
||||
INPUT_FILE=$1
|
||||
NUM_WORKERS=$2
|
||||
DATASET=$3
|
||||
@@ -29,7 +32,7 @@ fi
|
||||
|
||||
echo "... Evaluating on $INPUT_FILE ..."
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/testgeneval/eval_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/testgeneval/eval_infer.py \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--input-file $INPUT_FILE \
|
||||
--dataset $DATASET \
|
||||
@@ -50,4 +53,4 @@ echo $COMMAND
|
||||
eval $COMMAND
|
||||
|
||||
# update the output with evaluation results
|
||||
# poetry run python evaluation/benchmarks/testgeneval/scripts/eval/update_output_with_eval.py $INPUT_FILE
|
||||
# $PKG_RUN python evaluation/benchmarks/testgeneval/scripts/eval/update_output_with_eval.py $INPUT_FILE
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -85,7 +88,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
COMMAND="poetry run python evaluation/benchmarks/testgeneval/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/testgeneval/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
# Package manager runner - honors a PKG_RUN override from the environment (e.g. "uv run"), otherwise defaults to "poetry run"
|
||||
PKG_RUN=${PKG_RUN:-poetry run}
|
||||
##################################################################################################
|
||||
# Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/run_eval.sh
|
||||
##################################################################################################
|
||||
@@ -145,7 +148,7 @@ while IFS= read -r task_image; do
|
||||
docker pull $task_image
|
||||
|
||||
# Build the Python command
|
||||
COMMAND="poetry run python -m evaluation.benchmarks.the_agent_company.run_infer \
|
||||
COMMAND="$PKG_RUN python -m evaluation.benchmarks.the_agent_company.run_infer \
|
||||
--agent-llm-config \"$AGENT_LLM_CONFIG\" \
|
||||
--env-llm-config \"$ENV_LLM_CONFIG\" \
|
||||
--outputs-path \"$OUTPUTS_PATH\" \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -47,7 +50,7 @@ echo "DATASET: $DATASET"
|
||||
echo "HARDNESS: $HARDNESS"
|
||||
echo "WOLFRAM_APPID: $WOLFRAM_APPID"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/toolqa/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/toolqa/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 30 \
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
# Package manager runner - honors a PKG_RUN override from the environment (e.g. "uv run"), otherwise defaults to "poetry run"
|
||||
PKG_RUN=${PKG_RUN:-poetry run}
|
||||
PROCESS_FILEPATH=$1
|
||||
if [ -z "$PROCESS_FILEPATH" ]; then
|
||||
echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
|
||||
@@ -58,7 +61,7 @@ else
|
||||
|
||||
# ==== Convert OH format to SWE-bench format ====
|
||||
echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
|
||||
poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
|
||||
$PKG_RUN python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
|
||||
# replace .jsonl with .swebench.jsonl in filename
|
||||
SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
|
||||
echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
|
||||
@@ -83,7 +86,7 @@ if [ -z "$INSTANCE_ID" ]; then
|
||||
# Default to SWE-Bench-lite
|
||||
# change `--dataset_name` and `--split` to alter dataset
|
||||
|
||||
poetry run python -m visualswebench.harness.run_evaluation \
|
||||
$PKG_RUN python -m visualswebench.harness.run_evaluation \
|
||||
--dataset_name "$DATASET_NAME" \
|
||||
--split "$SPLIT" \
|
||||
--predictions_path $SWEBENCH_FORMAT_JSONL \
|
||||
@@ -125,11 +128,11 @@ if [ -z "$INSTANCE_ID" ]; then
|
||||
mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json
|
||||
fi
|
||||
|
||||
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
|
||||
$PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
|
||||
|
||||
else
|
||||
echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
|
||||
poetry run python -m visualswebench.harness.run_evaluation \
|
||||
$PKG_RUN python -m visualswebench.harness.run_evaluation \
|
||||
--dataset_name "$DATASET_NAME" \
|
||||
--split "$SPLIT" \
|
||||
--predictions_path $SWEBENCH_FORMAT_JSONL \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
@@ -84,7 +87,7 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
COMMAND="poetry run python evaluation/benchmarks/visual_swe_bench/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/visual_swe_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
# configure browsing agent
|
||||
export USE_NAV="true"
|
||||
export USE_CONCISE_ANSWER="true"
|
||||
@@ -32,7 +35,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="${OPENHANDS_VERSION}"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/visualwebarena/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/visualwebarena/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 15 \
|
||||
|
||||
@@ -3,6 +3,9 @@ set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
# Get package runner (poetry run or uv run based on USE_UV env var)
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
|
||||
# configure webarena websites and environment
|
||||
source evaluation/benchmarks/webarena/scripts/webarena_env.sh
|
||||
|
||||
@@ -35,7 +38,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/webarena/run_infer.py \
|
||||
COMMAND="$PKG_RUN python evaluation/benchmarks/webarena/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 15 \
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
# Package manager runner - uses UV if USE_UV=1, otherwise Poetry
|
||||
# This allows gradual migration from Poetry to UV. The runner prefix is echoed to stdout, so capture it with PKG_RUN=$(get_pkg_run)
|
||||
get_pkg_run() {
|
||||
if [ "${USE_UV:-0}" = "1" ]; then
|
||||
echo "uv run"
|
||||
else
|
||||
echo "poetry run"
|
||||
fi
|
||||
}
|
||||
|
||||
checkout_eval_branch() {
|
||||
if [ -z "$COMMIT_HASH" ]; then
|
||||
echo "Commit hash not specified, use current git commit"
|
||||
@@ -42,5 +52,6 @@ checkout_original_branch() {
|
||||
get_openhands_version() {
|
||||
# IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenHands
|
||||
# We need to track the version of Agent in the evaluation to make sure results are comparable
|
||||
OPENHANDS_VERSION=v$(poetry run python -c "from openhands import get_version; print(get_version())")
|
||||
PKG_RUN=$(get_pkg_run)
|
||||
OPENHANDS_VERSION=v$($PKG_RUN python -c "from openhands import get_version; print(get_version())")
|
||||
}
|
||||
|
||||
@@ -289,12 +289,13 @@ def prep_build_folder(
|
||||
# Copy the 'skills' directory (Skills)
|
||||
shutil.copytree(Path(project_root, 'skills'), Path(build_folder, 'code', 'skills'))
|
||||
|
||||
# Copy pyproject.toml and poetry.lock files
|
||||
for file in ['pyproject.toml', 'poetry.lock']:
|
||||
# Copy pyproject.toml and lock files (poetry.lock and uv.lock if it exists)
|
||||
for file in ['pyproject.toml', 'poetry.lock', 'uv.lock']:
|
||||
src = Path(openhands_source_dir, file)
|
||||
if not src.exists():
|
||||
src = Path(project_root, file)
|
||||
shutil.copy2(src, Path(build_folder, 'code', file))
|
||||
if src.exists():
|
||||
shutil.copy2(src, Path(build_folder, 'code', file))
|
||||
|
||||
# Create a Dockerfile and write it to build_folder
|
||||
dockerfile_content = _generate_dockerfile(
|
||||
@@ -328,13 +329,15 @@ def get_hash_for_lock_files(base_image: str, enable_browser: bool = True) -> str
|
||||
# Only include enable_browser in hash when it's False for backward compatibility
|
||||
if not enable_browser:
|
||||
md5.update(str(enable_browser).encode())
|
||||
for file in ['pyproject.toml', 'poetry.lock']:
|
||||
# Include pyproject.toml and lock files (poetry.lock and uv.lock if it exists)
|
||||
for file in ['pyproject.toml', 'poetry.lock', 'uv.lock']:
|
||||
src = Path(openhands_source_dir, file)
|
||||
if not src.exists():
|
||||
src = Path(openhands_source_dir.parent, file)
|
||||
with open(src, 'rb') as f:
|
||||
for chunk in iter(lambda: f.read(4096), b''):
|
||||
md5.update(chunk)
|
||||
if src.exists():
|
||||
with open(src, 'rb') as f:
|
||||
for chunk in iter(lambda: f.read(4096), b''):
|
||||
md5.update(chunk)
|
||||
# We get away with truncation because we want something that is unique
|
||||
# rather than something that is cryptographically secure
|
||||
result = truncate_hash(md5.hexdigest())
|
||||
|
||||
@@ -296,7 +296,7 @@ RUN /openhands/micromamba/bin/micromamba create -n openhands -y && \
|
||||
/openhands/micromamba/bin/micromamba install -n openhands -c conda-forge poetry python=3.12 -y
|
||||
USER root
|
||||
|
||||
# Create a clean openhands directory including only the pyproject.toml, poetry.lock and openhands/__init__.py
|
||||
# Create a clean openhands directory including only the pyproject.toml, poetry.lock, uv.lock and openhands/__init__.py
|
||||
RUN \
|
||||
if [ -d /openhands/code ]; then rm -rf /openhands/code; fi && \
|
||||
mkdir -p /openhands/code/openhands && \
|
||||
@@ -307,6 +307,8 @@ RUN \
|
||||
git config --global user.email "openhands@all-hands.dev"
|
||||
|
||||
COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
|
||||
# Copy uv.lock if it exists (for future UV support)
|
||||
COPY --chown=openhands:openhands ./code/uv.lock* /openhands/code/
|
||||
|
||||
{{ install_dependencies_user() }}
|
||||
{{ install_dependencies_root() }}
|
||||
@@ -342,6 +344,8 @@ RUN \
|
||||
# ================================================================
|
||||
RUN if [ -d /openhands/code/openhands ]; then rm -rf /openhands/code/openhands; fi
|
||||
COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
|
||||
# Copy uv.lock if it exists (for future UV support)
|
||||
COPY --chown=openhands:openhands ./code/uv.lock* /openhands/code/
|
||||
RUN if [ -d /openhands/code/skills ]; then rm -rf /openhands/code/skills; fi
|
||||
COPY --chown=openhands:openhands ./code/skills /openhands/code/skills
|
||||
COPY --chown=openhands:openhands ./code/openhands /openhands/code/openhands
|
||||
|
||||
4
poetry.lock
generated
4
poetry.lock
generated
@@ -1816,7 +1816,7 @@ files = [
|
||||
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||
]
|
||||
markers = {main = "platform_system == \"Windows\" or os_name == \"nt\" or sys_platform == \"win32\"", dev = "os_name == \"nt\" or sys_platform == \"win32\"", runtime = "sys_platform == \"win32\"", test = "sys_platform == \"win32\""}
|
||||
markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\" or os_name == \"nt\"", dev = "os_name == \"nt\" or sys_platform == \"win32\"", runtime = "sys_platform == \"win32\"", test = "sys_platform == \"win32\""}
|
||||
|
||||
[[package]]
|
||||
name = "comm"
|
||||
@@ -16846,4 +16846,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12,<3.14"
|
||||
content-hash = "78c01d3e121d5f27a4120d043408ac0aa93fa95fe0c2c7d678161b08ccb582c1"
|
||||
content-hash = "ef3a6a2526eec15650284a245d0bd0dbf764514401799fc15677d72a7a09b2de"
|
||||
|
||||
175
pyproject.toml
175
pyproject.toml
@@ -1,8 +1,174 @@
|
||||
[build-system]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
requires = [
|
||||
"poetry-core",
|
||||
requires = [ "poetry-core" ]
|
||||
|
||||
[project]
|
||||
name = "openhands-ai"
|
||||
version = "1.1.0"
|
||||
description = "OpenHands: Code Less, Make More"
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
authors = [ { name = "OpenHands", email = "contact@all-hands.dev" } ]
|
||||
requires-python = ">=3.12,<3.14"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
# Main dependencies (mirrors [tool.poetry.dependencies] for UV compatibility)
|
||||
dependencies = [
|
||||
"aiohttp>=3.9,!=3.11.13",
|
||||
"anthropic[vertex]",
|
||||
"anyio==4.9",
|
||||
"asyncpg>=0.30",
|
||||
"bashlex>=0.18",
|
||||
"boto3",
|
||||
"browsergym-core==0.13.3",
|
||||
"deprecated",
|
||||
"deprecation>=2.1",
|
||||
"dirhash",
|
||||
"docker",
|
||||
"fastapi",
|
||||
"fastmcp>=2.12.4",
|
||||
"google-api-python-client>=2.164",
|
||||
"google-auth-httplib2",
|
||||
"google-auth-oauthlib",
|
||||
"google-cloud-aiplatform",
|
||||
"google-genai",
|
||||
"html2text",
|
||||
"httpx-aiohttp>=0.1.8",
|
||||
"ipywidgets>=8.1.5",
|
||||
"jinja2>=3.1.6",
|
||||
"joblib",
|
||||
"json-repair",
|
||||
"jupyter-kernel-gateway",
|
||||
"kubernetes>=33.1",
|
||||
"libtmux>=0.46.2",
|
||||
"litellm!=1.64.4,!=1.67.*,>=1.74.3",
|
||||
"lmnr>=0.7.20",
|
||||
"memory-profiler>=0.61",
|
||||
"numpy",
|
||||
"openai==2.8",
|
||||
"openhands-aci==0.3.2",
|
||||
"openhands-agent-server==1.8.1",
|
||||
"openhands-sdk==1.8.1",
|
||||
"openhands-tools==1.8.1",
|
||||
"opentelemetry-api>=1.33.1",
|
||||
"opentelemetry-exporter-otlp-proto-grpc>=1.33.1",
|
||||
"pathspec>=0.12.1",
|
||||
"pexpect",
|
||||
"pg8000>=1.31.5",
|
||||
"pillow>=11.3",
|
||||
"playwright>=1.55",
|
||||
"poetry>=2.1.2",
|
||||
"prompt-toolkit>=3.0.50",
|
||||
"protobuf>=5,<6",
|
||||
"psutil",
|
||||
"pybase62>=1",
|
||||
"pygithub>=2.5",
|
||||
"pyjwt>=2.9",
|
||||
"pylatexenc",
|
||||
"pypdf>=6",
|
||||
"python-docx",
|
||||
"python-dotenv",
|
||||
"python-frontmatter>=1.1",
|
||||
"python-jose[cryptography]>=3.3",
|
||||
"python-json-logger>=3.2.1",
|
||||
"python-multipart",
|
||||
"python-pptx",
|
||||
"python-socketio>=5.11.4",
|
||||
"pythonnet",
|
||||
"pyyaml>=6.0.2",
|
||||
"qtconsole>=5.6.1",
|
||||
"rapidfuzz>=3.9",
|
||||
"redis>=5.2,<7",
|
||||
"requests>=2.32.5",
|
||||
"setuptools>=78.1.1",
|
||||
"shellingham>=1.5.4",
|
||||
"sqlalchemy[asyncio]>=2.0.40",
|
||||
"sse-starlette>=3.0.2",
|
||||
"starlette>=0.48",
|
||||
"tenacity>=8.5,<10",
|
||||
"termcolor",
|
||||
"toml",
|
||||
"tornado>=6.5",
|
||||
"types-toml",
|
||||
"urllib3>=2.6.3",
|
||||
"uvicorn",
|
||||
"whatthepatch>=1.0.6",
|
||||
"zope-interface==7.2",
|
||||
]
|
||||
|
||||
optional-dependencies.third_party_runtimes = [
|
||||
"daytona==0.24.2",
|
||||
"e2b-code-interpreter>=2",
|
||||
"modal>=0.66.26,<1.2",
|
||||
"runloop-api-client==0.50",
|
||||
]
|
||||
urls.Homepage = "https://github.com/OpenHands/OpenHands"
|
||||
urls.Repository = "https://github.com/OpenHands/OpenHands"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"build",
|
||||
"mypy==1.17",
|
||||
"pre-commit==4.2",
|
||||
"pytest>=8.4",
|
||||
"pytest-asyncio>=1.3",
|
||||
"ruff==0.12.5",
|
||||
"types-setuptools",
|
||||
]
|
||||
test = [
|
||||
"gevent>=24.2.1,<26",
|
||||
"pandas",
|
||||
"pytest",
|
||||
"pytest-asyncio",
|
||||
"pytest-cov",
|
||||
"pytest-forked",
|
||||
"pytest-playwright>=0.7",
|
||||
"pytest-timeout>=2.4",
|
||||
"pytest-xdist",
|
||||
"reportlab",
|
||||
]
|
||||
runtime = [
|
||||
"flake8",
|
||||
"jupyterlab",
|
||||
"notebook",
|
||||
]
|
||||
evaluation = [
|
||||
"boto3-stubs[s3]>=1.37.19",
|
||||
"browsergym==0.13.3",
|
||||
"browsergym-miniwob==0.13.3",
|
||||
"browsergym-visualwebarena==0.13.3",
|
||||
"browsergym-webarena==0.13.3",
|
||||
"commit0",
|
||||
"datasets",
|
||||
"evaluate",
|
||||
"func-timeout",
|
||||
"gdown",
|
||||
"joblib",
|
||||
"matplotlib",
|
||||
"multi-swe-bench==0.1.2",
|
||||
"pandas",
|
||||
"pyarrow==21",
|
||||
"retry",
|
||||
"seaborn",
|
||||
"streamlit",
|
||||
"swebench",
|
||||
"swegym",
|
||||
"sympy",
|
||||
"tabulate",
|
||||
"visualswebench",
|
||||
"whatthepatch",
|
||||
]
|
||||
testgeneval = [
|
||||
"fuzzywuzzy>=0.18",
|
||||
"python-levenshtein>=0.26.1,<0.28",
|
||||
"rouge>=1.0.1",
|
||||
"tree-sitter-python>=0.23.6",
|
||||
]
|
||||
|
||||
# UV source configuration for the git dependencies in the evaluation group is defined further down, in [tool.uv.sources]
|
||||
|
||||
[tool.poetry]
|
||||
name = "openhands-ai"
|
||||
@@ -221,3 +387,8 @@ lint.pydocstyle.convention = "google"
|
||||
concurrency = [ "gevent" ]
|
||||
relative_files = true
|
||||
omit = [ "enterprise/tests/*", "**/test_*" ]
|
||||
|
||||
[tool.uv.sources]
|
||||
visualswebench = { git = "https://github.com/luolin101/Visual-SWE-bench.git" }
|
||||
swegym = { git = "https://github.com/SWE-Gym/SWE-Bench-Package.git" }
|
||||
swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" }
|
||||
|
||||
@@ -62,12 +62,15 @@ def _check_source_code_in_dir(temp_dir):
|
||||
assert os.path.exists(os.path.join(code_dir, 'pyproject.toml'))
|
||||
|
||||
# The source code should only include the `openhands` folder,
|
||||
# and pyproject.toml & poetry.lock that are needed to build the runtime image
|
||||
assert set(os.listdir(code_dir)) == {
|
||||
'openhands',
|
||||
'pyproject.toml',
|
||||
'poetry.lock',
|
||||
}
|
||||
# and pyproject.toml & lock files that are needed to build the runtime image
|
||||
expected_files = {'openhands', 'pyproject.toml', 'poetry.lock'}
|
||||
# uv.lock is optional - include it if it exists
|
||||
if os.path.exists(os.path.join(code_dir, 'uv.lock')):
|
||||
expected_files.add('uv.lock')
|
||||
# skills directory may also be present
|
||||
if os.path.exists(os.path.join(code_dir, 'skills')):
|
||||
expected_files.add('skills')
|
||||
assert set(os.listdir(code_dir)) == expected_files
|
||||
assert os.path.exists(os.path.join(code_dir, 'openhands'))
|
||||
assert os.path.isdir(os.path.join(code_dir, 'openhands'))
|
||||
|
||||
@@ -89,9 +92,11 @@ def test_prep_build_folder(temp_dir):
|
||||
extra_deps=None,
|
||||
)
|
||||
|
||||
# make sure that the code (openhands/) and microagents folder were copied
|
||||
# make sure that the code (openhands/) and skills folder were copied
|
||||
assert shutil_mock.copytree.call_count == 2
|
||||
assert shutil_mock.copy2.call_count == 2
|
||||
# copy2 is called for pyproject.toml, poetry.lock, and optionally uv.lock
|
||||
# The exact count depends on whether uv.lock exists
|
||||
assert shutil_mock.copy2.call_count >= 2
|
||||
|
||||
# Now check dockerfile is in the folder
|
||||
dockerfile_path = os.path.join(temp_dir, 'Dockerfile')
|
||||
@@ -100,26 +105,35 @@ def test_prep_build_folder(temp_dir):
|
||||
|
||||
|
||||
def test_get_hash_for_lock_files():
|
||||
with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
|
||||
# Mock Path.exists to return True for all files including uv.lock
|
||||
with (
|
||||
patch('builtins.open', mock_open(read_data='mock-data'.encode())),
|
||||
patch.object(Path, 'exists', return_value=True),
|
||||
):
|
||||
hash = get_hash_for_lock_files('some_base_image', enable_browser=True)
|
||||
# Since we mocked open to always return "mock-data", the hash is the result
|
||||
# of hashing the name of the base image followed by "mock-data" twice
|
||||
# of hashing the name of the base image followed by "mock-data" three times
|
||||
# (pyproject.toml, poetry.lock, uv.lock)
|
||||
md5 = hashlib.md5()
|
||||
md5.update('some_base_image'.encode())
|
||||
for _ in range(2):
|
||||
for _ in range(3): # pyproject.toml, poetry.lock, uv.lock
|
||||
md5.update('mock-data'.encode())
|
||||
assert hash == truncate_hash(md5.hexdigest())
|
||||
|
||||
|
||||
def test_get_hash_for_lock_files_different_enable_browser():
|
||||
with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
|
||||
# Mock Path.exists to return True for all files including uv.lock
|
||||
with (
|
||||
patch('builtins.open', mock_open(read_data='mock-data'.encode())),
|
||||
patch.object(Path, 'exists', return_value=True),
|
||||
):
|
||||
hash_true = get_hash_for_lock_files('some_base_image', enable_browser=True)
|
||||
hash_false = get_hash_for_lock_files('some_base_image', enable_browser=False)
|
||||
|
||||
# Hash with enable_browser=True should not include the enable_browser value
|
||||
md5_true = hashlib.md5()
|
||||
md5_true.update('some_base_image'.encode())
|
||||
for _ in range(2):
|
||||
for _ in range(3): # pyproject.toml, poetry.lock, uv.lock
|
||||
md5_true.update('mock-data'.encode())
|
||||
expected_hash_true = truncate_hash(md5_true.hexdigest())
|
||||
|
||||
@@ -127,7 +141,7 @@ def test_get_hash_for_lock_files_different_enable_browser():
|
||||
md5_false = hashlib.md5()
|
||||
md5_false.update('some_base_image'.encode())
|
||||
md5_false.update('False'.encode()) # enable_browser=False is included
|
||||
for _ in range(2):
|
||||
for _ in range(3): # pyproject.toml, poetry.lock, uv.lock
|
||||
md5_false.update('mock-data'.encode())
|
||||
expected_hash_false = truncate_hash(md5_false.hexdigest())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user