mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 12e774917d | |||
| b55f893d73 | |||
| 89add3a3e8 | |||
| 871c8f7a7b | |||
| 9908e1b285 | |||
| 793e142c4a |
@@ -133,7 +133,7 @@ install-python-dependencies:
|
||||
export HNSWLIB_NO_NATIVE=1; \
|
||||
poetry run pip install chroma-hnswlib; \
|
||||
fi
|
||||
@poetry install --without llama-index
|
||||
@poetry install --without llama-index,evaluation
|
||||
@if [ -f "/etc/manjaro-release" ]; then \
|
||||
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
|
||||
poetry run pip install playwright; \
|
||||
|
||||
+39
-39
@@ -5,24 +5,24 @@ FROM ubuntu:22.04 AS dind
|
||||
|
||||
# https://docs.docker.com/engine/install/ubuntu/
|
||||
RUN apt-get update && apt-get install -y \
|
||||
ca-certificates \
|
||||
curl \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
|
||||
&& chmod a+r /etc/apt/keyrings/docker.asc \
|
||||
&& echo \
|
||||
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
|
||||
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
ca-certificates \
|
||||
curl \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
|
||||
&& chmod a+r /etc/apt/keyrings/docker.asc \
|
||||
&& echo \
|
||||
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
|
||||
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
docker-ce \
|
||||
docker-ce-cli \
|
||||
containerd.io \
|
||||
docker-buildx-plugin \
|
||||
docker-compose-plugin \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
docker-ce \
|
||||
docker-ce-cli \
|
||||
containerd.io \
|
||||
docker-buildx-plugin \
|
||||
docker-compose-plugin \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
|
||||
###
|
||||
FROM dind AS openhands
|
||||
@@ -31,25 +31,25 @@ ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
#
|
||||
RUN apt-get update && apt-get install -y \
|
||||
bash \
|
||||
bash \
|
||||
build-essential \
|
||||
curl \
|
||||
git \
|
||||
git-lfs \
|
||||
software-properties-common \
|
||||
make \
|
||||
git \
|
||||
git-lfs \
|
||||
software-properties-common \
|
||||
make \
|
||||
netcat \
|
||||
sudo \
|
||||
wget \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
wget \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
|
||||
# https://github.com/cli/cli/blob/trunk/docs/install_linux.md
|
||||
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
||||
&& apt-get update && apt-get -y install \
|
||||
&& chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
||||
&& apt-get update && apt-get -y install \
|
||||
gh \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
@@ -92,17 +92,17 @@ EOF
|
||||
FROM openhands AS dev
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
dnsutils \
|
||||
file \
|
||||
iproute2 \
|
||||
jq \
|
||||
lsof \
|
||||
ripgrep \
|
||||
silversearcher-ag \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
dnsutils \
|
||||
file \
|
||||
iproute2 \
|
||||
jq \
|
||||
lsof \
|
||||
ripgrep \
|
||||
silversearcher-ag \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
if [ -z "$DATASET" ]; then
|
||||
echo "Dataset not specified, use default 'things'"
|
||||
@@ -34,12 +34,9 @@ if [ -z "$OPENAI_API_KEY" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenHands
|
||||
# We need to track the version of Agent in the evaluation to make sure results are comparable
|
||||
AGENT_VERSION=v$(poetry run python -c "import openhands.agenthub; from openhands.controller.agent import Agent; print(Agent.get_cls('$AGENT').VERSION)")
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
|
||||
@@ -51,7 +48,7 @@ COMMAND="poetry run python evaluation/benchmarks/EDA/run_infer.py \
|
||||
--max-iterations 20 \
|
||||
--OPENAI_API_KEY $OPENAI_API_KEY \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${AGENT_VERSION}_${DATASET}"
|
||||
--eval-note ${OPENHANDS_VERSION}_${DATASET}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -20,10 +20,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && poetry run python evaluation/benchmarks/agent_bench/run_infer.py \
|
||||
@@ -31,7 +31,7 @@ COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && poe
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 30 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $AGENT_VERSION"
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -21,13 +21,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE=$AGENT_VERSION
|
||||
EVAL_NOTE=$OPENHANDS_VERSION
|
||||
|
||||
# Default to NOT use unit tests.
|
||||
if [ -z "$USE_UNIT_TESTS" ]; then
|
||||
|
||||
@@ -21,10 +21,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
|
||||
@@ -33,7 +33,7 @@ COMMAND="poetry run python evaluation/benchmarks/biocoder/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${AGENT_VERSION}_${DATASET}"
|
||||
--eval-note ${OPENHANDS_VERSION}_${DATASET}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -20,10 +20,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/bird/run_infer.py \
|
||||
@@ -31,7 +31,7 @@ COMMAND="poetry run python evaluation/benchmarks/bird/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 5 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $AGENT_VERSION" \
|
||||
--eval-note $OPENHANDS_VERSION" \
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -20,13 +20,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/browsing_delegation/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
|
||||
@@ -61,10 +61,10 @@ echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
|
||||
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
|
||||
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "HF SPLIT: $SPLIT"
|
||||
@@ -75,7 +75,7 @@ if [ -z "$USE_HINT_TEXT" ]; then
|
||||
export USE_HINT_TEXT=false
|
||||
fi
|
||||
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
# if not using Hint, add -no-hint to the eval note
|
||||
if [ "$USE_HINT_TEXT" = false ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-no-hint"
|
||||
|
||||
@@ -23,10 +23,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/discoverybench/run_infer.py \
|
||||
@@ -35,7 +35,7 @@ COMMAND="poetry run python evaluation/benchmarks/discoverybench/run_infer.py \
|
||||
--max-iterations 10 \
|
||||
--max-chars 10000000 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $AGENT_VERSION"
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -21,17 +21,17 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
if [ -z "$LEVELS" ]; then
|
||||
LEVELS="2023_level1"
|
||||
echo "Levels not specified, use default $LEVELS"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "LEVELS: $LEVELS"
|
||||
|
||||
@@ -42,7 +42,7 @@ COMMAND="poetry run python ./evaluation/benchmarks/gaia/run_infer.py \
|
||||
--level $LEVELS \
|
||||
--data-split validation \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${AGENT_VERSION}_${LEVELS}"
|
||||
--eval-note ${OPENHANDS_VERSION}_${LEVELS}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -21,7 +21,7 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
if [ -z "$HUBS" ]; then
|
||||
HUBS="hf,torch,tf"
|
||||
@@ -29,7 +29,7 @@ if [ -z "$HUBS" ]; then
|
||||
fi
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "HUBS: $HUBS"
|
||||
|
||||
@@ -40,7 +40,7 @@ COMMAND="poetry run python evaluation/benchmarks/gorilla/run_infer.py \
|
||||
--hubs $HUBS \
|
||||
--data-split validation \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${AGENT_VERSION}_${LEVELS}"
|
||||
--eval-note ${OPENHANDS_VERSION}_${LEVELS}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -27,10 +27,10 @@ if [ -z "$DATA_SPLIT" ]; then
|
||||
DATA_SPLIT="gpqa_diamond"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/gpqa/run_infer.py \
|
||||
@@ -39,7 +39,7 @@ COMMAND="poetry run python evaluation/benchmarks/gpqa/run_infer.py \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--data-split $DATA_SPLIT \
|
||||
--eval-note $AGENT_VERSION"
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -58,10 +58,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/humanevalfix/run_infer.py \
|
||||
@@ -69,7 +69,7 @@ COMMAND="poetry run python evaluation/benchmarks/humanevalfix/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $AGENT_VERSION"
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -28,10 +28,10 @@ if [ -z "$DATASET" ]; then
|
||||
DATASET="ProofWriter"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/logic_reasoning/run_infer.py \
|
||||
@@ -40,7 +40,7 @@ COMMAND="poetry run python evaluation/benchmarks/logic_reasoning/run_infer.py \
|
||||
--dataset $DATASET \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $AGENT_VERSION"
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -25,13 +25,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="BrowsingAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="${AGENT_VERSION}_${NOTE}"
|
||||
EVAL_NOTE="${OPENHANDS_VERSION}_${NOTE}"
|
||||
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/miniwob:\$PYTHONPATH && poetry run python evaluation/benchmarks/miniwob/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
|
||||
@@ -18,10 +18,10 @@ checkout_eval_branch
|
||||
# Only 'CodeActAgent' is supported for MINT now
|
||||
AGENT="CodeActAgent"
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
|
||||
export PYTHONPATH=$(pwd)
|
||||
|
||||
|
||||
@@ -26,10 +26,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_infer.py \
|
||||
@@ -37,7 +37,7 @@ COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $AGENT_VERSION"
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -26,10 +26,10 @@ if [ -z "$USE_KNOWLEDGE" ]; then
|
||||
USE_KNOWLEDGE=false
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/scienceagentbench/run_infer.py \
|
||||
@@ -38,7 +38,7 @@ COMMAND="poetry run python evaluation/benchmarks/scienceagentbench/run_infer.py
|
||||
--use_knowledge $USE_KNOWLEDGE \
|
||||
--max-iterations 30 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $AGENT_VERSION" \
|
||||
--eval-note $OPENHANDS_VERSION" \
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -55,10 +55,10 @@ echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
|
||||
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
|
||||
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "SPLIT: $SPLIT"
|
||||
@@ -68,7 +68,7 @@ if [ -z "$USE_HINT_TEXT" ]; then
|
||||
export USE_HINT_TEXT=false
|
||||
fi
|
||||
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
# if not using Hint, add -no-hint to the eval note
|
||||
if [ "$USE_HINT_TEXT" = false ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-no-hint"
|
||||
|
||||
@@ -38,10 +38,10 @@ if [ -z "$WOLFRAM_APPID" ]; then
|
||||
echo "WOLFRAM_APPID not specified"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "HARDNESS: $HARDNESS"
|
||||
@@ -56,7 +56,7 @@ COMMAND="poetry run python evaluation/benchmarks/toolqa/run_infer.py \
|
||||
--wolfram_alpha_appid $WOLFRAM_APPID\
|
||||
--data-split validation \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${AGENT_VERSION}_${LEVELS}"
|
||||
--eval-note ${OPENHANDS_VERSION}_${LEVELS}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -27,13 +27,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="BrowsingAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/webarena/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
|
||||
@@ -21,13 +21,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_agent_version
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE=$AGENT_VERSION
|
||||
EVAL_NOTE=$OPENHANDS_VERSION
|
||||
|
||||
# Default to NOT use unit tests.
|
||||
if [ -z "$USE_UNIT_TESTS" ]; then
|
||||
|
||||
@@ -39,8 +39,8 @@ checkout_original_branch() {
|
||||
git checkout $current_branch
|
||||
}
|
||||
|
||||
get_agent_version() {
|
||||
get_openhands_version() {
|
||||
# IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenHands
|
||||
# We need to track the version of Agent in the evaluation to make sure results are comparable
|
||||
AGENT_VERSION=v$(poetry run python -c "import openhands.agenthub; from openhands.controller.agent import Agent; print(Agent.get_cls('$AGENT').VERSION)")
|
||||
OPENHANDS_VERSION=v$(poetry run python -c "from openhands import get_version; print(get_version())")
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ import { WsClientProviderStatus } from "#/context/ws-client-provider";
|
||||
import { ChatInterface } from "#/components/features/chat/chat-interface";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const renderChatInterface = (messages: (Message | ErrorMessage)[]) =>
|
||||
const renderChatInterface = (messages: (Message)[]) =>
|
||||
renderWithProviders(<ChatInterface />);
|
||||
|
||||
describe("Empty state", () => {
|
||||
@@ -278,7 +278,7 @@ describe.skip("ChatInterface", () => {
|
||||
});
|
||||
|
||||
it("should render inline errors", () => {
|
||||
const messages: (Message | ErrorMessage)[] = [
|
||||
const messages: (Message)[] = [
|
||||
{
|
||||
sender: "assistant",
|
||||
content: "Hello",
|
||||
@@ -287,9 +287,10 @@ describe.skip("ChatInterface", () => {
|
||||
pending: true,
|
||||
},
|
||||
{
|
||||
error: true,
|
||||
id: "",
|
||||
message: "Something went wrong",
|
||||
type: "error",
|
||||
content: "Something went wrong",
|
||||
sender: "assistant",
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
];
|
||||
renderChatInterface(messages);
|
||||
|
||||
@@ -47,7 +47,7 @@ export function ChatMessage({
|
||||
"rounded-xl relative",
|
||||
"flex flex-col gap-2",
|
||||
type === "user" && " max-w-[305px] p-4 bg-neutral-700 self-end",
|
||||
type === "assistant" && "pb-4 max-w-full bg-tranparent",
|
||||
type === "assistant" && "mt-6 max-w-full bg-tranparent",
|
||||
)}
|
||||
>
|
||||
<CopyToClipboardButton
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
|
||||
/** Props accepted by {@link ErrorMessage}. */
interface ErrorMessageProps {
  /** Optional i18n key; when it resolves to a translation, that text is shown as the headline. */
  id?: string;
  /** Error text shown as the collapsible details. */
  message: string;
}

/**
 * Renders an inline error with a danger-coloured left border.
 *
 * When `id` resolves to a known translation, the translated text is shown as a
 * bold headline and `message` is collapsed behind a show/hide toggle.
 * Otherwise `message` is shown on its own.
 *
 * @param id - Optional translation key for the headline.
 * @param message - Error details.
 */
export function ErrorMessage({ id, message }: ErrorMessageProps) {
  const { t, i18n } = useTranslation();
  const [showDetails, setShowDetails] = useState(true);

  // Derived from props on every render instead of being mirrored into state,
  // so a changed `message`, `id` or language can never leave stale text behind.
  const headline = id && i18n.exists(id) ? t(id) : "";

  useEffect(() => {
    // Start collapsed whenever a translated headline is available.
    if (id && i18n.exists(id)) {
      setShowDetails(false);
    }
  }, [id, message, i18n.language]);

  // The toggle only exists alongside a headline; without one the details must
  // always be visible, otherwise they could stay hidden with no way to reopen.
  const detailsVisible = !headline || showDetails;

  return (
    <div className="flex gap-2 items-center justify-start border-l-2 border-danger pl-2 my-2 py-2">
      <div className="text-sm leading-4 flex flex-col gap-2">
        {headline && <p className="text-danger font-bold">{headline}</p>}
        {headline && (
          <button
            type="button"
            onClick={() => setShowDetails((visible) => !visible)}
            className="cursor-pointer text-left"
          >
            {detailsVisible
              ? t("ERROR_MESSAGE$HIDE_DETAILS")
              : t("ERROR_MESSAGE$SHOW_DETAILS")}
          </button>
        )}
        {detailsVisible && <p className="text-neutral-300">{message}</p>}
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,80 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import Markdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import { code } from "../markdown/code";
|
||||
import { ol, ul } from "../markdown/list";
|
||||
import ArrowUp from "#/icons/angle-up-solid.svg?react";
|
||||
import ArrowDown from "#/icons/angle-down-solid.svg?react";
|
||||
|
||||
/** Props accepted by {@link ExpandableMessage}. */
interface ExpandableMessageProps {
  /** Optional i18n key; when it resolves to a translation, that text is shown as the headline. */
  id?: string;
  /** Body text, rendered as Markdown with the GFM plugin. */
  message: string;
  /** Message category; `"error"` selects the danger colours, any other value the neutral ones. */
  type: string;
}

/**
 * Renders a chat entry as a left-bordered panel whose Markdown body can be
 * collapsed behind a translated headline.
 *
 * When `id` resolves to a known translation, the translated text is shown as a
 * bold headline with an arrow toggle and the body starts collapsed. Otherwise
 * only the body is rendered.
 *
 * @param id - Optional translation key for the headline.
 * @param message - Markdown body.
 * @param type - Message category used to pick the colour scheme.
 */
export function ExpandableMessage({
  id,
  message,
  type,
}: ExpandableMessageProps) {
  const { t, i18n } = useTranslation();
  const [showDetails, setShowDetails] = useState(true);

  // Derived from props on every render instead of being mirrored into state,
  // so a changed `message`, `id` or language can never leave stale text behind.
  const headline = id && i18n.exists(id) ? t(id) : "";

  useEffect(() => {
    // Start collapsed whenever a translated headline is available.
    if (id && i18n.exists(id)) {
      setShowDetails(false);
    }
  }, [id, message, i18n.language]);

  const isError = type === "error";
  const border = isError ? "border-danger" : "border-neutral-300";
  const textColor = isError ? "text-danger" : "text-neutral-300";
  const arrowClasses = `h-4 w-4 ml-2 inline ${
    isError ? "fill-danger" : "fill-neutral-300"
  }`;

  // The toggle only exists alongside a headline; without one the body must
  // always be visible, otherwise it could stay hidden with no way to reopen.
  const detailsVisible = !headline || showDetails;

  return (
    <div
      className={`flex gap-2 items-center justify-start border-l-2 pl-2 my-2 py-2 ${border}`}
    >
      <div className="text-sm leading-4 flex flex-col gap-2 max-w-full">
        {headline && (
          <p className={`${textColor} font-bold`}>
            {headline}
            <button
              type="button"
              onClick={() => setShowDetails((visible) => !visible)}
              className="cursor-pointer text-left"
              // The button contains only an icon, so give it an accessible name.
              aria-label={
                detailsVisible
                  ? t("EXPANDABLE_MESSAGE$HIDE_DETAILS")
                  : t("EXPANDABLE_MESSAGE$SHOW_DETAILS")
              }
              aria-expanded={detailsVisible}
            >
              {detailsVisible ? (
                <ArrowUp className={arrowClasses} />
              ) : (
                <ArrowDown className={arrowClasses} />
              )}
            </button>
          </p>
        )}
        {detailsVisible && (
          <Markdown
            className="text-sm overflow-auto"
            components={{
              code,
              ul,
              ol,
            }}
            remarkPlugins={[remarkGfm]}
          >
            {message}
          </Markdown>
        )}
      </div>
    </div>
  );
}
|
||||
@@ -1,14 +1,10 @@
|
||||
import { ChatMessage } from "#/components/features/chat/chat-message";
|
||||
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
|
||||
import { ImageCarousel } from "../images/image-carousel";
|
||||
import { ErrorMessage } from "./error-message";
|
||||
|
||||
const isErrorMessage = (
|
||||
message: Message | ErrorMessage,
|
||||
): message is ErrorMessage => "error" in message;
|
||||
import { ExpandableMessage } from "./expandable-message";
|
||||
|
||||
interface MessagesProps {
|
||||
messages: (Message | ErrorMessage)[];
|
||||
messages: Message[];
|
||||
isAwaitingUserConfirmation: boolean;
|
||||
}
|
||||
|
||||
@@ -16,18 +12,28 @@ export function Messages({
|
||||
messages,
|
||||
isAwaitingUserConfirmation,
|
||||
}: MessagesProps) {
|
||||
return messages.map((message, index) =>
|
||||
isErrorMessage(message) ? (
|
||||
<ErrorMessage key={index} id={message.id} message={message.message} />
|
||||
) : (
|
||||
return messages.map((message, index) => {
|
||||
if (message.type === "error" || message.type === "action") {
  // Errors and agent actions are rendered as collapsible panels rather than
  // chat bubbles. (A leftover debug console.log was removed from this branch.)
  return (
    <ExpandableMessage
      key={index}
      type={message.type}
      id={message.translationID}
      message={message.content}
    />
  );
}
|
||||
|
||||
return (
|
||||
<ChatMessage key={index} type={message.sender} message={message.content}>
|
||||
{message.imageUrls.length > 0 && (
|
||||
{message.imageUrls && message.imageUrls.length > 0 && (
|
||||
<ImageCarousel size="small" images={message.imageUrls} />
|
||||
)}
|
||||
{messages.length - 1 === index &&
|
||||
message.sender === "assistant" &&
|
||||
isAwaitingUserConfirmation && <ConfirmationButtons />}
|
||||
</ChatMessage>
|
||||
),
|
||||
);
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1782,14 +1782,6 @@
|
||||
"fr": "Privé",
|
||||
"tr": "Özel"
|
||||
},
|
||||
"ERROR_MESSAGE$SHOW_DETAILS": {
|
||||
"en": "Show details",
|
||||
"es": "Mostrar detalles"
|
||||
},
|
||||
"ERROR_MESSAGE$HIDE_DETAILS": {
|
||||
"en": "Hide details",
|
||||
"es": "Ocultar detalles"
|
||||
},
|
||||
"STATUS$STARTING_RUNTIME": {
|
||||
"en": "Starting Runtime...",
|
||||
"zh-CN": "启动运行时...",
|
||||
@@ -2012,5 +2004,35 @@
|
||||
"PROJECT_MENU_CARD_CONTEXT_MENU$DOWNLOAD_AS_ZIP_LABEL": {
|
||||
"en": "Download as .zip",
|
||||
"es": "Descargar como .zip"
|
||||
},
|
||||
"ACTION_MESSAGE$RUN": {
|
||||
"en": "Running a bash command"
|
||||
},
|
||||
"ACTION_MESSAGE$RUN_IPYTHON": {
|
||||
"en": "Running a Jupyter command"
|
||||
},
|
||||
"ACTION_MESSAGE$READ": {
|
||||
"en": "Reading the contents of a file"
|
||||
},
|
||||
"ACTION_MESSAGE$WRITE": {
|
||||
"en": "Writing to a file"
|
||||
},
|
||||
"OBSERVATION_MESSAGE$RUN": {
|
||||
"en": "Ran a bash command"
|
||||
},
|
||||
"OBSERVATION_MESSAGE$RUN_IPYTHON": {
|
||||
"en": "Ran a Jupyter command"
|
||||
},
|
||||
"OBSERVATION_MESSAGE$READ": {
|
||||
"en": "Read the contents of a file"
|
||||
},
|
||||
"OBSERVATION_MESSAGE$WRITE": {
|
||||
"en": "Wrote to a file"
|
||||
},
|
||||
"EXPANDABLE_MESSAGE$SHOW_DETAILS": {
|
||||
"en": "Show details"
|
||||
},
|
||||
"EXPANDABLE_MESSAGE$HIDE_DETAILS": {
|
||||
"en": "Hide details"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--!Font Awesome Free 6.7.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free Copyright 2024 Fonticons, Inc.--><path d="M201.4 374.6c12.5 12.5 32.8 12.5 45.3 0l160-160c12.5-12.5 12.5-32.8 0-45.3s-32.8-12.5-45.3 0L224 306.7 86.6 169.4c-12.5-12.5-32.8-12.5-45.3 0s-12.5 32.8 0 45.3l160 160z"/></svg>
|
||||
|
After Width: | Height: | Size: 400 B |
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--!Font Awesome Free 6.7.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free Copyright 2024 Fonticons, Inc.--><path d="M201.4 137.4c12.5-12.5 32.8-12.5 45.3 0l160 160c12.5 12.5 12.5 32.8 0 45.3s-32.8 12.5-45.3 0L224 205.3 86.6 342.6c-12.5 12.5-32.8 12.5-45.3 0s-12.5-32.8 0-45.3l160-160z"/></svg>
|
||||
|
After Width: | Height: | Size: 400 B |
Vendored
+4
-7
@@ -1,13 +1,10 @@
|
||||
type Message = {
|
||||
sender: "user" | "assistant";
|
||||
content: string;
|
||||
imageUrls: string[];
|
||||
timestamp: string;
|
||||
imageUrls?: string[];
|
||||
type?: "thought" | "error" | "action";
|
||||
pending?: boolean;
|
||||
};
|
||||
|
||||
type ErrorMessage = {
|
||||
error: boolean;
|
||||
id?: string;
|
||||
message: string;
|
||||
translationID?: string;
|
||||
eventID?: number;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import {
|
||||
addAssistantMessage,
|
||||
addAssistantAction,
|
||||
addUserMessage,
|
||||
addErrorMessage,
|
||||
} from "#/state/chat-slice";
|
||||
@@ -50,29 +51,9 @@ const messageActions = {
|
||||
pending: false,
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
store.dispatch(addAssistantMessage(message.args.content));
|
||||
}
|
||||
},
|
||||
[ActionType.FINISH]: (message: ActionMessage) => {
|
||||
store.dispatch(addAssistantMessage(message.message));
|
||||
},
|
||||
[ActionType.REJECT]: (message: ActionMessage) => {
|
||||
store.dispatch(addAssistantMessage(message.message));
|
||||
},
|
||||
[ActionType.DELEGATE]: (message: ActionMessage) => {
|
||||
store.dispatch(addAssistantMessage(message.message));
|
||||
},
|
||||
[ActionType.RUN]: (message: ActionMessage) => {
|
||||
if (message.args.hidden) return;
|
||||
if (message.args.thought) {
|
||||
store.dispatch(addAssistantMessage(message.args.thought));
|
||||
}
|
||||
},
|
||||
[ActionType.RUN_IPYTHON]: (message: ActionMessage) => {
|
||||
if (message.args.thought) {
|
||||
store.dispatch(addAssistantMessage(message.args.thought));
|
||||
}
|
||||
if (message.args.confirmation_state !== "rejected") {
|
||||
store.dispatch(appendJupyterInput(message.args.code));
|
||||
}
|
||||
@@ -124,6 +105,87 @@ export function handleActionMessage(message: ActionMessage) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.source !== "user" && !message.args?.hidden) {
|
||||
if (message.args && message.args.thought) {
|
||||
store.dispatch(addAssistantMessage(message.args.thought));
|
||||
}
|
||||
// Convert the message to a properly typed action
|
||||
const baseAction = {
|
||||
...message,
|
||||
source: "agent" as const,
|
||||
args: {
|
||||
...message.args,
|
||||
thought: message.args?.thought || message.message || "",
|
||||
},
|
||||
};
|
||||
|
||||
// Cast to the appropriate action type based on the action field
|
||||
switch (message.action) {
|
||||
case "run":
|
||||
store.dispatch(
|
||||
addAssistantAction({
|
||||
...baseAction,
|
||||
action: "run" as const,
|
||||
args: {
|
||||
command: String(message.args?.command || ""),
|
||||
confirmation_state: (message.args?.confirmation_state ||
|
||||
"confirmed") as
|
||||
| "confirmed"
|
||||
| "rejected"
|
||||
| "awaiting_confirmation",
|
||||
thought: String(message.args?.thought || message.message || ""),
|
||||
hidden: Boolean(message.args?.hidden),
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
case "message":
|
||||
store.dispatch(
|
||||
addAssistantAction({
|
||||
...baseAction,
|
||||
action: "message" as const,
|
||||
args: {
|
||||
content: String(message.args?.content || message.message || ""),
|
||||
image_urls: Array.isArray(message.args?.image_urls)
|
||||
? message.args.image_urls
|
||||
: null,
|
||||
wait_for_response: Boolean(message.args?.wait_for_response),
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
case "run_ipython":
|
||||
store.dispatch(
|
||||
addAssistantAction({
|
||||
...baseAction,
|
||||
action: "run_ipython" as const,
|
||||
args: {
|
||||
code: String(message.args?.code || ""),
|
||||
confirmation_state: (message.args?.confirmation_state ||
|
||||
"confirmed") as
|
||||
| "confirmed"
|
||||
| "rejected"
|
||||
| "awaiting_confirmation",
|
||||
kernel_init_code: String(message.args?.kernel_init_code || ""),
|
||||
thought: String(message.args?.thought || message.message || ""),
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
default:
|
||||
// For other action types, ensure we have the required thought property
|
||||
store.dispatch(
|
||||
addAssistantAction({
|
||||
...baseAction,
|
||||
action: "reject" as const,
|
||||
args: {
|
||||
thought: String(message.args?.thought || message.message || ""),
|
||||
},
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (message.action in messageActions) {
|
||||
const actionFn =
|
||||
messageActions[message.action as keyof typeof messageActions];
|
||||
|
||||
@@ -2,10 +2,14 @@ import { setCurrentAgentState } from "#/state/agent-slice";
|
||||
import { setUrl, setScreenshotSrc } from "#/state/browser-slice";
|
||||
import store from "#/store";
|
||||
import { ObservationMessage } from "#/types/message";
|
||||
import AgentState from "#/types/agent-state";
|
||||
import { appendOutput } from "#/state/command-slice";
|
||||
import { appendJupyterOutput } from "#/state/jupyter-slice";
|
||||
import ObservationType from "#/types/observation-type";
|
||||
import { addAssistantMessage } from "#/state/chat-slice";
|
||||
import {
|
||||
addAssistantMessage,
|
||||
addAssistantObservation,
|
||||
} from "#/state/chat-slice";
|
||||
|
||||
export function handleObservationMessage(message: ObservationMessage) {
|
||||
switch (message.observation) {
|
||||
@@ -46,4 +50,120 @@ export function handleObservationMessage(message: ObservationMessage) {
|
||||
store.dispatch(addAssistantMessage(message.message));
|
||||
break;
|
||||
}
|
||||
if (!message.extras?.hidden) {
|
||||
// Convert the message to the appropriate observation type
|
||||
const { observation } = message;
|
||||
const baseObservation = {
|
||||
...message,
|
||||
source: "agent" as const,
|
||||
};
|
||||
|
||||
switch (observation) {
|
||||
case "agent_state_changed":
|
||||
store.dispatch(
|
||||
addAssistantObservation({
|
||||
...baseObservation,
|
||||
observation: "agent_state_changed" as const,
|
||||
extras: {
|
||||
agent_state: (message.extras.agent_state as AgentState) || "idle",
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
case "run":
|
||||
store.dispatch(
|
||||
addAssistantObservation({
|
||||
...baseObservation,
|
||||
observation: "run" as const,
|
||||
extras: {
|
||||
command: String(message.extras.command || ""),
|
||||
command_id: Number(message.extras.command_id || 0),
|
||||
exit_code: Number(message.extras.exit_code || 0),
|
||||
hidden: Boolean(message.extras.hidden),
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
case "run_ipython":
|
||||
store.dispatch(
|
||||
addAssistantObservation({
|
||||
...baseObservation,
|
||||
observation: "run_ipython" as const,
|
||||
extras: {
|
||||
code: String(message.extras.code || ""),
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
case "delegate":
|
||||
store.dispatch(
|
||||
addAssistantObservation({
|
||||
...baseObservation,
|
||||
observation: "delegate" as const,
|
||||
extras: {
|
||||
outputs:
|
||||
typeof message.extras.outputs === "object"
|
||||
? (message.extras.outputs as Record<string, unknown>)
|
||||
: {},
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
case "browse":
|
||||
store.dispatch(
|
||||
addAssistantObservation({
|
||||
...baseObservation,
|
||||
observation: "browse" as const,
|
||||
extras: {
|
||||
url: String(message.extras.url || ""),
|
||||
screenshot: String(message.extras.screenshot || ""),
|
||||
error: Boolean(message.extras.error),
|
||||
open_page_urls: Array.isArray(message.extras.open_page_urls)
|
||||
? message.extras.open_page_urls
|
||||
: [],
|
||||
active_page_index: Number(message.extras.active_page_index || 0),
|
||||
dom_object:
|
||||
typeof message.extras.dom_object === "object"
|
||||
? (message.extras.dom_object as Record<string, unknown>)
|
||||
: {},
|
||||
axtree_object:
|
||||
typeof message.extras.axtree_object === "object"
|
||||
? (message.extras.axtree_object as Record<string, unknown>)
|
||||
: {},
|
||||
extra_element_properties:
|
||||
typeof message.extras.extra_element_properties === "object"
|
||||
? (message.extras.extra_element_properties as Record<
|
||||
string,
|
||||
unknown
|
||||
>)
|
||||
: {},
|
||||
last_browser_action: String(
|
||||
message.extras.last_browser_action || "",
|
||||
),
|
||||
last_browser_action_error:
|
||||
message.extras.last_browser_action_error,
|
||||
focused_element_bid: String(
|
||||
message.extras.focused_element_bid || "",
|
||||
),
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
case "error":
|
||||
store.dispatch(
|
||||
addAssistantObservation({
|
||||
...baseObservation,
|
||||
observation: "error" as const,
|
||||
source: "user" as const,
|
||||
extras: {
|
||||
error_id: message.extras.error_id,
|
||||
},
|
||||
}),
|
||||
);
|
||||
break;
|
||||
default:
|
||||
// For any unhandled observation types, just ignore them
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
import { createSlice, PayloadAction } from "@reduxjs/toolkit";
|
||||
|
||||
type SliceState = { messages: (Message | ErrorMessage)[] };
|
||||
import { OpenHandsObservation } from "#/types/core/observations";
|
||||
import { OpenHandsAction } from "#/types/core/actions";
|
||||
|
||||
type SliceState = { messages: Message[] };
|
||||
|
||||
const MAX_CONTENT_LENGTH = 1000;
|
||||
|
||||
const HANDLED_ACTIONS = ["run", "run_ipython", "write", "read"];
|
||||
|
||||
const initialState: SliceState = {
|
||||
messages: [],
|
||||
@@ -20,6 +27,7 @@ export const chatSlice = createSlice({
|
||||
}>,
|
||||
) {
|
||||
const message: Message = {
|
||||
type: "thought",
|
||||
sender: "user",
|
||||
content: action.payload.content,
|
||||
imageUrls: action.payload.imageUrls,
|
||||
@@ -40,6 +48,7 @@ export const chatSlice = createSlice({
|
||||
|
||||
addAssistantMessage(state, action: PayloadAction<string>) {
|
||||
const message: Message = {
|
||||
type: "thought",
|
||||
sender: "assistant",
|
||||
content: action.payload,
|
||||
imageUrls: [],
|
||||
@@ -49,12 +58,78 @@ export const chatSlice = createSlice({
|
||||
state.messages.push(message);
|
||||
},
|
||||
|
||||
/**
 * Appends an "action" chat entry for an agent action.
 *
 * Actions whose type is not listed in HANDLED_ACTIONS are ignored. For the
 * handled types the entry's content is derived from the action arguments:
 * the command in inline code for "run", the code in a fenced block for
 * "run_ipython", the path followed by the (possibly truncated) content for
 * "write", and the bare path for "read".
 *
 * The entry stores the action's id as `eventID` and a translation key of the
 * form `ACTION_MESSAGE$<ACTION>`.
 */
addAssistantAction(state, action: PayloadAction<OpenHandsAction>) {
  const { payload } = action;
  if (!HANDLED_ACTIONS.includes(payload.action)) {
    return;
  }

  let body = "";
  switch (payload.action) {
    case "run":
      body = `\`${payload.args.command}\``;
      break;
    case "run_ipython":
      body = `\`\`\`\n${payload.args.code}\n\`\`\``;
      break;
    case "write": {
      const { path, content } = payload.args;
      // Long file contents are cut to MAX_CONTENT_LENGTH characters plus "...".
      const shown =
        content.length > MAX_CONTENT_LENGTH
          ? `${content.slice(0, MAX_CONTENT_LENGTH)}...`
          : content;
      body = `${path}\n${shown}`;
      break;
    }
    case "read":
      body = payload.args.path;
      break;
    default:
      // Any other handled type keeps an empty body.
      break;
  }

  const entry: Message = {
    type: "action",
    sender: "assistant",
    translationID: `ACTION_MESSAGE$${payload.action.toUpperCase()}`,
    eventID: payload.id,
    content: body,
    imageUrls: [],
    timestamp: new Date().toISOString(),
  };
  state.messages.push(entry);
},
|
||||
|
||||
/**
 * Folds an observation into the chat entry of the action that caused it.
 *
 * Observations whose type is not listed in HANDLED_ACTIONS are ignored, as
 * are observations without a cause or whose cause has no chat entry. When a
 * matching entry is found (by `eventID === cause`), its translation key is
 * switched to `OBSERVATION_MESSAGE$<OBSERVATION>`. For "run" and
 * "run_ipython" its content is replaced by the observation content, which is
 * truncated to MAX_CONTENT_LENGTH characters and wrapped in a fenced block.
 */
addAssistantObservation(
  state,
  observation: PayloadAction<OpenHandsObservation>,
) {
  const observationID = observation.payload.observation;
  if (!HANDLED_ACTIONS.includes(observationID)) {
    return;
  }
  const translationID = `OBSERVATION_MESSAGE$${observationID.toUpperCase()}`;
  const causeID = observation.payload.cause;
  // Without this guard a missing cause would compare equal to the undefined
  // `eventID` of entries that never had one (e.g. plain user/assistant
  // messages), and the first such entry would be overwritten below.
  if (causeID === undefined || causeID === null) {
    return;
  }
  const causeMessage = state.messages.find(
    (message) => message.eventID === causeID,
  );
  if (!causeMessage) {
    return;
  }
  causeMessage.translationID = translationID;
  if (observationID === "run" || observationID === "run_ipython") {
    let { content } = observation.payload;
    if (content.length > MAX_CONTENT_LENGTH) {
      content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
    }
    content = `\`\`\`\n${content}\n\`\`\``;
    causeMessage.content = content; // Observation content includes the action
  }
},
|
||||
|
||||
/**
 * Appends an error entry to the chat.
 *
 * The optional `id` from the payload is stored as the entry's
 * `translationID` and the `message` as its content. Exactly one
 * `Message`-shaped entry is pushed per call; the reducer performs no logging
 * or other side effects.
 */
addErrorMessage(
  state,
  action: PayloadAction<{ id?: string; message: string }>,
) {
  const { id, message } = action.payload;
  state.messages.push({
    translationID: id,
    content: message,
    type: "error",
    sender: "assistant",
    timestamp: new Date().toISOString(),
  });
},
|
||||
|
||||
clearMessages(state) {
|
||||
@@ -66,6 +141,8 @@ export const chatSlice = createSlice({
|
||||
export const {
|
||||
addUserMessage,
|
||||
addAssistantMessage,
|
||||
addAssistantAction,
|
||||
addAssistantObservation,
|
||||
addErrorMessage,
|
||||
clearMessages,
|
||||
} = chatSlice.actions;
|
||||
|
||||
@@ -96,6 +96,23 @@ export interface ModifyTaskAction extends OpenHandsActionEvent<"modify_task"> {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Agent-originated action event of type "read".
 */
export interface FileReadAction extends OpenHandsActionEvent<"read"> {
  source: "agent";
  args: {
    /** Path argument of the read action (presumably the file to read). */
    path: string;
    /** Thought text attached to the action. */
    thought: string;
  };
}
|
||||
|
||||
/**
 * Agent-originated action event of type "write".
 */
export interface FileWriteAction extends OpenHandsActionEvent<"write"> {
  source: "agent";
  args: {
    /** Path argument of the write action (presumably the target file). */
    path: string;
    /** Content argument of the write action. */
    content: string;
    /** Thought text attached to the action. */
    thought: string;
  };
}
|
||||
|
||||
export interface RejectAction extends OpenHandsActionEvent<"reject"> {
|
||||
source: "agent";
|
||||
args: {
|
||||
@@ -112,6 +129,8 @@ export type OpenHandsAction =
|
||||
| DelegateAction
|
||||
| BrowseAction
|
||||
| BrowseInteractiveAction
|
||||
| FileReadAction
|
||||
| FileWriteAction
|
||||
| AddTaskAction
|
||||
| ModifyTaskAction
|
||||
| RejectAction;
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
type OpenHandsEventType =
|
||||
export type OpenHandsEventType =
|
||||
| "message"
|
||||
| "agent_state_changed"
|
||||
| "run"
|
||||
| "read"
|
||||
| "write"
|
||||
| "run_ipython"
|
||||
| "delegate"
|
||||
| "browse"
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
export interface ActionMessage {
|
||||
id: number;
|
||||
|
||||
// Either 'agent' or 'user'
|
||||
source: string;
|
||||
source: "agent" | "user";
|
||||
|
||||
// The action to be taken
|
||||
action: string;
|
||||
@@ -19,6 +21,9 @@ export interface ObservationMessage {
|
||||
// The type of observation
|
||||
observation: string;
|
||||
|
||||
id: number;
|
||||
cause: number;
|
||||
|
||||
// The observed data
|
||||
content: string;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user