mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0818845b84 | |||
| 7ab2ad2c1b | |||
| 8416a019cb | |||
| 73a7c7786d | |||
| 11d12c5a01 | |||
| c4f303a07b | |||
| 3a629cdf08 | |||
| 6ea33b657d |
@@ -48,11 +48,11 @@ jobs:
|
||||
- name: Build Environment
|
||||
run: make build
|
||||
- name: Run Unit Tests
|
||||
run: poetry run pytest --forked -n auto -svv ./tests/unit
|
||||
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
|
||||
- name: Run Runtime Tests with CLIRuntime
|
||||
run: TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
|
||||
run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
|
||||
- name: Run E2E Tests
|
||||
run: poetry run pytest -svv tests/e2e
|
||||
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -svv tests/e2e
|
||||
|
||||
# Run specific Windows python tests
|
||||
test-on-windows:
|
||||
@@ -77,9 +77,11 @@ jobs:
|
||||
- name: Run Windows unit tests
|
||||
run: poetry run pytest -svv tests/unit/test_windows_bash.py
|
||||
env:
|
||||
PYTHONPATH: ".;$env:PYTHONPATH"
|
||||
DEBUG: "1"
|
||||
- name: Run Windows runtime tests with LocalRuntime
|
||||
run: $env:TEST_RUNTIME="local"; poetry run pytest -svv tests/runtime/test_bash.py
|
||||
env:
|
||||
PYTHONPATH: ".;$env:PYTHONPATH"
|
||||
TEST_RUNTIME: local
|
||||
DEBUG: "1"
|
||||
|
||||
@@ -183,24 +183,7 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be
|
||||
- `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
|
||||
- `logs/`: a directory of test logs
|
||||
|
||||
### Run evaluation with `RemoteRuntime`
|
||||
|
||||
OpenHands Remote Runtime is currently in beta (read [here](https://runtime.all-hands.dev/) for more details). It allows you to run rollouts in parallel in the cloud, so you don't need a powerful machine to run the evaluation.
|
||||
Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
|
||||
|
||||
```bash
|
||||
./evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers] [dataset] [split]
|
||||
|
||||
# Example - This evaluates patches generated by CodeActAgent on Llama-3.1-70B-Instruct-Turbo on the test split of "princeton-nlp/SWE-bench_Lite", with 16 workers running in parallel
|
||||
ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
|
||||
evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe-bench-lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_100_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
|
||||
```
|
||||
|
||||
To clean up all existing runtimes that you've already started, run:
|
||||
|
||||
```bash
|
||||
ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
|
||||
```
|
||||
|
||||
## SWT-Bench Evaluation
|
||||
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
INPUT_FILE=$1
|
||||
NUM_WORKERS=$2
|
||||
DATASET=$3
|
||||
SPLIT=$4
|
||||
|
||||
if [ -z "$INPUT_FILE" ]; then
|
||||
echo "INPUT_FILE not specified (should be a path to a jsonl file)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$DATASET" ]; then
|
||||
echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
|
||||
DATASET="princeton-nlp/SWE-bench_Lite"
|
||||
fi
|
||||
|
||||
if [ -z "$SPLIT" ]; then
|
||||
echo "SPLIT not specified, use default test"
|
||||
SPLIT="test"
|
||||
fi
|
||||
|
||||
if [ -z "$NUM_WORKERS" ]; then
|
||||
echo "NUM_WORKERS not specified, use default 1"
|
||||
NUM_WORKERS=1
|
||||
fi
|
||||
|
||||
echo "... Evaluating on $INPUT_FILE ..."
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/eval_infer.py \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--input-file $INPUT_FILE \
|
||||
--dataset $DATASET \
|
||||
--split $SPLIT"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
|
||||
fi
|
||||
|
||||
# Run the command
|
||||
eval $COMMAND
|
||||
|
||||
# update the output with evaluation results
|
||||
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE
|
||||
@@ -5,8 +5,7 @@ pynguin_ids = ['pydata__xarray-6548-16541', 'pydata__xarray-7003-16557', 'pydata
|
||||
ids = ['pydata__xarray-3114-16452', 'pydata__xarray-3151-16453', 'pydata__xarray-3156-16454', 'pydata__xarray-3239-16456', 'pydata__xarray-3239-16457', 'pydata__xarray-3239-16458', 'pydata__xarray-3302-16459', 'pydata__xarray-3364-16461', 'pydata__xarray-3677-16471', 'pydata__xarray-3905-16478', 'pydata__xarray-4182-16484', 'pydata__xarray-4248-16486', 'pydata__xarray-4339-16487', 'pydata__xarray-4419-16488', 'pydata__xarray-4629-16492', 'pydata__xarray-4750-16496', 'pydata__xarray-4802-16505', 'pydata__xarray-4966-16515', 'pydata__xarray-4994-16516', 'pydata__xarray-5033-16517', 'pydata__xarray-5126-16518', 'pydata__xarray-5126-16519', 'pydata__xarray-5131-16520', 'pydata__xarray-5365-16529', 'pydata__xarray-5455-16530', 'pydata__xarray-5662-16532', 'pydata__xarray-5731-16534', 'pydata__xarray-6135-16535', 'pydata__xarray-6135-16536', 'pydata__xarray-6386-16537', 'pydata__xarray-6394-16538', 'pydata__xarray-6400-16539', 'pydata__xarray-6461-16540', 'pydata__xarray-6548-16541', 'pydata__xarray-6599-16543', 'pydata__xarray-6601-16544', 'pydata__xarray-6882-16548', 'pydata__xarray-6889-16549', 'pydata__xarray-7003-16557', 'pydata__xarray-7147-16571', 'pydata__xarray-7150-16572', 'pydata__xarray-7203-16577', 'pydata__xarray-7229-16578', 'pydata__xarray-7393-16581', 'pydata__xarray-7400-16582']
|
||||
|
||||
|
||||
Command eval (our approach):
|
||||
poetry run ./evaluation/benchmarks/testgeneval/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/kjain14__testgeneval-test/CodeActAgent/gpt-4o_maxiter_25_N_v0.20.0-no-hint-run_1/output.jsonl 10 kjain14/testgeneval test true
|
||||
|
||||
|
||||
Command run (our approach):
|
||||
./evaluation/benchmarks/testgeneval/scripts/run_infer.sh llm.eval_gpt HEAD CodeActAgent -1 25 10 kjain14/testgeneval test 1 ../TestGenEval/results/testgeneval/preds/gpt-4o-2024-08-06__testgeneval__0.2__test.jsonl
|
||||
|
||||
@@ -82,5 +82,11 @@ describe("extractModelAndProvider", () => {
|
||||
model: "claude-opus-4-20250514",
|
||||
separator: "/",
|
||||
});
|
||||
|
||||
expect(extractModelAndProvider("claude-opus-4-1-20250805")).toEqual({
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-1-20250805",
|
||||
separator: "/",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
|
||||
<link rel="manifest" href="/site.webmanifest">
|
||||
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
|
||||
<meta name="msapplication-TileColor" content="#da532c">
|
||||
<meta name="theme-color" content="#ffffff">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<meta name="theme-color" content="#000000" />
|
||||
<meta
|
||||
name="description"
|
||||
content="OpenHands: Code Less, Make More"
|
||||
/>
|
||||
<!--
|
||||
Notice the use of %PUBLIC_URL% in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
Only files inside the `public` folder can be referenced from the HTML.
|
||||
|
||||
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
|
||||
work correctly both with client-side routing and a non-root public URL.
|
||||
Learn how to configure a non-root public URL by running `npm run build`.
|
||||
-->
|
||||
<title>OpenHands</title>
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
<div id="root"></div>
|
||||
<!--
|
||||
This HTML file is a template.
|
||||
If you open it directly in the browser, you will see an empty page.
|
||||
|
||||
You can add webfonts, meta tags, or analytics to this file.
|
||||
The build step will place the bundled scripts into the <body> tag.
|
||||
|
||||
To begin the development, run `npm start` or `yarn start`.
|
||||
To create a production bundle, use `npm run build` or `yarn build`.
|
||||
-->
|
||||
<script type="module" src="/src/index.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,9 +1,10 @@
|
||||
import React from "react";
|
||||
import toast from "react-hot-toast";
|
||||
import { useCreateConversation } from "./mutation/use-create-conversation";
|
||||
import { useUserProviders } from "./use-user-providers";
|
||||
import { useConversationSubscriptions } from "#/context/conversation-subscriptions-provider";
|
||||
import { Provider } from "#/types/settings";
|
||||
import { CreateMicroagent } from "#/api/open-hands.types";
|
||||
import { TOAST_OPTIONS } from "#/utils/custom-toast-handlers";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
|
||||
/**
|
||||
@@ -11,16 +12,87 @@ import OpenHands from "#/api/open-hands";
|
||||
* This extends the functionality of useCreateConversationAndSubscribe to allow subscribing to
|
||||
* multiple conversations simultaneously.
|
||||
*/
|
||||
|
||||
export const useCreateConversationAndSubscribeMultiple = () => {
|
||||
const { mutate: createConversation, isPending } = useCreateConversation();
|
||||
const { providers } = useUserProviders();
|
||||
const {
|
||||
subscribeToConversation,
|
||||
unsubscribeFromConversation,
|
||||
isSubscribedToConversation,
|
||||
activeConversationIds,
|
||||
subscribeToConversation,
|
||||
} = useConversationSubscriptions();
|
||||
|
||||
// Track conversations that are being set up with their callbacks
|
||||
const [pendingConversations, setPendingConversations] = React.useState<
|
||||
Map<
|
||||
string,
|
||||
{
|
||||
onEventCallback?: (event: unknown, conversationId: string) => void;
|
||||
repositoryName: string;
|
||||
}
|
||||
>
|
||||
>(new Map());
|
||||
|
||||
// Handle polling and subscription for pending conversations
|
||||
React.useEffect(() => {
|
||||
const handleConversationPolling = async () => {
|
||||
const conversationsToProcess = Array.from(pendingConversations.entries());
|
||||
|
||||
await Promise.all(
|
||||
conversationsToProcess.map(async ([conversationId, config]) => {
|
||||
try {
|
||||
const conversation =
|
||||
await OpenHands.getConversation(conversationId);
|
||||
|
||||
if (
|
||||
conversation?.status === "RUNNING" &&
|
||||
conversation.runtime_status
|
||||
) {
|
||||
// Conversation is ready, subscribe to it
|
||||
let baseUrl = "";
|
||||
if (conversation.url && !conversation.url.startsWith("/")) {
|
||||
baseUrl = new URL(conversation.url).host;
|
||||
} else {
|
||||
baseUrl =
|
||||
(import.meta.env.VITE_BACKEND_BASE_URL as
|
||||
| string
|
||||
| undefined) || window?.location.host;
|
||||
}
|
||||
|
||||
subscribeToConversation({
|
||||
conversationId,
|
||||
sessionApiKey: conversation.session_api_key,
|
||||
providersSet: [], // Empty array since we don't need providers for subscription
|
||||
baseUrl,
|
||||
onEvent: config.onEventCallback,
|
||||
});
|
||||
|
||||
// Remove from pending when subscription is established
|
||||
setPendingConversations((prev) => {
|
||||
const newMap = new Map(prev);
|
||||
newMap.delete(conversationId);
|
||||
return newMap;
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
// Remove failed conversation from pending
|
||||
setPendingConversations((prev) => {
|
||||
const newMap = new Map(prev);
|
||||
newMap.delete(conversationId);
|
||||
return newMap;
|
||||
});
|
||||
}
|
||||
}),
|
||||
);
|
||||
};
|
||||
|
||||
if (pendingConversations.size > 0) {
|
||||
const interval = setInterval(handleConversationPolling, 1000);
|
||||
return () => clearInterval(interval);
|
||||
}
|
||||
return undefined;
|
||||
}, [pendingConversations, subscribeToConversation]);
|
||||
|
||||
const createConversationAndSubscribe = React.useCallback(
|
||||
({
|
||||
query,
|
||||
@@ -49,77 +121,33 @@ export const useCreateConversationAndSubscribeMultiple = () => {
|
||||
createMicroagent,
|
||||
},
|
||||
{
|
||||
onSuccess: async (data) => {
|
||||
try {
|
||||
// NOTE: createConversation returns ConversationResponse (no url/session_api_key)
|
||||
// but we need the full Conversation object for WebSocket connection.
|
||||
// Wait for conversation to be fully loaded to get proper url and session_api_key
|
||||
const conversation = await OpenHands.getConversation(
|
||||
data.conversation_id,
|
||||
);
|
||||
if (!conversation) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error("Failed to load conversation after creation");
|
||||
return;
|
||||
}
|
||||
|
||||
let baseUrl = "";
|
||||
if (conversation.url && !conversation.url.startsWith("/")) {
|
||||
baseUrl = new URL(conversation.url).host;
|
||||
} else {
|
||||
baseUrl =
|
||||
(import.meta.env.VITE_BACKEND_BASE_URL as
|
||||
| string
|
||||
| undefined) || window?.location.host;
|
||||
}
|
||||
|
||||
// Subscribe to the conversation using the loaded conversation data
|
||||
subscribeToConversation({
|
||||
conversationId: conversation.conversation_id,
|
||||
sessionApiKey: conversation.session_api_key,
|
||||
providersSet: providers,
|
||||
baseUrl,
|
||||
onEvent: onEventCallback,
|
||||
onSuccess: (data) => {
|
||||
// Add to pending conversations for polling
|
||||
setPendingConversations((prev) => {
|
||||
const newMap = new Map(prev);
|
||||
newMap.set(data.conversation_id, {
|
||||
onEventCallback,
|
||||
repositoryName: repository.name,
|
||||
});
|
||||
return newMap;
|
||||
});
|
||||
|
||||
// Call the success callback if provided
|
||||
if (onSuccessCallback) {
|
||||
onSuccessCallback(data.conversation_id);
|
||||
}
|
||||
} catch (error) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(
|
||||
"Error loading conversation for WebSocket connection:",
|
||||
error,
|
||||
);
|
||||
// Fallback to original behavior if fetching conversation fails
|
||||
let baseUrl = "";
|
||||
if (data?.url && !data.url.startsWith("/")) {
|
||||
baseUrl = new URL(data.url).host;
|
||||
} else {
|
||||
baseUrl =
|
||||
(import.meta.env.VITE_BACKEND_BASE_URL as
|
||||
| string
|
||||
| undefined) || window?.location.host;
|
||||
}
|
||||
// Show immediate "starting" toast to give user feedback
|
||||
toast(`Starting conversation for ${repository.name}...`, {
|
||||
...TOAST_OPTIONS,
|
||||
id: `starting-${data.conversation_id}`,
|
||||
duration: 10000, // Longer duration since this will be replaced by the runtime status toast
|
||||
});
|
||||
|
||||
subscribeToConversation({
|
||||
conversationId: data.conversation_id,
|
||||
sessionApiKey: data.session_api_key,
|
||||
providersSet: providers,
|
||||
baseUrl,
|
||||
onEvent: onEventCallback,
|
||||
});
|
||||
|
||||
if (onSuccessCallback) {
|
||||
onSuccessCallback(data.conversation_id);
|
||||
}
|
||||
// Call the success callback immediately (conversation created)
|
||||
if (onSuccessCallback) {
|
||||
onSuccessCallback(data.conversation_id);
|
||||
}
|
||||
},
|
||||
},
|
||||
);
|
||||
},
|
||||
[createConversation, subscribeToConversation, providers],
|
||||
[createConversation],
|
||||
);
|
||||
|
||||
return {
|
||||
|
||||
@@ -14,6 +14,7 @@ export const VERIFIED_MODELS = [
|
||||
"claude-3-7-sonnet-20250219",
|
||||
"claude-sonnet-4-20250514",
|
||||
"claude-opus-4-20250514",
|
||||
"claude-opus-4-1-20250805",
|
||||
"gemini-2.5-pro",
|
||||
"o4-mini",
|
||||
"deepseek-chat",
|
||||
@@ -47,6 +48,7 @@ export const VERIFIED_ANTHROPIC_MODELS = [
|
||||
"claude-3-7-sonnet-20250219",
|
||||
"claude-sonnet-4-20250514",
|
||||
"claude-opus-4-20250514",
|
||||
"claude-opus-4-1-20250805",
|
||||
];
|
||||
|
||||
// LiteLLM does not return the compatible Mistral models with the provider, so we list them here to set them ourselves
|
||||
@@ -62,6 +64,7 @@ export const VERIFIED_MISTRAL_MODELS = [
|
||||
export const VERIFIED_OPENHANDS_MODELS = [
|
||||
"claude-sonnet-4-20250514",
|
||||
"claude-opus-4-20250514",
|
||||
"claude-opus-4-1-20250805",
|
||||
"gemini-2.5-pro",
|
||||
"o3",
|
||||
"o4-mini",
|
||||
|
||||
@@ -129,12 +129,13 @@ async def run_session(
|
||||
conversation_instructions: str | None = None,
|
||||
session_name: str | None = None,
|
||||
skip_banner: bool = False,
|
||||
conversation_id: str | None = None,
|
||||
) -> bool:
|
||||
reload_microagents = False
|
||||
new_session_requested = False
|
||||
exit_reason = ExitReason.INTENTIONAL
|
||||
|
||||
sid = generate_sid(config, session_name)
|
||||
sid = conversation_id or generate_sid(config, session_name)
|
||||
is_loaded = asyncio.Event()
|
||||
is_paused = asyncio.Event() # Event to track agent pause requests
|
||||
always_confirm_mode = False # Flag to enable always confirm mode
|
||||
@@ -705,6 +706,7 @@ After reviewing the file, please ask the user what they would like to do with it
|
||||
task_str,
|
||||
session_name=args.name,
|
||||
skip_banner=banner_shown,
|
||||
conversation_id=args.conversation,
|
||||
)
|
||||
|
||||
# If a new session was requested, run it
|
||||
|
||||
@@ -27,7 +27,7 @@ from openhands.core.config.condenser_config import (
|
||||
CondenserPipelineConfig,
|
||||
ConversationWindowCondenserConfig,
|
||||
)
|
||||
from openhands.core.config.utils import OH_DEFAULT_AGENT
|
||||
from openhands.core.config.config_utils import OH_DEFAULT_AGENT
|
||||
from openhands.memory.condenser.impl.llm_summarizing_condenser import (
|
||||
LLMSummarizingCondenserConfig,
|
||||
)
|
||||
|
||||
@@ -164,6 +164,7 @@ VERIFIED_OPENAI_MODELS = [
|
||||
VERIFIED_ANTHROPIC_MODELS = [
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-opus-4-20250514',
|
||||
'claude-opus-4-1-20250805',
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-3-sonnet-20240229',
|
||||
'claude-3-opus-20240229',
|
||||
@@ -184,6 +185,7 @@ VERIFIED_MISTRAL_MODELS = [
|
||||
VERIFIED_OPENHANDS_MODELS = [
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-opus-4-20250514',
|
||||
'claude-opus-4-1-20250805',
|
||||
'devstral-small-2507',
|
||||
'devstral-medium-2507',
|
||||
'o3',
|
||||
|
||||
@@ -20,10 +20,6 @@ from openhands.core.config.condenser_config import (
|
||||
condenser_config_from_toml_section,
|
||||
create_condenser_config,
|
||||
)
|
||||
from openhands.core.config.config_utils import (
|
||||
OH_DEFAULT_AGENT,
|
||||
OH_MAX_ITERATIONS,
|
||||
)
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.kubernetes_config import KubernetesConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
@@ -712,14 +708,14 @@ def get_parser() -> argparse.ArgumentParser:
|
||||
parser.add_argument(
|
||||
'-c',
|
||||
'--agent-cls',
|
||||
default=OH_DEFAULT_AGENT,
|
||||
default=None,
|
||||
type=str,
|
||||
help='Name of the default agent to use',
|
||||
)
|
||||
parser.add_argument(
|
||||
'-i',
|
||||
'--max-iterations',
|
||||
default=OH_MAX_ITERATIONS,
|
||||
default=None,
|
||||
type=int,
|
||||
help='The maximum number of iterations to run the agent',
|
||||
)
|
||||
@@ -774,6 +770,12 @@ def get_parser() -> argparse.ArgumentParser:
|
||||
type=str,
|
||||
default='',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--conversation',
|
||||
help='The conversation id to continue',
|
||||
type=str,
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--eval-ids',
|
||||
default=None,
|
||||
|
||||
@@ -63,6 +63,7 @@ CACHE_PROMPT_SUPPORTED_MODELS = [
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-sonnet-4',
|
||||
'claude-opus-4-20250514',
|
||||
'claude-opus-4-1-20250805',
|
||||
]
|
||||
|
||||
# function calling supporting models
|
||||
@@ -77,6 +78,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
|
||||
'claude-sonnet-4-20250514',
|
||||
'claude-sonnet-4',
|
||||
'claude-opus-4-20250514',
|
||||
'claude-opus-4-1-20250805',
|
||||
'gpt-4o-mini',
|
||||
'gpt-4o',
|
||||
'o1-2024-12-17',
|
||||
|
||||
Generated
-6
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"name": "OpenHands",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from openhands.core.config import OH_DEFAULT_AGENT, OH_MAX_ITERATIONS, get_parser
|
||||
from openhands.core.config import get_parser
|
||||
|
||||
|
||||
def test_parser_default_values():
|
||||
@@ -10,8 +10,8 @@ def test_parser_default_values():
|
||||
assert args.directory is None
|
||||
assert args.task == ''
|
||||
assert args.file is None
|
||||
assert args.agent_cls == OH_DEFAULT_AGENT
|
||||
assert args.max_iterations == OH_MAX_ITERATIONS
|
||||
assert args.agent_cls is None
|
||||
assert args.max_iterations is None
|
||||
assert args.max_budget_per_task is None
|
||||
assert args.eval_output_dir == 'evaluation/evaluation_outputs/outputs'
|
||||
assert args.eval_n_limit is None
|
||||
@@ -139,13 +139,14 @@ def test_help_message(capsys):
|
||||
'--selected-repo SELECTED_REPO',
|
||||
'--override-cli-mode OVERRIDE_CLI_MODE',
|
||||
'--log-level LOG_LEVEL',
|
||||
'--conversation CONVERSATION',
|
||||
]
|
||||
|
||||
for element in expected_elements:
|
||||
assert element in help_output, f"Expected '{element}' to be in the help message"
|
||||
|
||||
option_count = help_output.count(' -')
|
||||
assert option_count == 21, f'Expected 21 options, found {option_count}'
|
||||
assert option_count == 22, f'Expected 22 options, found {option_count}'
|
||||
|
||||
|
||||
def test_selected_repo_format():
|
||||
|
||||
@@ -359,6 +359,7 @@ async def test_main_without_task(
|
||||
mock_args.llm_config = None
|
||||
mock_args.name = None
|
||||
mock_args.file = None
|
||||
mock_args.conversation = None
|
||||
mock_parse_args.return_value = mock_args
|
||||
|
||||
# Mock config
|
||||
@@ -412,6 +413,7 @@ async def test_main_without_task(
|
||||
None,
|
||||
session_name=None,
|
||||
skip_banner=False,
|
||||
conversation_id=None,
|
||||
)
|
||||
|
||||
|
||||
@@ -553,6 +555,7 @@ async def test_main_with_session_name_passes_name_to_run_session(
|
||||
mock_args.llm_config = None
|
||||
mock_args.name = test_session_name # Set the session name
|
||||
mock_args.file = None
|
||||
mock_args.conversation = None
|
||||
mock_parse_args.return_value = mock_args
|
||||
|
||||
# Mock config
|
||||
@@ -606,6 +609,7 @@ async def test_main_with_session_name_passes_name_to_run_session(
|
||||
None,
|
||||
session_name=test_session_name,
|
||||
skip_banner=False,
|
||||
conversation_id=None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@ from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
|
||||
from openhands.core.config import (
|
||||
OH_DEFAULT_AGENT,
|
||||
OH_MAX_ITERATIONS,
|
||||
OpenHandsConfig,
|
||||
get_llm_config_arg,
|
||||
setup_config_from_args,
|
||||
@@ -308,3 +310,74 @@ def test_cli_settings_json_not_override_config_toml(
|
||||
# Verify that settings.json did not override config.toml
|
||||
assert test_llm_config.model == 'config-toml-model'
|
||||
assert test_llm_config.api_key == 'config-toml-api-key'
|
||||
|
||||
|
||||
def test_default_values_applied_when_none():
|
||||
"""Test that default values are applied when config values are None."""
|
||||
|
||||
# Create mock args with None values for agent_cls and max_iterations
|
||||
mock_args = MagicMock()
|
||||
mock_args.config_file = None
|
||||
mock_args.llm_config = None
|
||||
mock_args.agent_cls = None
|
||||
mock_args.max_iterations = None
|
||||
|
||||
# Load config
|
||||
with patch(
|
||||
'openhands.core.config.utils.load_openhands_config',
|
||||
return_value=OpenHandsConfig(),
|
||||
):
|
||||
config = setup_config_from_args(mock_args)
|
||||
|
||||
# Verify they match the expected defaults
|
||||
assert config.default_agent == OH_DEFAULT_AGENT
|
||||
assert config.max_iterations == OH_MAX_ITERATIONS
|
||||
|
||||
|
||||
def test_cli_args_override_defaults():
|
||||
"""Test that CLI arguments override default values."""
|
||||
|
||||
# Create mock args with custom values
|
||||
mock_args = MagicMock()
|
||||
mock_args.config_file = None
|
||||
mock_args.llm_config = None
|
||||
mock_args.agent_cls = 'CustomAgent'
|
||||
mock_args.max_iterations = 50
|
||||
|
||||
# Load config
|
||||
with patch(
|
||||
'openhands.core.config.utils.load_openhands_config',
|
||||
return_value=OpenHandsConfig(),
|
||||
):
|
||||
config = setup_config_from_args(mock_args)
|
||||
|
||||
# Verify custom values are used instead of defaults
|
||||
assert config.default_agent == 'CustomAgent'
|
||||
assert config.max_iterations == 50
|
||||
|
||||
|
||||
def test_cli_args_none_uses_config_toml_values():
|
||||
"""Test that when CLI args agent_cls and max_iterations are None, config.toml values are used."""
|
||||
|
||||
# Create mock args with None values for agent_cls and max_iterations
|
||||
mock_args = MagicMock()
|
||||
mock_args.config_file = None
|
||||
mock_args.llm_config = None
|
||||
mock_args.agent_cls = None
|
||||
mock_args.max_iterations = None
|
||||
|
||||
# Create a config with specific values from config.toml
|
||||
config_from_toml = OpenHandsConfig()
|
||||
config_from_toml.default_agent = 'ConfigTomlAgent'
|
||||
config_from_toml.max_iterations = 100
|
||||
|
||||
# Load config
|
||||
with patch(
|
||||
'openhands.core.config.utils.load_openhands_config',
|
||||
return_value=config_from_toml,
|
||||
):
|
||||
config = setup_config_from_args(mock_args)
|
||||
|
||||
# Verify config.toml values are preserved when CLI args are None
|
||||
assert config.default_agent == 'ConfigTomlAgent'
|
||||
assert config.max_iterations == 100
|
||||
|
||||
@@ -13,6 +13,7 @@ from openhands.integrations.service_types import (
|
||||
Repository,
|
||||
)
|
||||
from openhands.microagent.types import MicroagentContentResponse
|
||||
from openhands.server.dependencies import check_session_api_key
|
||||
from openhands.server.routes.git import app as git_app
|
||||
from openhands.server.user_auth import (
|
||||
get_access_token,
|
||||
@@ -49,10 +50,15 @@ def test_client():
|
||||
def mock_get_user_id():
|
||||
return 'test_user'
|
||||
|
||||
def mock_check_session_api_key():
|
||||
# Mock session API key check to always pass for tests
|
||||
return None
|
||||
|
||||
# Override the dependencies in the app
|
||||
app.dependency_overrides[get_provider_tokens] = mock_get_provider_tokens
|
||||
app.dependency_overrides[get_access_token] = mock_get_access_token
|
||||
app.dependency_overrides[get_user_id] = mock_get_user_id
|
||||
app.dependency_overrides[check_session_api_key] = mock_check_session_api_key
|
||||
|
||||
yield TestClient(app)
|
||||
|
||||
|
||||
@@ -46,24 +46,32 @@ def test_localhost_cors_middleware_init_without_env_var():
|
||||
|
||||
|
||||
def test_localhost_cors_middleware_is_allowed_origin_localhost(app):
|
||||
"""Test that localhost origins are allowed regardless of port."""
|
||||
app.add_middleware(LocalhostCORSMiddleware)
|
||||
client = TestClient(app)
|
||||
"""Test that localhost origins are allowed regardless of port when no specific origins are configured."""
|
||||
# Test without setting PERMITTED_CORS_ORIGINS to trigger localhost behavior
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
app.add_middleware(LocalhostCORSMiddleware)
|
||||
client = TestClient(app)
|
||||
|
||||
# Test with localhost
|
||||
response = client.get('/test', headers={'Origin': 'http://localhost:8000'})
|
||||
assert response.status_code == 200
|
||||
assert response.headers['access-control-allow-origin'] == 'http://localhost:8000'
|
||||
# Test with localhost
|
||||
response = client.get('/test', headers={'Origin': 'http://localhost:8000'})
|
||||
assert response.status_code == 200
|
||||
assert (
|
||||
response.headers['access-control-allow-origin'] == 'http://localhost:8000'
|
||||
)
|
||||
|
||||
# Test with different port
|
||||
response = client.get('/test', headers={'Origin': 'http://localhost:3000'})
|
||||
assert response.status_code == 200
|
||||
assert response.headers['access-control-allow-origin'] == 'http://localhost:3000'
|
||||
# Test with different port
|
||||
response = client.get('/test', headers={'Origin': 'http://localhost:3000'})
|
||||
assert response.status_code == 200
|
||||
assert (
|
||||
response.headers['access-control-allow-origin'] == 'http://localhost:3000'
|
||||
)
|
||||
|
||||
# Test with 127.0.0.1
|
||||
response = client.get('/test', headers={'Origin': 'http://127.0.0.1:8000'})
|
||||
assert response.status_code == 200
|
||||
assert response.headers['access-control-allow-origin'] == 'http://127.0.0.1:8000'
|
||||
# Test with 127.0.0.1
|
||||
response = client.get('/test', headers={'Origin': 'http://127.0.0.1:8000'})
|
||||
assert response.status_code == 200
|
||||
assert (
|
||||
response.headers['access-control-allow-origin'] == 'http://127.0.0.1:8000'
|
||||
)
|
||||
|
||||
|
||||
def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):
|
||||
@@ -87,14 +95,15 @@ def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):
|
||||
|
||||
def test_localhost_cors_middleware_missing_origin(app):
|
||||
"""Test behavior when Origin header is missing."""
|
||||
app.add_middleware(LocalhostCORSMiddleware)
|
||||
client = TestClient(app)
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
app.add_middleware(LocalhostCORSMiddleware)
|
||||
client = TestClient(app)
|
||||
|
||||
# Test without Origin header
|
||||
response = client.get('/test')
|
||||
assert response.status_code == 200
|
||||
# There should be no access-control-allow-origin header
|
||||
assert 'access-control-allow-origin' not in response.headers
|
||||
# Test without Origin header
|
||||
response = client.get('/test')
|
||||
assert response.status_code == 200
|
||||
# There should be no access-control-allow-origin header
|
||||
assert 'access-control-allow-origin' not in response.headers
|
||||
|
||||
|
||||
def test_localhost_cors_middleware_inheritance():
|
||||
|
||||
Reference in New Issue
Block a user