Compare commits

..

6 Commits

Author SHA1 Message Date
mamoodi
4aae68ae53 Merge branch 'main' into rel540 2025-08-18 18:39:23 -04:00
mamoodi
aa7f78ce55 Merge branch 'main' into rel540 2025-08-18 17:01:52 -04:00
mamoodi
9f0afa9e67 Merge branch 'main' into rel540 2025-08-18 14:47:31 -04:00
mamoodi
9d17a5e520 Merge branch 'main' into rel540 2025-08-18 14:28:28 -04:00
mamoodi
4f409108ff Merge branch 'main' into rel540 2025-08-18 13:56:54 -04:00
mamoodi
fd7b49c6ba Release 0.54.0 2025-08-18 10:44:43 -04:00
192 changed files with 3275 additions and 6901 deletions

View File

@@ -22,7 +22,7 @@ jobs:
uses: actions/checkout@v4
- name: Install poetry via pipx
uses: abatilo/actions-poetry@v4
uses: abatilo/actions-poetry@v3
with:
poetry-version: 2.1.3

View File

@@ -73,7 +73,7 @@ jobs:
- name: Install Python dependencies using Poetry
run: poetry install --with dev,test,runtime
- name: Run Windows unit tests
run: poetry run pytest -svv tests/unit/runtime/utils/test_windows_bash.py
run: poetry run pytest -svv tests/unit/test_windows_bash.py
env:
PYTHONPATH: ".;$env:PYTHONPATH"
DEBUG: "1"

View File

@@ -1,50 +0,0 @@
name: Welcome Good First Issue
on:
issues:
types: [labeled]
permissions:
issues: write
jobs:
comment-on-good-first-issue:
if: github.event.label.name == 'good first issue'
runs-on: ubuntu-latest
steps:
- name: Check if welcome comment already exists
id: check_comment
uses: actions/github-script@v7
with:
result-encoding: string
script: |
const issueNumber = context.issue.number;
const comments = await github.rest.issues.listComments({
...context.repo,
issue_number: issueNumber
});
const alreadyCommented = comments.data.some(
(comment) =>
comment.body.includes('<!-- auto-comment:good-first-issue -->')
);
return alreadyCommented ? 'true' : 'false';
- name: Leave welcome comment
if: steps.check_comment.outputs.result == 'false'
uses: actions/github-script@v7
with:
script: |
const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;
await github.rest.issues.createComment({
...context.repo,
issue_number: context.issue.number,
body: "🙌 **Hey there, future contributor!** 🙌\n\n" +
"This issue has been labeled as **good first issue**, which means it's a great place to get started with the OpenHands project.\n\n" +
"If you're interested in working on it, feel free to! No need to ask for permission.\n\n" +
"Be sure to check out our [development setup guide](" + repoUrl + "/blob/main/Development.md) to get your environment set up, and follow our [contribution guidelines](" + repoUrl + "/blob/main/CONTRIBUTING.md) when you're ready to submit a fix.\n\n" +
"🙌 Happy hacking! 🙌\n\n" +
"<!-- auto-comment:good-first-issue -->"
});

View File

@@ -21,7 +21,7 @@ ENV POETRY_NO_INTERACTION=1 \
POETRY_CACHE_DIR=/tmp/poetry_cache
RUN apt-get update -y \
&& apt-get install -y curl make git build-essential jq gettext \
&& apt-get install -y curl make git build-essential \
&& python3 -m pip install poetry --break-system-packages
COPY pyproject.toml poetry.lock ./

File diff suppressed because it is too large Load Diff

View File

@@ -129,7 +129,7 @@ docker run -it \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
python -m openhands.cli.entry --override-cli-mode true
python -m openhands.cli.main --override-cli-mode true
```
<Note>

View File

@@ -130,28 +130,3 @@ docker run # ... \
<Note>
**Docker Desktop Required**: Network isolation features, including custom networks and `host.docker.internal` routing, require Docker Desktop. Docker Engine alone does not support these features on localhost across custom networks. If you're using Docker Engine without Docker Desktop, network isolation may not work as expected.
</Note>
### Sidecar Containers
If you want to run sidecar containers alongside the sandbox 'runner' containers without exposing the sandbox containers to the host network, you can use the `SANDBOX_ADDITIONAL_NETWORKS` environment variable to specify the names of additional Docker networks that the sandbox containers should join.
```bash
docker network create openhands-sccache
docker run -d \
--hostname openhandsredis \
--network openhands-sccache \
redis
docker run # ...
-e SANDBOX_ADDITIONAL_NETWORKS='["openhands-sccache"]' \
# ...
```
Then all sandbox instances will have access to a shared Redis instance at `openhandsredis:6379`.
#### Docker Compose gotcha
Note that Docker Compose adds a prefix (a scope) by default to the networks it creates, and the additional networks config does not take this prefix into account. Therefore, when using Docker Compose, you have to either:
- specify a network name via the `name` field to remove the scoping (https://docs.docker.com/reference/compose-file/networks/#name)
- or provide the scope within the given config (e.g. `SANDBOX_ADDITIONAL_NETWORKS: '["myscope_openhands-sccache"]'` where `myscope` is the docker-compose assigned prefix).

View File

@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -147,7 +146,7 @@ def process_instance(
logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
test_result = game.reward()
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -274,7 +273,7 @@ def process_instance(
# remove when it becomes unnecessary
histories = compatibility_for_eval_history_pairs(state.history)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -247,7 +246,7 @@ def process_instance(
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary
histories = compatibility_for_eval_history_pairs(state.history)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -15,7 +15,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -295,7 +294,7 @@ def process_instance(
raise ValueError('State should not be None.')
test_result = complete_runtime(runtime, instance)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary

View File

@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -423,7 +422,7 @@ def process_instance(
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -89,7 +88,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary

View File

@@ -16,7 +16,6 @@ from evaluation.utils.shared import (
assert_and_raise,
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -481,7 +480,7 @@ def process_instance(
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
histories = [event_to_dict(event) for event in state.history]
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -295,7 +294,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
test_result = complete_runtime(state)
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)

View File

@@ -22,7 +22,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -270,7 +269,7 @@ Here is the task:
'model_answer': model_answer,
'ground_truth': instance['Final answer'],
}
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -109,7 +108,7 @@ def process_instance(
# attempt to parse model_answer
ast_eval_fn = instance['ast_eval']
correct, hallucination = ast_eval_fn(instance_id, model_answer_raw)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
logger.info(
f'Final message: {model_answer_raw} | Correctness: {correct} | Hallucination: {hallucination}'
)

View File

@@ -30,7 +30,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -293,7 +292,7 @@ Ok now its time to start solving the question. Good luck!
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -249,7 +248,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
test_result = complete_runtime(runtime, instance)
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)

View File

@@ -22,7 +22,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -336,7 +335,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
)
)
assert state is not None
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else {}
test_result = complete_runtime(runtime, instance)

View File

@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -248,7 +247,7 @@ def process_instance(
)
test_result['final_message'] = final_message
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary

View File

@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -175,7 +174,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Instruction is the first message from the USER
instruction = ''

View File

@@ -15,7 +15,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -206,7 +205,7 @@ def process_instance(
task_state = state.extra_data['task_state']
logger.info('Task state: ' + str(task_state.to_dict()))
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -26,7 +26,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -251,7 +250,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
)
)
assert state is not None
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else {}
test_result = complete_runtime(runtime)

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -219,7 +218,7 @@ If the program uses some packages that are incompatible, please figure out alter
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -180,7 +179,7 @@ def process_instance(
raise ValueError('State should not be None.')
histories = [event_to_dict(event) for event in state.history]
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
instruction = message_action.content

View File

@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -135,7 +134,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
correct = eval_answer(str(model_answer_raw), str(answer))
logger.info(f'Final message: {model_answer_raw} | Correctness: {correct}')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -180,7 +179,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Instruction obtained from the first message from the USER
instruction = ''

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -164,7 +163,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Instruction is the first message from the USER
instruction = ''

View File

@@ -9,7 +9,6 @@ from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -136,7 +135,7 @@ def process_instance(
assert len(histories) > 0, 'History should not be empty'
test_result: TestResult = test_class.verify_result(runtime, histories)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
finally:
runtime.close()

View File

@@ -668,23 +668,8 @@ def is_fatal_runtime_error(error: str | None) -> bool:
def get_metrics(state: State) -> dict[str, Any]:
"""Extract metrics for evaluations.
Prefer ConversationStats (source of truth) and fall back to state.metrics for
backward compatibility.
"""
metrics: dict[str, Any]
try:
if getattr(state, 'conversation_stats', None):
combined = state.conversation_stats.get_combined_metrics()
metrics = combined.get()
elif getattr(state, 'metrics', None):
metrics = state.metrics.get()
else:
metrics = {}
except Exception:
metrics = state.metrics.get() if getattr(state, 'metrics', None) else {}
"""Extract metrics from the state."""
metrics = state.metrics.get() if state.metrics else {}
metrics['condenser'] = get_condensation_metadata(state)
return metrics

View File

@@ -232,16 +232,13 @@ describe("RepositorySelectionForm", () => {
renderForm();
const dropdown = await screen.findByTestId("repo-dropdown");
const input = dropdown.querySelector(
'input[type="text"]',
) as HTMLInputElement;
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
expect(input).toBeInTheDocument();
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
"kubernetes/kubernetes",
3,
"github",
);
});
@@ -271,16 +268,13 @@ describe("RepositorySelectionForm", () => {
renderForm();
const dropdown = await screen.findByTestId("repo-dropdown");
const input = dropdown.querySelector(
'input[type="text"]',
) as HTMLInputElement;
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
expect(input).toBeInTheDocument();
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
"kubernetes/kubernetes",
3,
"github",
);
});
});

View File

@@ -444,38 +444,28 @@ describe("MicroagentManagement", () => {
expect(filePath2).toBeInTheDocument();
});
it("should render add microagent button", async () => {
it("should display add microagent button in repository accordion", async () => {
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
});
// Check that add microagent buttons are present
const addButtons = screen.getAllByTestId("add-microagent-button");
expect(addButtons.length).toBeGreaterThan(0);
});
it("should open modal when add button is clicked", async () => {
it("should open add microagent modal when add button is clicked", async () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1302,18 +1292,11 @@ describe("MicroagentManagement", () => {
it("should render add microagent button", async () => {
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Check that add microagent buttons are present
const addButtons = screen.getAllByTestId("add-microagent-button");
expect(addButtons.length).toBeGreaterThan(0);
@@ -1323,18 +1306,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1385,18 +1361,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1416,18 +1385,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1446,18 +1408,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1486,18 +1441,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1520,18 +1468,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1553,18 +1494,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);

View File

@@ -1,9 +1,7 @@
import { useCallback, useMemo, useState } from "react";
import { useCallback, useMemo, useRef } from "react";
import { useTranslation } from "react-i18next";
import { Provider } from "../../types/settings";
import { useGitRepositories } from "../../hooks/query/use-git-repositories";
import { useSearchRepositories } from "../../hooks/query/use-search-repositories";
import { useDebounce } from "../../hooks/use-debounce";
import OpenHands from "../../api/open-hands";
import { GitRepository } from "../../types/git";
import {
@@ -21,6 +19,10 @@ export interface GitRepositoryDropdownProps {
onChange?: (repository?: GitRepository) => void;
}
interface SearchCache {
[key: string]: GitRepository[];
}
export function GitRepositoryDropdown({
provider,
value,
@@ -31,20 +33,6 @@ export function GitRepositoryDropdown({
onChange,
}: GitRepositoryDropdownProps) {
const { t } = useTranslation();
const [searchInput, setSearchInput] = useState("");
const debouncedSearchInput = useDebounce(searchInput, 300);
// Process search input to handle URLs
const processedSearchInput = useMemo(() => {
if (debouncedSearchInput.startsWith("https://")) {
const match = debouncedSearchInput.match(
/https:\/\/[^/]+\/([^/]+\/[^/]+)/,
);
return match ? match[1] : debouncedSearchInput;
}
return debouncedSearchInput;
}, [debouncedSearchInput]);
const {
data,
fetchNextPage,
@@ -57,10 +45,6 @@ export function GitRepositoryDropdown({
enabled: !disabled,
});
// Search query for processed input (handles URLs)
const { data: searchData, isLoading: isSearchLoading } =
useSearchRepositories(processedSearchInput, provider);
const allOptions: AsyncSelectOption[] = useMemo(
() =>
data?.pages
@@ -74,83 +58,75 @@ export function GitRepositoryDropdown({
[data],
);
const searchOptions: AsyncSelectOption[] = useMemo(
() =>
searchData
? searchData.map((repo) => ({
value: repo.id,
label: repo.full_name,
}))
: [],
[searchData],
);
// Keep track of search results
const searchCache = useRef<SearchCache>({});
const selectedOption = useMemo(() => {
// First check in loaded pages
const option = allOptions.find((opt) => opt.value === value);
if (option) return option;
// If not found, check in search results
const searchOption = searchOptions.find((opt) => opt.value === value);
if (searchOption) return searchOption;
// If not found, check in search cache
const repo = Object.values(searchCache.current)
.flat()
.find((r) => r.id === value);
if (repo) {
return {
value: repo.id,
label: repo.full_name,
};
}
return null;
}, [allOptions, searchOptions, value]);
}, [allOptions, value]);
const loadOptions = useCallback(
async (inputValue: string): Promise<AsyncSelectOption[]> => {
// Update search input to trigger debounced search
setSearchInput(inputValue);
// If empty input, show all loaded options
if (!inputValue.trim()) {
return allOptions;
}
// For very short inputs, do local filtering
if (inputValue.length < 2) {
return allOptions.filter((option) =>
option.label.toLowerCase().includes(inputValue.toLowerCase()),
);
}
// Handle URL inputs by performing direct search
// If it looks like a URL, extract the repo name and search
if (inputValue.startsWith("https://")) {
const match = inputValue.match(/https:\/\/[^/]+\/([^/]+\/[^/]+)/);
if (match) {
const repoName = match[1];
try {
// Perform direct search for URL-based inputs
const repositories = await OpenHands.searchGitRepositories(
repoName,
3,
provider,
);
return repositories.map((repo) => ({
value: repo.full_name,
label: repo.full_name,
data: repo,
}));
} catch (error) {
// Fall back to local filtering if search fails
return allOptions.filter((option) =>
option.label.toLowerCase().includes(repoName.toLowerCase()),
);
}
const searchResults = await OpenHands.searchGitRepositories(
repoName,
3,
);
// Cache the search results
searchCache.current[repoName] = searchResults;
return searchResults.map((repo) => ({
value: repo.id,
label: repo.full_name,
}));
}
}
// For regular text inputs, use hook-based search results if available
if (searchOptions.length > 0 && processedSearchInput === inputValue) {
return searchOptions;
// For any other input, search via API
if (inputValue.length >= 2) {
// Only search if at least 2 characters
const searchResults = await OpenHands.searchGitRepositories(
inputValue,
10,
);
// Cache the search results
searchCache.current[inputValue] = searchResults;
return searchResults.map((repo) => ({
value: repo.id,
label: repo.full_name,
}));
}
// Fallback to local filtering while search is loading
// For very short inputs, do local filtering
return allOptions.filter((option) =>
option.label.toLowerCase().includes(inputValue.toLowerCase()),
);
},
[allOptions, searchOptions, processedSearchInput, provider],
[allOptions],
);
const handleChange = (option: AsyncSelectOption | null) => {
@@ -166,7 +142,9 @@ export function GitRepositoryDropdown({
// If not found, check in search results
if (!repo) {
repo = searchData?.find((r) => r.id === option.value);
repo = Object.values(searchCache.current)
.flat()
.find((r) => r.id === option.value);
}
onChange?.(repo);
@@ -189,7 +167,7 @@ export function GitRepositoryDropdown({
errorMessage={errorMessage}
disabled={disabled}
isClearable={false}
isLoading={isLoading || isFetchingNextPage || isSearchLoading}
isLoading={isLoading || isLoading || isFetchingNextPage}
cacheOptions
defaultOptions={allOptions}
onChange={handleChange}

View File

@@ -17,7 +17,7 @@ export function MicroagentManagementAccordionTitle({
<TooltipButton
tooltip={repository.full_name}
ariaLabel={repository.full_name}
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[200px] translate-y-[-1px]"
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[232px]"
testId="repository-name-tooltip"
placement="bottom"
>

View File

@@ -7,6 +7,8 @@ import {
} from "#/state/microagent-management-slice";
import { RootState } from "#/store";
import { GitRepository } from "#/types/git";
import PlusIcon from "#/icons/plus.svg?react";
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
interface MicroagentManagementAddMicroagentButtonProps {
repository: GitRepository;
@@ -23,22 +25,23 @@ export function MicroagentManagementAddMicroagentButton({
const dispatch = useDispatch();
const handleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
e.stopPropagation();
dispatch(setAddMicroagentModalVisible(!addMicroagentModalVisible));
dispatch(setSelectedRepository(repository));
};
return (
<button
type="button"
onClick={handleClick}
className="translate-y-[-1px]"
data-testid="add-microagent-button"
>
<span className="text-sm font-normal leading-5 text-[#8480FF] cursor-pointer hover:text-[#6C63FF] transition-colors duration-200">
{t(I18nKey.COMMON$ADD_MICROAGENT)}
</span>
</button>
<div onClick={handleClick}>
<TooltipButton
tooltip={t(I18nKey.COMMON$ADD_MICROAGENT)}
ariaLabel={t(I18nKey.COMMON$ADD_MICROAGENT)}
className="p-0 min-w-0 h-6 w-6 flex items-center justify-center bg-transparent cursor-pointer"
testId="add-microagent-button"
placement="bottom"
>
<PlusIcon width={22} height={22} />
</TooltipButton>
</div>
);
}

View File

@@ -1,5 +1,4 @@
import React, { useEffect, useState } from "react";
import { useTranslation } from "react-i18next";
import { useDispatch, useSelector } from "react-redux";
import { MicroagentManagementSidebar } from "./microagent-management-sidebar";
import { MicroagentManagementMain } from "./microagent-management-main";
@@ -26,12 +25,6 @@ import { GitRepository } from "#/types/git";
import { queryClient } from "#/query-client-config";
import { Provider } from "#/types/settings";
import { MicroagentManagementLearnThisRepoModal } from "./microagent-management-learn-this-repo-modal";
import {
displaySuccessToast,
displayErrorToast,
} from "#/utils/custom-toast-handlers";
import { getFirstPRUrl } from "#/utils/parse-pr-url";
import { I18nKey } from "#/i18n/declaration";
// Handle error events
const isErrorEvent = (evt: unknown): evt is { error: true; message: string } =>
@@ -119,8 +112,6 @@ export function MicroagentManagementContent() {
learnThisRepoModalVisible,
} = useSelector((state: RootState) => state.microagentManagement);
const { t } = useTranslation();
const dispatch = useDispatch();
const { createConversationAndSubscribe, isPending } =
@@ -168,37 +159,6 @@ export function MicroagentManagementContent() {
? (selectedRepository as GitRepository).full_name
: "";
// Check if agent is running and ready to work
if (
isOpenHandsEvent(socketEvent) &&
isAgentStateChangeObservation(socketEvent) &&
socketEvent.extras.agent_state === AgentState.RUNNING
) {
displaySuccessToast(
t(I18nKey.MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT),
);
}
// Check if agent has finished and we have a PR
if (isOpenHandsEvent(socketEvent) && isFinishAction(socketEvent)) {
const prUrl = getFirstPRUrl(socketEvent.args.final_thought || "");
if (prUrl) {
displaySuccessToast(
t(I18nKey.MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW),
);
} else {
// Agent finished but no PR found
displaySuccessToast(t(I18nKey.MICROAGENT_MANAGEMENT$PR_NOT_CREATED));
}
}
// Handle error events
if (isErrorEvent(socketEvent) || isAgentStatusError(socketEvent)) {
displayErrorToast(
t(I18nKey.MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT),
);
}
if (shouldInvalidateConversationsList(socketEvent)) {
invalidateConversationsList(repositoryName);
}

View File

@@ -65,18 +65,6 @@ export function MicroagentManagementRepoMicroagents({
}
}, [conversations]);
useEffect(
() => () => {
dispatch(
setSelectedMicroagentItem({
microagent: null,
conversation: null,
}),
);
},
[],
);
// Show loading only when both queries are loading
const isLoading = isLoadingMicroagents || isLoadingConversations;
@@ -94,7 +82,7 @@ export function MicroagentManagementRepoMicroagents({
// If there's an error with microagents, show the learn this repo component
if (isError) {
return (
<div>
<div className="pb-4">
<MicroagentManagementLearnThisRepo repository={repository} />
</div>
);
@@ -105,7 +93,7 @@ export function MicroagentManagementRepoMicroagents({
const totalItems = numberOfMicroagents + numberOfConversations;
return (
<div>
<div className="pb-4">
{totalItems === 0 && (
<MicroagentManagementLearnThisRepo repository={repository} />
)}

View File

@@ -97,10 +97,8 @@ export function MicroagentManagementRepositories({
variant="splitted"
className="w-full px-0 gap-3"
itemClasses={{
base: "shadow-none bg-transparent cursor-pointer px-0",
trigger: "cursor-pointer gap-2 py-3",
indicator:
"flex items-center justify-center p-0.5 pr-[3px] text-white hover:bg-[#454545] rounded transition-colors duration-200 rotate-180",
base: "shadow-none bg-transparent border border-[#ffffff40] rounded-[6px] cursor-pointer",
trigger: "cursor-pointer gap-1",
}}
selectionMode="multiple"
>

View File

@@ -23,7 +23,7 @@ export function ModalBackdrop({ children, onClose }: ModalBackdropProps) {
<div className="fixed inset-0 flex items-center justify-center z-20">
<div
onClick={handleClick}
className="fixed inset-0 bg-black opacity-60"
className="fixed inset-0 bg-black bg-opacity-80"
/>
<div className="relative">{children}</div>
</div>

View File

@@ -810,8 +810,4 @@ export enum I18nKey {
PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION = "PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION",
PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION = "PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION",
SETTINGS = "SETTINGS",
MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT = "MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT",
MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW = "MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW",
MICROAGENT_MANAGEMENT$PR_NOT_CREATED = "MICROAGENT_MANAGEMENT$PR_NOT_CREATED",
MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT = "MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT",
}

View File

@@ -12958,69 +12958,5 @@
"tr": "A server with this URL already exists for the selected type",
"de": "A server with this URL already exists for the selected type",
"uk": "A server with this URL already exists for the selected type"
},
"MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT": {
"en": "Opening a PR to create the microagent for you...",
"ja": "マイクロエージェントを作成するためのプルリクエストを作成しています...",
"zh-CN": "正在为您创建微代理的拉取请求...",
"zh-TW": "正在為您建立微代理的拉取請求...",
"ko-KR": "마이크로에이전트를 생성하기 위한 PR을 열고 있습니다...",
"no": "Åpner en PR for å opprette mikroagenten for deg...",
"it": "Apertura di una PR per creare il microagente per te...",
"pt": "Abrindo um PR para criar o microagente para você...",
"es": "Abriendo un PR para crear el microagente para ti...",
"ar": "يتم فتح طلب سحب لإنشاء الوكيل الدقيق من أجلك...",
"fr": "Ouverture d'une PR pour créer le microagent pour vous...",
"tr": "Sizin için mikro ajanı oluşturmak üzere bir PR açılıyor...",
"de": "Es wird ein PR geöffnet, um den Microagent für Sie zu erstellen...",
"uk": "Відкривається PR для створення мікроагента для вас..."
},
"MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW": {
"en": "PR is ready for review! The microagent has been created successfully.",
"ja": "PRのレビューが可能です！マイクロエージェントが正常に作成されました。",
"zh-CN": "PR已准备好审核！微代理已成功创建。",
"zh-TW": "PR 已準備好審查!微代理已成功建立。",
"ko-KR": "PR이 검토를 위해 준비되었습니다! 마이크로에이전트가 성공적으로 생성되었습니다.",
"no": "PR er klar for gjennomgang! Mikroagenten har blitt opprettet.",
"it": "La PR è pronta per la revisione! Il microagente è stato creato con successo.",
"pt": "PR pronto para revisão! O microagente foi criado com sucesso.",
"es": "¡La PR está lista para revisión! El microagente se ha creado correctamente.",
"ar": "طلب السحب جاهز للمراجعة! تم إنشاء الوكيل الدقيق بنجاح.",
"fr": "La PR est prête pour révision ! Le microagent a été créé avec succès.",
"tr": "PR incelemeye hazır! Mikro ajan başarıyla oluşturuldu.",
"de": "PR ist bereit zur Überprüfung! Der Microagent wurde erfolgreich erstellt.",
"uk": "PR готовий до перегляду! Мікроагента успішно створено."
},
"MICROAGENT_MANAGEMENT$PR_NOT_CREATED": {
"en": "The agent has finished its task but was unable to create a PR.",
"ja": "エージェントはタスクを完了しましたが、PRを作成できませんでした。",
"zh-CN": "代理已完成任务,但无法创建 PR。",
"zh-TW": "代理已完成任務,但無法建立 PR。",
"ko-KR": "에이전트가 작업을 완료했지만 PR을 생성할 수 없었습니다.",
"no": "Agenten har fullført oppgaven, men klarte ikke å opprette en PR.",
"it": "L'agente ha terminato il suo compito ma non è riuscito a creare una PR.",
"pt": "O agente concluiu sua tarefa, mas não conseguiu criar um PR.",
"es": "El agente ha terminado su tarea pero no pudo crear un PR.",
"ar": "أكمل الوكيل مهمته لكنه لم يتمكن من إنشاء طلب سحب (PR).",
"fr": "L'agent a terminé sa tâche mais n'a pas pu créer de PR.",
"tr": "Ajan görevini tamamladı ancak bir PR oluşturamadı.",
"de": "Der Agent hat seine Aufgabe abgeschlossen, konnte aber keinen PR erstellen.",
"uk": "Агент завершив завдання, але не зміг створити PR."
},
"MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT": {
"en": "Something went wrong. Try initiating the microagent again.",
"ja": "問題が発生しました。もう一度マイクロエージェントを開始してください。",
"zh-CN": "出现了问题。请重试启动微代理。",
"zh-TW": "發生錯誤。請再次嘗試啟動微代理。",
"ko-KR": "문제가 발생했습니다. 마이크로에이전트를 다시 시작해 보세요.",
"no": "Noe gikk galt. Prøv å starte mikroagenten på nytt.",
"it": "Qualcosa è andato storto. Prova a iniziare di nuovo il microagente.",
"pt": "Algo deu errado. Tente iniciar o microagente novamente.",
"es": "Algo salió mal. Intenta iniciar el microagente de nuevo.",
"ar": "حدث خطأ ما. حاول بدء تشغيل الوكيل الدقيق مرة أخرى.",
"fr": "Une erreur s'est produite. Essayez de relancer le microagent.",
"tr": "Bir şeyler ters gitti. Mikro ajanı tekrar başlatmayı deneyin.",
"de": "Etwas ist schiefgelaufen. Versuchen Sie, den Microagenten erneut zu starten.",
"uk": "Щось пішло не так. Спробуйте ініціювати мікроагента ще раз."
}
}

View File

@@ -275,7 +275,7 @@ async def run_session(
if event.agent_state == AgentState.RUNNING:
display_agent_running_message()
start_pause_listener(loop, is_paused, event_stream, config)
start_pause_listener(loop, is_paused, event_stream)
def on_event(event: Event) -> None:
loop.create_task(on_event_async(event))

View File

@@ -87,9 +87,6 @@ COMMANDS = {
print_lock = threading.Lock()
# Lock to debounce sending Ctrl+C interrupts to the running command
_interrupt_lock: asyncio.Lock = asyncio.Lock()
pause_task: asyncio.Task | None = None # No more than one pause task
@@ -662,15 +659,6 @@ def display_help() -> None:
commands_html += f'<gold><b>{command}</b></gold> - <grey>{description}</grey>\n'
print_formatted_text(HTML(commands_html))
# Keyboard shortcuts section
print_formatted_text(HTML('\nKeyboard shortcuts:'))
shortcuts_html = (
'<gold><b>Ctrl+P</b></gold> - <grey>Pause the agent</grey>\n'
'<gold><b>Ctrl+C</b></gold> - <grey>Pause the agent; press twice quickly to interrupt a running command</grey>\n'
'<gold><b>Ctrl+D</b></gold> - <grey>Pause the agent</grey>\n'
)
print_formatted_text(HTML(shortcuts_html))
# Footer
print_formatted_text(
HTML(
@@ -876,13 +864,12 @@ async def read_confirmation_input(config: OpenHandsConfig) -> str:
def start_pause_listener(
loop: asyncio.AbstractEventLoop,
done_event: asyncio.Event,
event_stream: EventStream,
config: OpenHandsConfig,
event_stream,
) -> None:
global pause_task
if pause_task is None or pause_task.done():
pause_task = loop.create_task(
process_agent_pause(done_event, event_stream, config)
process_agent_pause(done_event, event_stream)
) # Create a task to track agent pause requests from the user
@@ -896,135 +883,16 @@ async def stop_pause_listener() -> None:
pause_task = None
def is_command_running(event_stream: EventStream) -> bool:
    """Report whether a shell command appears to be running right now.

    Walks the event stream backwards (at most 50 command-related events) and
    decides based on the most recent relevant one:

    - ``CmdOutputObservation`` with ``exit_code == -1``: output is still
      streaming, so the command is running.
    - ``CmdOutputObservation`` with any other exit code: the command finished.
    - ``CmdRunAction`` that is not an input action: a command was started and
      has produced no observation yet, so it is treated as running.
    - ``CmdRunAction`` with ``is_input`` set: ignored, keep looking further back.

    Returns:
        bool: True if a command is considered running; False otherwise,
        including when detection itself fails for any reason.
    """
    try:
        from openhands.events.event_filter import EventFilter

        command_events = EventFilter(
            include_types=(CmdRunAction, CmdOutputObservation)
        )
        newest_first = event_stream.search_events(
            reverse=True, filter=command_events, limit=50
        )
        for event in newest_first:
            if isinstance(event, CmdOutputObservation):
                # -1 marks an observation whose command has not exited yet.
                return event.metadata.exit_code == -1
            if isinstance(event, CmdRunAction) and not event.is_input:
                return True
    except Exception:
        # Best-effort detection: any failure means "no running command".
        return False
    return False
async def _handle_command_interrupt(
    event_stream: EventStream, config: OpenHandsConfig
) -> bool:
    """Ask the user what to do about Ctrl+C while a command is running.

    Shows a three-way menu (pause / keep waiting / interrupt the command) and
    acts on the answer. The menu is a blocking UI, so it runs in a worker
    thread via ``asyncio.to_thread``; ``config`` is forwarded to it so CLI
    settings are honored.

    Returns:
        bool: True if the interrupt was fully handled here (the user chose to
        keep waiting or to interrupt the command); False if the user wants the
        agent paused, which the caller is expected to do.
    """
    print_formatted_text('')
    print_formatted_text(HTML('<gold>Command is currently running.</gold>'))
    print_formatted_text('')

    # Index 0 stays the default so a plain Ctrl+C keeps its legacy meaning
    # (pause); interrupting the command is strictly opt-in.
    menu = [
        'Pause the agent (default)',
        'Continue waiting for command to complete',
        'Send interrupt to running command (Ctrl+C)',
    ]
    picked = await asyncio.to_thread(
        cli_confirm, config, 'What would you like to do?', menu, 0
    )

    if picked == 1:
        # Keep waiting for the command.
        print_formatted_text('')
        print_formatted_text(
            HTML('<gold>Continuing to wait for command completion...</gold>')
        )
        return True

    if picked != 2:
        # Pause option (or anything unexpected): let the caller pause.
        return False

    # picked == 2: forward Ctrl+C to the running command.
    print_formatted_text('')
    print_formatted_text(
        HTML('<gold>Sending interrupt signal to running command...</gold>')
    )
    # Debounce: if another interrupt is already in flight, do not queue a
    # second one.
    if _interrupt_lock.locked():
        print_formatted_text(HTML('<grey>Interrupt already sent; waiting…</grey>'))
        return True
    async with _interrupt_lock:
        event_stream.add_event(
            CmdRunAction(command='C-c', is_input=True),
            EventSource.USER,
        )
    return True
async def _handle_interrupt_async(
    event_stream: EventStream, done: asyncio.Event, config: OpenHandsConfig
) -> None:
    """Handle a Ctrl+C interrupt off the key-input handler.

    Delegates the decision to ``_handle_command_interrupt``. If the user
    chooses to pause, or if anything goes wrong while asking, the agent is
    paused and ``done`` is set.

    Args:
        event_stream: Stream that receives the pause action.
        done: Event set once the agent has been asked to pause.
        config: Forwarded to the confirmation dialog.
    """
    from html import escape

    def pause_agent() -> None:
        # Announce, request the PAUSED state, and signal completion.
        print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
        event_stream.add_event(
            ChangeAgentStateAction(AgentState.PAUSED),
            EventSource.USER,
        )
        done.set()

    try:
        handled = await _handle_command_interrupt(event_stream, config)
        if not handled:
            # User chose to pause the agent
            print_formatted_text('')
            pause_agent()
    except Exception as e:
        # If something goes wrong, fall back to pausing the agent.
        print_formatted_text('')
        # The message is embedded in markup, so escape it: an exception text
        # containing '<' or '&' must not break rendering and thereby skip the
        # fallback pause below.
        # NOTE(review): assumes HTML is prompt_toolkit's markup class - confirm.
        print_formatted_text(
            HTML(
                f'<ansired>Error handling interrupt: {escape(str(e), quote=False)}</ansired>'
            )
        )
        pause_agent()
async def process_agent_pause(
done: asyncio.Event, event_stream: EventStream, config: OpenHandsConfig
) -> None:
async def process_agent_pause(done: asyncio.Event, event_stream: EventStream) -> None:
input = create_input()
# Double-press detection window for Ctrl+C to send interrupt to running command
CTRL_C_WINDOW_SECONDS = 0.4
ctrl_c_timer: asyncio.Task | None = None
async def pause_after_delay(delay: float) -> None:
try:
await asyncio.sleep(delay)
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
except asyncio.CancelledError:
# Timer canceled because a second Ctrl+C was detected; do nothing
pass
def keys_ready() -> None:
nonlocal ctrl_c_timer
for key_press in input.read_keys():
if key_press.key == Keys.ControlP or key_press.key == Keys.ControlD:
# Immediate pause
if (
key_press.key == Keys.ControlP
or key_press.key == Keys.ControlC
or key_press.key == Keys.ControlD
):
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
@@ -1032,47 +900,6 @@ async def process_agent_pause(
EventSource.USER,
)
done.set()
elif key_press.key == Keys.ControlC:
if is_command_running(event_stream):
# If a timer is already running, this is a double-press: send interrupt
if ctrl_c_timer and not ctrl_c_timer.done():
ctrl_c_timer.cancel()
ctrl_c_timer = None
if _interrupt_lock.locked():
print_formatted_text(
HTML('<grey>Interrupt already sent; waiting…</grey>')
)
continue
# Send Ctrl+C to the running command
async def send_interrupt() -> None:
async with _interrupt_lock:
print_formatted_text('')
print_formatted_text(
HTML(
'<gold>Sending interrupt signal to running command...</gold>'
)
)
event_stream.add_event(
CmdRunAction(command='C-c', is_input=True),
EventSource.USER,
)
asyncio.create_task(send_interrupt())
else:
# Start a short window; if no second press, pause
ctrl_c_timer = asyncio.create_task(
pause_after_delay(CTRL_C_WINDOW_SECONDS)
)
else:
# No command running: default immediate pause
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
try:
with input.raw_mode():

View File

@@ -172,6 +172,9 @@ class LLMConfig(BaseModel):
# Set reasoning_effort to 'high' by default for non-Gemini models
# Gemini models use optimized thinking budget when reasoning_effort is None
logger.debug(
f'Setting reasoning_effort for model {self.model} with reasoning_effort {self.reasoning_effort}'
)
if self.reasoning_effort is None and 'gemini-2.5-pro' not in self.model:
self.reasoning_effort = 'high'

View File

@@ -18,7 +18,6 @@ class SandboxConfig(BaseModel):
remote_runtime_enable_retries: Whether to enable retries (on recoverable errors like requests.ConnectionError) for the remote runtime API requests.
enable_auto_lint: Whether to enable auto-lint.
use_host_network: Whether to use the host network.
additional_networks: A list of additional Docker networks to connect to
runtime_binding_address: The binding address for the runtime ports. It specifies which network interface on the host machine Docker should bind the runtime ports to.
initialize_plugins: Whether to initialize plugins.
force_rebuild_runtime: Whether to force rebuild the runtime image.
@@ -66,7 +65,6 @@ class SandboxConfig(BaseModel):
default=False
) # once enabled, OpenHands would lint files after editing
use_host_network: bool = Field(default=False)
additional_networks: list[str] = Field(default=[])
runtime_binding_address: str = Field(default='0.0.0.0')
runtime_extra_build_args: list[str] | None = Field(default=None)
initialize_plugins: bool = Field(default=True)

View File

@@ -321,36 +321,6 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
installations = response.get('installations', [])
return [str(i['id']) for i in installations]
async def get_user_organizations(self) -> list[str]:
    """Return the login names of the organizations the user belongs to.

    Requests ``{BASE_URL}/user/orgs`` and collects the ``login`` field of each
    entry. This is best-effort: any failure (request error or unexpected
    payload shape) is logged as a warning and yields an empty list.
    """
    logins: list[str] = []
    orgs_endpoint = f'{self.BASE_URL}/user/orgs'
    try:
        payload, _ = await self._make_request(orgs_endpoint)
        logins = [entry['login'] for entry in payload]
    except Exception as e:
        logger.warning(f'Failed to get user organizations: {e}')
    return logins
def _fuzzy_match_org_name(self, query: str, org_name: str) -> bool:
"""Check if query fuzzy matches organization name."""
query_lower = query.lower().replace('-', '').replace('_', '').replace(' ', '')
org_lower = org_name.lower().replace('-', '').replace('_', '').replace(' ', '')
# Exact match after normalization
if query_lower == org_lower:
return True
# Query is a substring of org name
if query_lower in org_lower:
return True
# Org name is a substring of query (less common but possible)
if org_lower in query_lower:
return True
return False
async def search_repositories(
self, query: str, per_page: int, sort: str, order: str, public: bool
) -> list[Repository]:
@@ -371,68 +341,21 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
# Add is:public to the query to ensure we only search for public repositories
params['q'] = f'in:name {org}/{repo_name} is:public'
# Handle private repository searches
# Perhaps we should go through all orgs and the search for repos under every org
# Currently it will only search user repos, and org repos when '/' is in the name
if not public and '/' in query:
org, repo_query = query.split('/', 1)
query_with_user = f'org:{org} in:name {repo_query}'
params['q'] = query_with_user
elif not public:
# Expand search scope to include user's repositories and organizations they're a member of
user = await self.get_user()
user_orgs = await self.get_user_organizations()
params['q'] = f'in:name {query} user:{user.login}'
# Search in user repos and org repos separately
all_repos = []
# Search in user repositories
user_query = f'{query} user:{user.login}'
user_params = params.copy()
user_params['q'] = user_query
try:
user_response, _ = await self._make_request(url, user_params)
user_items = user_response.get('items', [])
all_repos.extend(user_items)
except Exception as e:
logger.warning(f'User search failed: {e}')
# Search for repos named "query" in each organization
for org in user_orgs:
org_query = f'{query} org:{org}'
org_params = params.copy()
org_params['q'] = org_query
try:
org_response, _ = await self._make_request(url, org_params)
org_items = org_response.get('items', [])
all_repos.extend(org_items)
except Exception as e:
logger.warning(f'Org {org} search failed: {e}')
# Also search for top repos from orgs that match the query name
for org in user_orgs:
if self._fuzzy_match_org_name(query, org):
org_repos_query = f'org:{org}'
org_repos_params = params.copy()
org_repos_params['q'] = org_repos_query
org_repos_params['sort'] = 'stars'
org_repos_params['per_page'] = 2 # Limit to first 2 repos
try:
org_repos_response, _ = await self._make_request(
url, org_repos_params
)
org_repo_items = org_repos_response.get('items', [])
all_repos.extend(org_repo_items)
except Exception as e:
logger.warning(f'Org repos search for {org} failed: {e}')
return [self._parse_repository(repo) for repo in all_repos]
# Default case (public search or slash query)
response, _ = await self._make_request(url, params)
repo_items = response.get('items', [])
return [self._parse_repository(repo) for repo in repo_items]
repos = [self._parse_repository(repo) for repo in repo_items]
return repos
async def execute_graphql_query(
self, query: str, variables: dict[str, Any]

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,8 @@
import * as path from "path";
import Mocha = require("mocha");
import { glob } from "glob"; // Updated for glob v9+ API
import Mocha = require("mocha"); // Changed import style
import glob = require("glob"); // Changed import style
export async function run(): Promise<void> {
export function run(): Promise<void> {
// Create the mocha test
const mocha = new Mocha({
// This should now work with the changed import
@@ -13,25 +13,33 @@ export async function run(): Promise<void> {
const testsRoot = path.resolve(__dirname, ".."); // Root of the /src/test folder (compiled to /out/test)
try {
return new Promise((c, e) => {
// Use glob to find all test files (ending with .test.js in the compiled output)
const files = await glob("**/**.test.js", { cwd: testsRoot });
// Add files to the test suite
files.forEach((f: string) => mocha.addFile(path.resolve(testsRoot, f)));
// Run the mocha test
return await new Promise<void>((resolve, reject) => {
mocha.run((failures: number) => {
if (failures > 0) {
reject(new Error(`${failures} tests failed.`));
} else {
resolve();
glob(
"**/**.test.js",
{ cwd: testsRoot },
(err: NodeJS.ErrnoException | null, files: string[]) => {
if (err) {
return e(err);
}
});
});
} catch (err) {
console.error(err);
throw err;
}
// Add files to the test suite
files.forEach((f: string) => mocha.addFile(path.resolve(testsRoot, f)));
try {
// Run the mocha test
mocha.run((failures: number) => {
if (failures > 0) {
e(new Error(`${failures} tests failed.`));
} else {
c();
}
});
} catch (err) {
console.error(err);
e(err);
}
},
);
});
}

View File

@@ -9,8 +9,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import (
LLM,
LLM_RETRY_EXCEPTIONS,
REASONING_EFFORT_SUPPORTED_MODELS,
)
from openhands.llm.model_features import get_features
from openhands.utils.shutdown_listener import should_continue
@@ -63,7 +63,7 @@ class AsyncLLM(LLM):
messages = kwargs['messages']
# Set reasoning effort for models that support it
if get_features(self.config.model).supports_reasoning_effort:
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
kwargs['reasoning_effort'] = self.config.reasoning_effort
# ensure we work with a list of messages

View File

@@ -705,25 +705,6 @@ def _fix_stopword(content: str) -> str:
return content
def _normalize_parameter_tags(fn_body: str) -> str:
"""Normalize malformed parameter tags to the canonical format.
Some models occasionally emit malformed parameter tags like:
<parameter=command=str_replace</parameter>
instead of the correct:
<parameter=command>str_replace</parameter>
This function rewrites the malformed form into the correct one to allow
downstream parsing to succeed.
"""
# Replace '<parameter=name=value</parameter>' with '<parameter=name>value</parameter>'
return re.sub(
r'<parameter=([a-zA-Z0-9_]+)=([^<]*)</parameter>',
r'<parameter=\1>\2</parameter>',
fn_body,
)
def convert_non_fncall_messages_to_fncall_messages(
messages: list[dict],
tools: list[ChatCompletionToolParam],
@@ -871,7 +852,7 @@ def convert_non_fncall_messages_to_fncall_messages(
if fn_match:
fn_name = fn_match.group(1)
fn_body = _normalize_parameter_tags(fn_match.group(2))
fn_body = fn_match.group(2)
matching_tool = next(
(
tool['function']

View File

@@ -9,7 +9,6 @@ import httpx
from openhands.core.config import LLMConfig
from openhands.llm.metrics import Metrics
from openhands.llm.model_features import get_features
with warnings.catch_warnings():
warnings.simplefilter('ignore')
@@ -50,6 +49,79 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
LLMNoResponseError,
)
# Models that support prompt caching.
# Remove this list once Gemini and DeepSeek are supported.
# The caching check visible in this file compares the configured model name,
# and its last '/'-separated segment, against these entries exactly.
CACHE_PROMPT_SUPPORTED_MODELS = [
'claude-3-7-sonnet-20250219',
'claude-sonnet-3-7-latest',
'claude-3.7-sonnet',
'claude-3-5-sonnet-20241022',
'claude-3-5-sonnet-20240620',
'claude-3-5-haiku-20241022',
'claude-3-haiku-20240307',
'claude-3-opus-20240229',
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
]
# Models that support native function calling.
# The function-calling check visible in this file accepts an exact match, a
# match on the last '/'-separated segment, or any entry occurring as a
# substring of the configured model name, so short entries act as prefixes.
FUNCTION_CALLING_SUPPORTED_MODELS = [
'claude-3-7-sonnet-20250219',
'claude-sonnet-3-7-latest',
'claude-3-5-sonnet',
'claude-3-5-sonnet-20240620',
'claude-3-5-sonnet-20241022',
'claude-3.5-haiku',
'claude-3-5-haiku-20241022',
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'gpt-4o-mini',
'gpt-4o',
'o1-2024-12-17',
'o3-mini-2025-01-31',
'o3-mini',
'o3',
'o3-2025-04-16',
'o4-mini',
'o4-mini-2025-04-16',
'gemini-2.5-pro',
'gpt-4.1',
'kimi-k2-0711-preview',
'kimi-k2-instruct',
'Qwen3-Coder-480B-A35B-Instruct',
'qwen3-coder', # matches both qwen3-coder-480b (openhands provider) and qwen3-coder (openrouter)
'gpt-5',
'gpt-5-2025-08-07',
]
# Models that accept a `reasoning_effort` argument.
# The checks visible in this file compare the lowercased model name, or its
# last '/'-separated segment, against these entries exactly.
REASONING_EFFORT_SUPPORTED_MODELS = [
'o1-2024-12-17',
'o1',
'o3',
'o3-2025-04-16',
'o3-mini-2025-01-31',
'o3-mini',
'o4-mini',
'o4-mini-2025-04-16',
'gemini-2.5-flash',
'gemini-2.5-pro',
'gpt-5',
'gpt-5-2025-08-07',
'claude-opus-4-1-20250805', # we need to remove top_p for opus 4.1
]
# Models that must not be sent stop words.
# The stop-word check visible in this file tests the full configured model
# string for exact membership, so provider-prefixed names must be listed as-is.
MODELS_WITHOUT_STOP_WORDS = [
'o1-mini',
'o1-preview',
'o1',
'o1-2024-12-17',
'xai/grok-4-0709',
]
class LLM(RetryMixin, DebugMixin):
"""The LLM class represents a Language Model instance.
@@ -82,7 +154,6 @@ class LLM(RetryMixin, DebugMixin):
)
self.model_info: ModelInfo | None = None
self._function_calling_active: bool = False
self.retry_listener = retry_listener
if self.config.log_completions:
if self.config.log_completions_folder is None:
@@ -131,8 +202,10 @@ class LLM(RetryMixin, DebugMixin):
f'Rewrote openhands/{model_name} to {self.config.model} with base URL {self.config.base_url}'
)
features = get_features(self.config.model)
if features.supports_reasoning_effort:
if (
self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
):
# For Gemini models, only map 'low' to optimized thinking budget
# Let other reasoning_effort values pass through to API as-is
if 'gemini-2.5-pro' in self.config.model:
@@ -166,20 +239,6 @@ class LLM(RetryMixin, DebugMixin):
elif 'gemini' in self.config.model.lower() and self.config.safety_settings:
kwargs['safety_settings'] = self.config.safety_settings
# Explicitly disable Anthropic extended thinking for Opus 4.1 to avoid
# requiring 'thinking' content blocks. See issue #10510.
if 'claude-opus-4-1' in self.config.model.lower():
kwargs['thinking'] = {'type': 'disabled'}
# Anthropic constraint: Opus models cannot accept both temperature and top_p
# Prefer temperature (drop top_p) if both are specified.
_model_lower = self.config.model.lower()
# Limit to Opus 4.1 specifically to avoid changing behavior of other Anthropic models
if ('claude-opus-4-1' in _model_lower) and (
'temperature' in kwargs and 'top_p' in kwargs
):
kwargs.pop('top_p', None)
self._completion = partial(
litellm_completion,
model=self.config.model,
@@ -253,7 +312,7 @@ class LLM(RetryMixin, DebugMixin):
# add stop words if the model supports it and stop words are not disabled
if (
get_features(self.config.model).supports_stop_words
self.config.model not in MODELS_WITHOUT_STOP_WORDS
and not self.config.disable_stop_word
):
kwargs['stop'] = STOP_WORDS
@@ -497,10 +556,17 @@ class LLM(RetryMixin, DebugMixin):
):
self.config.max_output_tokens = self.model_info['max_tokens']
# Initialize function calling using centralized model features
features = get_features(self.config.model)
# Initialize function calling capability
# Check if model name is in our supported list
model_name_supported = (
self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
)
# Handle native_tool_calling user-defined configuration
if self.config.native_tool_calling is None:
self._function_calling_active = features.supports_function_calling
self._function_calling_active = model_name_supported
else:
self._function_calling_active = self.config.native_tool_calling
@@ -535,10 +601,14 @@ class LLM(RetryMixin, DebugMixin):
Returns:
boolean: True if prompt caching is supported and enabled for the given model.
"""
if not self.config.caching_prompt:
return False
# We don't need to look-up model_info, because only Anthropic models need explicit caching breakpoints
return get_features(self.config.model).supports_prompt_cache
return (
self.config.caching_prompt is True
and (
self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
)
# We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
)
def is_function_calling_active(self) -> bool:
"""Returns whether function calling is supported and enabled for this LLM instance.

View File

@@ -1,138 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass
from fnmatch import fnmatch
def normalize_model_name(model: str) -> str:
    """Reduce a model string to a canonical, comparable basename.

    Steps:
    - Strip surrounding whitespace and lowercase the string.
    - When the string contains a '/', keep only the text after the last '/'
      (drops prefixes such as openrouter/, litellm_proxy/, anthropic/) and cut
      that basename at its first ':' (Ollama-style variant tag).
    - Without a '/', the whole string is kept, including any ':'; there is no
      supported 'provider:model' form.
    - Remove a trailing '-gguf' suffix.

    A falsy ``model`` (``None`` or ``''``) yields ``''``.
    """
    lowered = (model or '').strip().lower()
    # rpartition returns ('', '', lowered) when there is no '/', so `basename`
    # is the whole string in that case.
    _provider, slash, basename = lowered.rpartition('/')
    if slash:
        # Only provider-qualified names get their variant tag removed.
        basename = basename.partition(':')[0]
    return basename.removesuffix('-gguf')
def model_matches(model: str, patterns: list[str]) -> bool:
    """Tell whether ``model`` matches at least one glob pattern.

    Patterns are lowercased before matching. A pattern containing '/' is
    provider-qualified and is compared with the full, stripped, lowercased
    model string (provider prefix included). Any other pattern is compared
    with the normalized basename from ``normalize_model_name``.
    """
    full_name = (model or '').strip().lower()
    base_name = normalize_model_name(model)

    def _hit(pattern: str) -> bool:
        lowered = pattern.lower()
        # Choose the comparison target by whether the pattern names a provider.
        target = full_name if '/' in lowered else base_name
        return fnmatch(target, lowered)

    return any(_hit(pattern) for pattern in patterns)
@dataclass(frozen=True)
class ModelFeatures:
    """Immutable set of capability flags resolved for one model name."""

    # True when the model matches FUNCTION_CALLING_PATTERNS.
    supports_function_calling: bool
    # True when the model matches REASONING_EFFORT_PATTERNS.
    supports_reasoning_effort: bool
    # True when the model matches PROMPT_CACHE_PATTERNS.
    supports_prompt_cache: bool
    # False when the model matches SUPPORTS_STOP_WORDS_FALSE_PATTERNS.
    supports_stop_words: bool
# Glob tables describing per-model capabilities. Entries are matched
# case-insensitively by model_matches(); keep every pattern lowercase.

# Models for which native function calling is enabled by default.
FUNCTION_CALLING_PATTERNS: list[str] = [
    # --- Anthropic ---
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet*',
    'claude-3.5-haiku*',
    'claude-3-5-haiku*',
    'claude-sonnet-4*',
    'claude-opus-4*',
    # --- OpenAI ---
    'gpt-4o*',
    'gpt-4.1',
    'gpt-5*',
    # --- OpenAI o-series (o1 is listed by exact name, per the existing list) ---
    'o1-2024-12-17',
    'o3*',
    'o4-mini*',
    # --- Google Gemini ---
    'gemini-2.5-pro*',
    # --- Other providers ---
    'kimi-k2-0711-preview',
    'kimi-k2-instruct',
    'qwen3-coder*',
    'qwen3-coder-480b-a35b-instruct',
]

# Models that accept the `reasoning_effort` parameter.
REASONING_EFFORT_PATTERNS: list[str] = [
    # Exact names only (mirrors main behavior, no unintended expansion),
    # plus the DeepSeek entry at the end.
    'o1-2024-12-17',
    'o1',
    'o3',
    'o3-2025-04-16',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o4-mini',
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
    'gpt-5',
    'gpt-5-2025-08-07',
    # DeepSeek reasoning family
    'deepseek-r1-0528*',
]

# Models for which prompt caching is supported.
PROMPT_CACHE_PATTERNS: list[str] = [
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet*',
    'claude-3-5-haiku*',
    'claude-3.5-haiku*',
    'claude-3-haiku-20240307',
    'claude-3-opus-20240229',
    'claude-sonnet-4*',
    'claude-opus-4*',
]

# Models that do NOT support stop words; every other model is assumed to.
SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
    # o1 family doesn't support stop words
    'o1*',
    # grok-4 specific model name (basename)
    'grok-4-0709',
    # DeepSeek R1 family
    'deepseek-r1-0528*',
]
def get_features(model: str) -> ModelFeatures:
    """Resolve the capability flags for ``model`` from the pattern tables."""
    function_calling = model_matches(model, FUNCTION_CALLING_PATTERNS)
    reasoning_effort = model_matches(model, REASONING_EFFORT_PATTERNS)
    prompt_cache = model_matches(model, PROMPT_CACHE_PATTERNS)
    # The stop-word table is a deny-list, so a match means "unsupported".
    stop_words_unsupported = model_matches(model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS)
    return ModelFeatures(
        supports_function_calling=function_calling,
        supports_reasoning_effort=reasoning_effort,
        supports_prompt_cache=prompt_cache,
        supports_stop_words=not stop_words_unsupported,
    )

View File

@@ -5,7 +5,7 @@ from typing import Any, Callable
from openhands.core.exceptions import UserCancelledError
from openhands.core.logger import openhands_logger as logger
from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
from openhands.llm.model_features import get_features
from openhands.llm.llm import REASONING_EFFORT_SUPPORTED_MODELS
class StreamingLLM(AsyncLLM):
@@ -65,7 +65,7 @@ class StreamingLLM(AsyncLLM):
)
# Set reasoning effort for models that support it
if get_features(self.config.model).supports_reasoning_effort:
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
kwargs['reasoning_effort'] = self.config.reasoning_effort
self.log_prompt(messages)

View File

@@ -67,7 +67,6 @@ from openhands.runtime.plugins import (
from openhands.runtime.runtime_status import RuntimeStatus
from openhands.runtime.utils.edit import FileEditRuntimeMixin
from openhands.runtime.utils.git_handler import CommandResult, GitHandler
from openhands.storage.locations import get_conversation_dir
from openhands.utils.async_utils import (
GENERAL_TIMEOUT,
call_async_from_sync,
@@ -877,14 +876,8 @@ fi
if isinstance(action, AgentThinkAction):
return AgentThinkObservation('Your thought has been logged.')
elif isinstance(action, TaskTrackingAction):
# Get the session-specific task file path
conversation_dir = get_conversation_dir(
self.sid, self.event_stream.user_id
)
task_file_path = f'{conversation_dir}TASKS.md'
# If `command` is `plan`, write the serialized task list to the file TASKS.md under `.openhands/`
if action.command == 'plan':
# Write the serialized task list to the session directory
content = '# Task List\n\n'
for i, task in enumerate(action.task_list, 1):
status_icon = {
@@ -893,39 +886,33 @@ fi
'done': '',
}.get(task.get('status', 'todo'), '')
content += f'{i}. {status_icon} {task.get("title", "")}\n{task.get("notes", "")}\n'
try:
self.event_stream.file_store.write(task_file_path, content)
return TaskTrackingObservation(
content=f'Task list has been updated with {len(action.task_list)} items. Stored in session directory: {task_file_path}',
command=action.command,
task_list=action.task_list,
)
except Exception as e:
write_obs = self.write(
FileWriteAction(path='.openhands/TASKS.md', content=content)
)
if isinstance(write_obs, ErrorObservation):
return ErrorObservation(
f'Failed to write task list to session directory {task_file_path}: {str(e)}'
f'Failed to write task list to .openhands/TASKS.md: {write_obs.content}'
)
return TaskTrackingObservation(
content=f'Task list has been updated with {len(action.task_list)} items.',
command=action.command,
task_list=action.task_list,
)
elif action.command == 'view':
# Read the TASKS.md file from the session directory
try:
content = self.event_stream.file_store.read(task_file_path)
# If `command` is `view`, read the TASKS.md file and return its content
read_obs = self.read(FileReadAction(path='.openhands/TASKS.md'))
if isinstance(read_obs, FileReadObservation):
return TaskTrackingObservation(
content=content,
content=read_obs.content,
command=action.command,
task_list=[], # Empty for view command
)
except FileNotFoundError:
return TaskTrackingObservation(
else:
return TaskTrackingObservation( # Return observation if error occurs because file might not exist yet
command=action.command,
task_list=[],
content='No task list found. Use the "plan" command to create one.',
)
except Exception as e:
return TaskTrackingObservation(
command=action.command,
task_list=[],
content=f'Failed to read the task list from session directory {task_file_path}. Error: {str(e)}',
content=f'Failed to read the task list. Error: {read_obs.content}',
)
return NullObservation('')

View File

@@ -213,23 +213,6 @@ class DockerRuntime(ActionExecutionClient):
self.set_runtime_status(RuntimeStatus.READY)
self._runtime_initialized = True
for network_name in self.config.sandbox.additional_networks:
try:
network = self.docker_client.networks.get(network_name)
if self.container is not None:
network.connect(self.container)
else:
self.log(
'warning',
f'Container not available to connect to network {network_name}',
)
except Exception as e:
self.log(
'error',
f'Error: Failed to connect instance {self.container_name} to network {network_name}',
)
self.log('error', str(e))
def maybe_build_runtime_container_image(self):
if self.runtime_container_image is None:
if self.base_container_image is None:

View File

@@ -201,14 +201,8 @@ class LocalRuntime(ActionExecutionClient):
# If there is an API key in the environment we use this in requests to the runtime
session_api_key = os.getenv('SESSION_API_KEY')
self._session_api_key: str | None = None
if session_api_key:
self.session.headers['X-Session-API-Key'] = session_api_key
self._session_api_key = session_api_key
@property
def session_api_key(self) -> str | None:
return self._session_api_key
@property
def action_execution_server_url(self) -> str:

View File

@@ -177,7 +177,9 @@ RUN \
/openhands/micromamba/bin/micromamba run -n openhands poetry install --only main,runtime --no-interaction --no-root && \
# Update and install additional tools
# (There used to be an "apt-get update" here, hopefully we can skip it.)
{% if enable_browser %}/openhands/micromamba/bin/micromamba run -n openhands poetry run playwright install --with-deps chromium && \{% endif %}
{% if enable_browser %}
/openhands/micromamba/bin/micromamba run -n openhands poetry run playwright install --with-deps chromium && \
{% endif %}
# Set environment variables
/openhands/micromamba/bin/micromamba run -n openhands poetry run python -c "import sys; print('OH_INTERPRETER_PATH=' + sys.executable)" >> /etc/environment && \
# Set permissions

View File

@@ -63,7 +63,7 @@ from openhands.server.user_auth import (
)
from openhands.server.user_auth.user_auth import AuthType
from openhands.server.utils import get_conversation as get_conversation_metadata
from openhands.server.utils import get_conversation_store, validate_conversation_id
from openhands.server.utils import get_conversation_store
from openhands.storage.conversation.conversation_store import ConversationStore
from openhands.storage.data_models.conversation_metadata import (
ConversationMetadata,
@@ -297,7 +297,7 @@ async def search_conversations(
@app.get('/conversations/{conversation_id}')
async def get_conversation(
conversation_id: str = Depends(validate_conversation_id),
conversation_id: str,
conversation_store: ConversationStore = Depends(get_conversation_store),
) -> ConversationInfo | None:
try:
@@ -319,7 +319,7 @@ async def get_conversation(
@app.delete('/conversations/{conversation_id}')
async def delete_conversation(
conversation_id: str = Depends(validate_conversation_id),
conversation_id: str,
user_id: str | None = Depends(get_user_id),
) -> bool:
conversation_store = await ConversationStoreImpl.get_instance(config, user_id)
@@ -338,8 +338,8 @@ async def delete_conversation(
@app.get('/conversations/{conversation_id}/remember-prompt')
async def get_prompt(
conversation_id: str,
event_id: int,
conversation_id: str = Depends(validate_conversation_id),
user_settings: SettingsStore = Depends(get_user_settings_store),
metadata: ConversationMetadata = Depends(get_conversation_metadata),
):
@@ -440,8 +440,8 @@ async def _get_conversation_info(
@app.post('/conversations/{conversation_id}/start')
async def start_conversation(
conversation_id: str,
providers_set: ProvidersSetModel,
conversation_id: str = Depends(validate_conversation_id),
user_id: str = Depends(get_user_id),
settings: Settings = Depends(get_user_settings),
conversation_store: ConversationStore = Depends(get_conversation_store),
@@ -501,7 +501,7 @@ async def start_conversation(
@app.post('/conversations/{conversation_id}/stop')
async def stop_conversation(
conversation_id: str = Depends(validate_conversation_id),
conversation_id: str,
user_id: str = Depends(get_user_id),
) -> ConversationResponse:
"""Stop an agent loop for a conversation.
@@ -606,8 +606,8 @@ class UpdateConversationRequest(BaseModel):
@app.patch('/conversations/{conversation_id}')
async def update_conversation(
conversation_id: str,
data: UpdateConversationRequest,
conversation_id: str = Depends(validate_conversation_id),
user_id: str | None = Depends(get_user_id),
conversation_store: ConversationStore = Depends(get_conversation_store),
) -> bool:
@@ -714,8 +714,7 @@ async def update_conversation(
@app.post('/conversations/{conversation_id}/exp-config')
def add_experiment_config_for_conversation(
exp_config: ExperimentConfig,
conversation_id: str = Depends(validate_conversation_id),
conversation_id: str, exp_config: ExperimentConfig
) -> bool:
exp_config_filepath = get_experiment_config_filename(conversation_id)
exists = False

View File

@@ -13,50 +13,6 @@ from openhands.storage.conversation.conversation_store import ConversationStore
from openhands.storage.data_models.conversation_metadata import ConversationMetadata
def validate_conversation_id(conversation_id: str) -> str:
    """Check a conversation ID for length and unsafe characters.

    Args:
        conversation_id: The conversation ID to validate.

    Returns:
        The same conversation ID, unchanged, when every check passes.

    Raises:
        HTTPException: With status 400 when the ID is longer than 100
            characters, contains a null byte, contains '..', '/' or '\\',
            or contains any character with a code point below 32.
    """
    # Ordered (is_invalid, detail) rules; the first failing rule decides the
    # error message. A UUID hex is 32 characters, so 100 leaves some slack.
    rules = (
        (len(conversation_id) > 100, 'Conversation ID is too long'),
        ('\x00' in conversation_id, 'Conversation ID contains invalid characters'),
        (
            any(token in conversation_id for token in ('..', '/', '\\')),
            'Conversation ID contains invalid path characters',
        ),
        (
            any(ord(ch) < 32 for ch in conversation_id),
            'Conversation ID contains control characters',
        ),
    )
    for is_invalid, detail in rules:
        if is_invalid:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=detail,
            )
    return conversation_id
async def get_conversation_store(request: Request) -> ConversationStore | None:
conversation_store: ConversationStore | None = getattr(
request.state, 'conversation_store', None

33
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
[[package]]
name = "aiofiles"
@@ -404,7 +404,7 @@ description = "LTS Port of Python audioop"
optional = false
python-versions = ">=3.13"
groups = ["main"]
markers = "python_version >= \"3.13\""
markers = "python_version == \"3.13\""
files = [
{file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd1345ae99e17e6910f47ce7d52673c6a1a70820d78b67de1b7abb3af29c426a"},
{file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:e175350da05d2087e12cea8e72a70a1a8b14a17e92ed2022952a4419689ede5e"},
@@ -2997,8 +2997,8 @@ files = [
google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev"
proto-plus = [
{version = ">=1.22.3,<2.0.0dev"},
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0dev"},
]
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev"
@@ -3020,8 +3020,8 @@ googleapis-common-protos = ">=1.56.2,<2.0.0"
grpcio = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
grpcio-status = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
proto-plus = [
{version = ">=1.22.3,<2.0.0"},
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0"},
]
protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
requests = ">=2.18.0,<3.0.0"
@@ -3239,8 +3239,8 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0"
grpc-google-iam-v1 = ">=0.14.0,<1.0.0"
proto-plus = [
{version = ">=1.22.3,<2.0.0"},
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0"},
]
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
@@ -6558,14 +6558,14 @@ pydantic = ">=1.8"
[[package]]
name = "openhands-aci"
version = "0.3.2"
version = "0.3.1"
description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
optional = false
python-versions = "<4.0,>=3.12"
groups = ["main"]
files = [
{file = "openhands_aci-0.3.2-py3-none-any.whl", hash = "sha256:a3ff6fe3dd50124598b8bc3aff8d9742d6e75f933f7e7635a9d0b37d45eb826e"},
{file = "openhands_aci-0.3.2.tar.gz", hash = "sha256:df7b64df6acb70b45b23e88c13508e7af8f27725bed30c3e88691a0f3d1f7a44"},
{file = "openhands_aci-0.3.1-py3-none-any.whl", hash = "sha256:d1d9d5379388bc0119c6722b8dacf63f7c747788ac5b6c26263601b2001d11c3"},
{file = "openhands_aci-0.3.1.tar.gz", hash = "sha256:125c4773b3fd2729ec0c74d005095dad21aa0f7a1e8733e5f33f3f71466f6df9"},
]
[package.dependencies]
@@ -6663,8 +6663,8 @@ files = [
[package.dependencies]
googleapis-common-protos = ">=1.52,<2.0"
grpcio = [
{version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
{version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
]
opentelemetry-api = ">=1.15,<2.0"
opentelemetry-exporter-otlp-proto-common = "1.34.1"
@@ -9438,7 +9438,6 @@ files = [
{file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
{file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
]
markers = {evaluation = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
[package.extras]
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""]
@@ -9682,7 +9681,7 @@ description = "Standard library aifc redistribution. \"dead battery\"."
optional = false
python-versions = "*"
groups = ["main"]
markers = "python_version >= \"3.13\""
markers = "python_version == \"3.13\""
files = [
{file = "standard_aifc-3.13.0-py3-none-any.whl", hash = "sha256:f7ae09cc57de1224a0dd8e3eb8f73830be7c3d0bc485de4c1f82b4a7f645ac66"},
{file = "standard_aifc-3.13.0.tar.gz", hash = "sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43"},
@@ -9699,7 +9698,7 @@ description = "Standard library chunk redistribution. \"dead battery\"."
optional = false
python-versions = "*"
groups = ["main"]
markers = "python_version >= \"3.13\""
markers = "python_version == \"3.13\""
files = [
{file = "standard_chunk-3.13.0-py3-none-any.whl", hash = "sha256:17880a26c285189c644bd5bd8f8ed2bdb795d216e3293e6dbe55bbd848e2982c"},
{file = "standard_chunk-3.13.0.tar.gz", hash = "sha256:4ac345d37d7e686d2755e01836b8d98eda0d1a3ee90375e597ae43aaf064d654"},
@@ -11388,14 +11387,14 @@ test = ["pytest", "pytest-cov"]
[[package]]
name = "xlsxwriter"
version = "3.2.5"
version = "3.2.3"
description = "A Python module for creating Excel XLSX files."
optional = false
python-versions = ">=3.8"
python-versions = ">=3.6"
groups = ["main"]
files = [
{file = "xlsxwriter-3.2.5-py3-none-any.whl", hash = "sha256:4f4824234e1eaf9d95df9a8fe974585ff91d0f5e3d3f12ace5b71e443c1c6abd"},
{file = "xlsxwriter-3.2.5.tar.gz", hash = "sha256:7e88469d607cdc920151c0ab3ce9cf1a83992d4b7bc730c5ffdd1a12115a7dbe"},
{file = "XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d"},
{file = "xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5"},
]
[[package]]
@@ -11879,4 +11878,4 @@ third-party-runtimes = ["daytona", "e2b", "modal", "runloop-api-client"]
[metadata]
lock-version = "2.1"
python-versions = "^3.12,<3.14"
content-hash = "469b54a3f7f5d104f68503fc70a89c016cbb7d9b7dc019226ed62e93ee928b98"
content-hash = "6dacf40441269b9e00d5d37c2dc034c725d77c2eb88118c0a36bf54d76d076ec"

View File

@@ -64,7 +64,7 @@ opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
libtmux = ">=0.37,<0.40"
pygithub = "^2.5.0"
joblib = "*"
openhands-aci = "0.3.2"
openhands-aci = "0.3.1"
python-socketio = "^5.11.4"
sse-starlette = "^2.1.3"
psutil = "*"

View File

@@ -1,226 +0,0 @@
#!/usr/bin/env python3
"""
Update OpenHands OpenAPI documentation.
Generates the OpenAPI specification from the FastAPI application and writes it
to docs/openapi.json.
Usage:
python scripts/update_openapi.py
Behavior:
- Uses openhands.server.app.app.openapi() to build the spec.
- Preserves existing "servers" from docs/openapi.json if present; otherwise
writes sensible defaults.
- Sets info.version to openhands.__version__.
- Sanitizes endpoint descriptions to remove code blocks and internal-only sections.
- Excludes operational, experimental, or UI-only convenience endpoints:
- /api/conversations/{conversation_id}/exp-config
- /server_info
- /api/conversations/{conversation_id}/vscode-url
- /api/conversations/{conversation_id}/web-hosts
- Creates a backup docs/openapi.json.backup before overwriting.
Output:
- Prints OpenAPI and API versions, endpoint count, servers count, and sample endpoints.
"""
import json
import logging
import os
import sys
import warnings
from pathlib import Path
# Suppress warnings and logs during import
logging.getLogger().setLevel(logging.CRITICAL)
warnings.filterwarnings('ignore')
os.environ['OPENHANDS_LOG_LEVEL'] = 'CRITICAL'
# Add the project root to the Python path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
try:
from openhands import __version__
from openhands.server.app import app
except ImportError as e:
print(f'Error importing OpenHands modules: {e}')
print(
"Make sure you're running this script from the project root and dependencies are installed."
)
sys.exit(1)
def _sanitize_description(text: str) -> str:
"""Remove internal, code-centric, or redundant sections from endpoint descriptions.
- Strip fenced code blocks
- Remove Args/Returns/Raises/Example/Examples/Notes sections
- Remove inline curl examples
- Avoid provider-implementation specifics like LiteLLM/Bedrock
"""
import re
if not text:
return text
# Remove fenced code blocks
text = re.sub(r'```[\s\S]*?```', '', text, flags=re.MULTILINE)
# Remove common docstring sections (until next blank line or end)
for header in [
r'Args?:',
r'Returns?:',
r'Raises?:',
r'Example[s]?:',
r'Notes?:',
]:
text = re.sub(rf'(?ms)^\s*{header}.*?(?:\n\s*\n|\Z)', '', text)
# Remove lines that contain curl examples
text = re.sub(r'(?im)^.*\bcurl\b.*$', '', text)
# Generalize provider-implementation specifics
text = re.sub(r'\bLiteLLM\b', 'configured model providers', text)
text = re.sub(r'\blitellm\b', 'configured providers', text)
text = re.sub(r'\bBedrock\b', '', text)
# Collapse excessive blank lines and trim
text = re.sub(r'\n{3,}', '\n\n', text).strip()
return text
def _sanitize_spec(spec: dict) -> dict:
"""Sanitize descriptions and summaries to be public-API friendly."""
path_summary_overrides = {
'/api/options/models': 'List Supported Models',
'/api/options/agents': 'List Agents',
'/api/options/security-analyzers': 'List Security Analyzers',
'/api/conversations/{conversation_id}/list-files': 'List Workspace Files',
'/api/conversations/{conversation_id}/select-file': 'Get File Content',
'/api/conversations/{conversation_id}/zip-directory': 'Download Workspace Archive',
}
path_description_overrides = {
'/api/options/models': 'List model identifiers available on this server based on configured providers.',
'/api/options/agents': 'List available agent types supported by this server.',
'/api/options/security-analyzers': 'List supported security analyzers.',
'/api/conversations/{conversation_id}/list-files': 'List workspace files visible to the conversation runtime. Applies .gitignore and internal ignore rules.',
'/api/conversations/{conversation_id}/select-file': 'Return the content of the given file from the conversation workspace.',
'/api/conversations/{conversation_id}/zip-directory': 'Return a ZIP archive of the current conversation workspace.',
}
for path, methods in list(spec.get('paths', {}).items()):
for method, meta in list(methods.items()):
if not isinstance(meta, dict):
continue
# Override overly specific summaries where helpful
if path in path_summary_overrides:
meta['summary'] = path_summary_overrides[path]
# Override description if provided; otherwise sanitize
if path in path_description_overrides:
meta['description'] = path_description_overrides[path]
elif 'description' in meta and isinstance(meta['description'], str):
meta['description'] = _sanitize_description(meta['description'])
return spec
def generate_openapi_spec():
    """Build the public OpenAPI spec from the FastAPI app.

    Takes ``app.openapi()``, removes the excluded endpoints (printing one line
    per removal), and returns the spec after ``_sanitize_spec``.
    """
    spec = app.openapi()

    # Endpoints that are operational, experimental, or UI-only convenience.
    excluded_endpoints = (
        '/api/conversations/{conversation_id}/exp-config',  # Internal experimentation endpoint
        '/server_info',  # Operational/system diagnostics
        '/api/conversations/{conversation_id}/vscode-url',  # UI/runtime convenience
        '/api/conversations/{conversation_id}/web-hosts',  # UI/runtime convenience
    )

    if 'paths' in spec:
        paths = spec['paths']
        for endpoint in excluded_endpoints:
            if endpoint not in paths:
                continue
            paths.pop(endpoint)
            print(f'Excluded endpoint: {endpoint}')

    # Sanitize descriptions and summaries before handing the spec back.
    return _sanitize_spec(spec)
def load_current_spec(spec_path):
    """Load the current OpenAPI specification if it exists.

    Args:
        spec_path: ``pathlib.Path`` of the JSON spec file.

    Returns:
        The parsed JSON document, or an empty dict when the file is absent.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if not spec_path.exists():
        return {}
    # JSON files are UTF-8; do not rely on the platform's locale encoding.
    with open(spec_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def update_openapi_spec(spec_path, backup=True):
    """Regenerate the OpenAPI specification file and return the new spec.

    Args:
        spec_path: ``pathlib.Path`` of the spec file to (over)write.
        backup: When true and ``spec_path`` exists, move the current file to
            ``<name>.json.backup`` before writing, replacing any older backup.

    Returns:
        The freshly generated spec dict, as written to disk.
    """
    # Generate new spec
    new_spec = generate_openapi_spec()

    # Preserve server information from the current spec if it exists;
    # otherwise fall back to the default production/local pair.
    current_spec = load_current_spec(spec_path)
    if 'servers' in current_spec:
        new_spec['servers'] = current_spec['servers']
    else:
        new_spec['servers'] = [
            {'url': 'https://app.all-hands.dev', 'description': 'Production server'},
            {'url': 'http://localhost:3000', 'description': 'Local server'},
        ]

    # Update version to match the package version
    new_spec['info']['version'] = __version__

    # Backup current file if requested
    if backup and spec_path.exists():
        backup_path = spec_path.with_suffix('.json.backup')
        # Path.rename raises FileExistsError on Windows when the backup from a
        # previous run is still there; Path.replace overwrites it everywhere.
        spec_path.replace(backup_path)
        print(f'Backed up current spec to {backup_path}')

    # Write new spec as UTF-8 regardless of the platform's locale encoding.
    with open(spec_path, 'w', encoding='utf-8') as f:
        json.dump(new_spec, f, indent=2)

    return new_spec
def main():
    """Regenerate docs/openapi.json and print a short summary.

    Any exception raised during the update is reported and the process exits
    with status 1.
    """
    spec_path = project_root / 'docs' / 'openapi.json'
    print('Updating OpenAPI specification...')
    print(f'Target file: {spec_path}')

    try:
        new_spec = update_openapi_spec(spec_path)

        openapi_version = new_spec.get('openapi', 'N/A')
        api_version = new_spec.get('info', {}).get('version', 'N/A')
        endpoints = new_spec.get('paths', {})
        server_count = len(new_spec.get('servers', []))

        print('✅ Successfully updated OpenAPI specification!')
        print(f' OpenAPI version: {openapi_version}')
        print(f' API version: {api_version}')
        print(f' Total endpoints: {len(endpoints)}')
        print(f' Servers: {server_count}')

        # Show the first five endpoints (alphabetically) with their methods.
        endpoint_names = list(endpoints.keys())
        if endpoint_names:
            print(' Sample endpoints:')
            for path in sorted(endpoint_names)[:5]:
                methods = list(new_spec['paths'][path].keys())
                print(f' {path}: {methods}')
            remaining = len(endpoint_names) - 5
            if remaining > 0:
                print(f' ... and {remaining} more')
    except Exception as e:
        print(f'❌ Error updating OpenAPI specification: {e}')
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -94,7 +94,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
# Verify the server is actually stopped by trying to start another one
# on the same port (regardless of OS)
action = CmdRunAction(command='ls')
action.set_hard_timeout(3)
action.set_hard_timeout(1)
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)

View File

@@ -44,7 +44,7 @@ def sse_mcp_docker_server():
container_command_args = [
'--stdio',
'npx -y @modelcontextprotocol/server-filesystem@2025.8.18 /',
'npx -y @modelcontextprotocol/server-filesystem /',
'--port',
str(container_internal_port), # MCP server inside container listens on this
'--baseUrl',
@@ -292,7 +292,7 @@ async def test_microagent_and_one_stdio_mcp_in_config(
name='filesystem',
command='npx',
args=[
'@modelcontextprotocol/server-filesystem@2025.8.18',
'@modelcontextprotocol/server-filesystem',
'/',
],
)

View File

@@ -12,10 +12,8 @@ from litellm.exceptions import (
from openhands.core.config import LLMConfig
from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
from openhands.core.message import Message, TextContent
from openhands.llm.async_llm import AsyncLLM
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics, TokenUsage
from openhands.llm.streaming_llm import StreamingLLM
@pytest.fixture(autouse=True)
@@ -254,7 +252,7 @@ def test_response_latency_tracking(mock_time, mock_litellm_completion):
@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
default_config.model = 'openrouter/gpt-4o-mini'
default_config.model = 'openrouter:gpt-4o-mini'
mock_get_model_info.return_value = {
'max_input_tokens': 7000,
'max_output_tokens': 1500,
@@ -263,7 +261,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
llm.init_model_info()
assert llm.config.max_input_tokens == 7000
assert llm.config.max_output_tokens == 1500
mock_get_model_info.assert_called_once_with('openrouter/gpt-4o-mini')
mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
@patch('openhands.llm.llm.litellm_completion')
@@ -1203,108 +1201,6 @@ def test_gemini_medium_reasoning_effort_passes_through(mock_completion):
assert call_kwargs.get('reasoning_effort') == 'medium'
@patch('openhands.llm.llm.litellm_completion')
def test_opus_41_keeps_temperature_top_p(mock_completion):
    """Opus 4.1 requests carry ``temperature`` and omit ``top_p``."""
    mock_completion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    llm = LLM(
        LLMConfig(
            model='anthropic/claude-opus-4-1-20250805',
            api_key='k',
            temperature=0.7,
            top_p=0.9,
        ),
        service_id='svc',
    )
    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])

    sent_kwargs = mock_completion.call_args.kwargs
    assert sent_kwargs.get('temperature') == 0.7
    # Anthropic rejects both temperature and top_p together on Opus; we keep temperature and drop top_p
    assert 'top_p' not in sent_kwargs
@patch('openhands.llm.llm.litellm_completion')
def test_opus_4_keeps_temperature_top_p(mock_completion):
    """Opus 4 requests carry both ``temperature`` and ``top_p`` unchanged."""
    mock_completion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    llm = LLM(
        LLMConfig(
            model='anthropic/claude-opus-4-20250514',
            api_key='k',
            temperature=0.7,
            top_p=0.9,
        ),
        service_id='svc',
    )
    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])

    sent_kwargs = mock_completion.call_args.kwargs
    assert sent_kwargs.get('temperature') == 0.7
    assert sent_kwargs.get('top_p') == 0.9
@patch('openhands.llm.llm.litellm_completion')
def test_opus_41_disables_thinking(mock_completion):
    """Opus 4.1 requests are sent with ``thinking`` explicitly disabled."""
    mock_completion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    llm = LLM(
        LLMConfig(model='anthropic/claude-opus-4-1-20250805', api_key='k'),
        service_id='svc',
    )
    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])

    sent_kwargs = mock_completion.call_args.kwargs
    assert sent_kwargs.get('thinking') == {'type': 'disabled'}
@patch('openhands.llm.llm.litellm.get_model_info')
def test_is_caching_prompt_active_anthropic_prefixed(mock_get_model_info):
    """Prompt caching is reported active for an ``anthropic/``-prefixed Claude model."""
    # Avoid external calls, but behavior shouldn't depend on model info
    mock_get_model_info.side_effect = Exception('skip')

    llm = LLM(
        LLMConfig(
            model='anthropic/claude-3-7-sonnet', api_key='k', caching_prompt=True
        ),
        service_id='svc',
    )

    assert llm.is_caching_prompt_active() is True
@patch('openhands.llm.llm.httpx.get')
@patch('openhands.llm.llm.litellm.get_model_info')
def test_openhands_provider_rewrite_and_caching_prompt(
    mock_get_model_info, mock_httpx_get
):
    """An ``openhands/`` model is rewritten to ``litellm_proxy/`` and keeps prompt caching."""
    # Payload mimicking the LiteLLM proxy /v1/model/info response
    proxy_payload = {
        'data': [
            {
                'model_name': 'claude-3.7-sonnet',
                'model_info': {
                    'max_input_tokens': 200000,
                    'max_output_tokens': 64000,
                    'supports_vision': True,
                },
            }
        ]
    }
    # Minimal stand-in response object exposing only .json()
    response_cls = type('Resp', (), {'json': lambda self=None: proxy_payload})
    mock_httpx_get.return_value = response_cls()
    mock_get_model_info.return_value = {
        'max_input_tokens': 200000,
        'max_output_tokens': 64000,
    }

    llm = LLM(
        LLMConfig(
            model='openhands/claude-3.7-sonnet', api_key='k', caching_prompt=True
        ),
        service_id='svc',
    )

    # Model should be rewritten to litellm_proxy/...
    assert llm.config.model.startswith('litellm_proxy/claude-3.7-sonnet')
    # Caching prompt should be active for Claude
    assert llm.is_caching_prompt_active() is True
@patch('openhands.llm.llm.litellm_completion')
def test_gemini_high_reasoning_effort_passes_through(mock_completion):
"""Test that Gemini with reasoning_effort='high' passes through to litellm."""
@@ -1343,61 +1239,10 @@ def test_non_gemini_uses_reasoning_effort(mock_completion):
sample_messages = [{'role': 'user', 'content': 'Hello, how are you?'}]
llm.completion(messages=sample_messages)
@patch('openhands.llm.async_llm.litellm_acompletion')
@pytest.mark.asyncio
async def test_async_reasoning_effort_passthrough(mock_acompletion):
    """Check the async completion call receives reasoning_effort and sampling params.

    The mocked ``litellm_acompletion`` must be called with
    ``reasoning_effort``, ``temperature`` and ``top_p`` exactly as configured.
    """
    mock_acompletion.return_value = {'choices': [{'message': {'content': 'ok'}}]}

    settings = LLMConfig(
        model='o3',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
        reasoning_effort='low',
    )
    async_llm = AsyncLLM(settings, service_id='svc')
    user_turn = {'role': 'user', 'content': 'hi'}
    await async_llm.async_completion(messages=[user_turn])

    sent_kwargs = mock_acompletion.call_args[1]
    assert sent_kwargs.get('reasoning_effort') == 'low'
    # Async path does not pop temperature/top_p (parity with main)
    assert sent_kwargs.get('temperature') == 0.7
    assert sent_kwargs.get('top_p') == 0.9
@patch('openhands.llm.streaming_llm.AsyncLLM._call_acompletion')
@pytest.mark.asyncio
async def test_streaming_reasoning_effort_passthrough(mock_call):
    """Check the streaming call receives reasoning_effort and sampling params.

    ``AsyncLLM._call_acompletion`` is replaced by a coroutine that returns an
    object yielding a single chunk; only the keyword arguments of that call
    are inspected.
    """

    async def single_chunk_stream(*args, **kwargs):
        class OneChunk:
            # Async-generator __aiter__: yields exactly one delta chunk.
            async def __aiter__(self):
                yield {'choices': [{'delta': {'content': 'x'}}]}

        return OneChunk()

    mock_call.side_effect = single_chunk_stream

    settings = LLMConfig(
        model='o3',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
        reasoning_effort='low',
    )
    streaming_llm = StreamingLLM(settings, service_id='svc')
    user_turn = {'role': 'user', 'content': 'hi'}

    # Pull at most one chunk; the call arguments are all this test checks.
    chunks = streaming_llm.async_streaming_completion(messages=[user_turn])
    async for _ in chunks:
        break

    sent_kwargs = mock_call.call_args[1]
    assert sent_kwargs.get('reasoning_effort') == 'low'
    assert sent_kwargs.get('temperature') == 0.7
    assert sent_kwargs.get('top_p') == 0.9
@patch('openhands.llm.async_llm.litellm_acompletion')
@pytest.mark.asyncio
async def test_async_streaming_no_thinking_for_gemini(mock_acompletion):
mock_acompletion.return_value = {
'choices': [{'message': {'content': 'ok'}}],
}
config = LLMConfig(model='gemini-2.5-pro', api_key='k', reasoning_effort='low')
llm = AsyncLLM(config, service_id='svc')
await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
call_kwargs = mock_acompletion.call_args[1]
# Verify that reasoning_effort was used and thinking budget was not set
call_kwargs = mock_completion.call_args[1]
assert 'thinking' not in call_kwargs
assert call_kwargs.get('reasoning_effort') == 'high'
@patch('openhands.llm.llm.litellm_completion')

View File

@@ -96,52 +96,6 @@ FNCALL_TOOLS: list[ChatCompletionToolParam] = [
]
def test_malformed_parameter_parsing_recovery():
    """Check conversion recovers from a malformed ``<parameter=...>`` tag.

    The assistant message simulates a ``str_replace_editor`` call whose
    ``command`` parameter is emitted as
    ``<parameter=command=str_replace</parameter>``; the converted message must
    still contain one tool call with the expected JSON arguments.
    """
    from openhands.llm.fn_call_converter import (
        convert_non_fncall_messages_to_fncall_messages,
    )

    # Assistant text with the malformed 'command' parameter on the second line.
    raw_call_lines = [
        '<function=str_replace_editor>',
        '<parameter=command=str_replace</parameter>',  # malformed form
        '<parameter=path>/repo/app.py</parameter>',
        '<parameter=old_str>foo</parameter>',
        '<parameter=new_str>bar</parameter>',
        '</function>',
    ]
    assistant_message = {'role': 'assistant', 'content': '\n'.join(raw_call_lines)}
    conversation = [
        {'role': 'system', 'content': 'test'},
        {'role': 'user', 'content': 'do edit'},
        assistant_message,
    ]

    converted = convert_non_fncall_messages_to_fncall_messages(
        conversation, FNCALL_TOOLS
    )

    # The final message must be the assistant turn with exactly one tool call.
    final_message = converted[-1]
    assert final_message['role'] == 'assistant'
    assert 'tool_calls' in final_message
    assert len(final_message['tool_calls']) == 1

    call = final_message['tool_calls'][0]
    assert call['type'] == 'function'
    assert call['function']['name'] == 'str_replace_editor'

    # Arguments must decode as JSON with the recovered command and parameters.
    decoded = json.loads(call['function']['arguments'])
    assert decoded['command'] == 'str_replace'
    assert decoded['path'] == '/repo/app.py'
    assert decoded['old_str'] == 'foo'
    assert decoded['new_str'] == 'bar'
def test_convert_tools_to_description():
formatted_tools = convert_tools_to_description(FNCALL_TOOLS)
print(formatted_tools)

View File

@@ -1,291 +0,0 @@
import pytest
from openhands.llm.model_features import (
ModelFeatures,
get_features,
model_matches,
normalize_model_name,
)
@pytest.mark.parametrize(
    ('raw', 'expected'),
    [
        (' OPENAI/gpt-4o ', 'gpt-4o'),
        ('anthropic/claude-3-7-sonnet', 'claude-3-7-sonnet'),
        ('litellm_proxy/gemini-2.5-pro', 'gemini-2.5-pro'),
        ('qwen3-coder-480b-a35b-instruct', 'qwen3-coder-480b-a35b-instruct'),
        ('gpt-5', 'gpt-5'),
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', 'deepseek-r1-0528'),
        ('openai/GLM-4.5-GGUF', 'glm-4.5'),
        ('openrouter/gpt-4o-mini', 'gpt-4o-mini'),
        (
            'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0',
            'anthropic.claude-3-5-sonnet-20241022-v2',
        ),
        ('', ''),
        (None, ''),  # type: ignore[arg-type]
    ],
)
def test_normalize_model_name(raw, expected):
    """Check ``normalize_model_name(raw)`` yields ``expected`` for each case."""
    normalized = normalize_model_name(raw)
    assert normalized == expected
@pytest.mark.parametrize(
    ('name', 'pattern', 'expected'),
    [
        ('gpt-4o', 'gpt-4o*', True),
        ('openai/gpt-4o', 'gpt-4o*', True),
        ('litellm_proxy/gpt-4o-mini', 'gpt-4o*', True),
        ('claude-3-7-sonnet-20250219', 'claude-3-7-sonnet*', True),
        ('o1-2024-12-17', 'o1*', True),
        ('grok-4-0709', 'grok-4-0709', True),
        ('grok-4-0801', 'grok-4-0709', False),
    ],
)
def test_model_matches(name, pattern, expected):
    """Check ``model_matches`` against a single unqualified pattern."""
    outcome = model_matches(name, [pattern])
    assert outcome is expected
@pytest.mark.parametrize(
    ('name', 'pattern', 'expected'),
    [
        ('openai/gpt-4o', 'openai/gpt-4o*', True),
        ('openrouter/gpt-4o', 'openai/gpt-4o*', False),
        ('litellm_proxy/gpt-4o-mini', 'litellm_proxy/gpt-4o*', True),
        # basename alone should not match provider-qualified
        ('gpt-4o', 'openai/gpt-4o*', False),
        ('unknown-model', 'gpt-5*', False),
    ],
)
def test_model_matches_provider_qualified(name, pattern, expected):
    """Check ``model_matches`` when the pattern includes a provider prefix."""
    outcome = model_matches(name, [pattern])
    assert outcome is expected
@pytest.mark.parametrize(
    ('model', 'expect'),
    [
        (
            'gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'gpt-5',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o3-mini',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o1-2024-12-17',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'xai/grok-4-0709',
            ModelFeatures(
                supports_function_calling=False,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'anthropic/claude-3-7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'litellm_proxy/claude-3.7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'gemini-2.5-pro',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        # provider-qualified still matches basename patterns
        (
            'openai/gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
    ],
)
def test_get_features(model, expect):
    """Check ``get_features(model)`` equals the full expected ``ModelFeatures``."""
    assert get_features(model) == expect
@pytest.mark.parametrize(
    'model',
    [
        # Anthropic families
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3.5-haiku',
        'claude-3-5-haiku-20241022',
        'claude-sonnet-4-latest',
        'claude-opus-4-1-20250805',
        # OpenAI families
        'gpt-4o',
        'gpt-4.1',
        'gpt-5',
        # o-series
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        # Google Gemini
        'gemini-2.5-pro',
        # Others
        'kimi-k2-0711-preview',
        'kimi-k2-instruct',
        'qwen3-coder',
        'qwen3-coder-480b-a35b-instruct',
    ],
)
def test_function_calling_models(model):
    """Check each listed model reports ``supports_function_calling`` as True."""
    assert get_features(model).supports_function_calling is True
@pytest.mark.parametrize(
    'model',
    [
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        'gemini-2.5-flash',
        'gemini-2.5-pro',
        'gpt-5',
    ],
)
def test_reasoning_effort_models(model):
    """Check each listed model reports ``supports_reasoning_effort`` as True."""
    assert get_features(model).supports_reasoning_effort is True
@pytest.mark.parametrize(
    'model',
    ['deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', 'DeepSeek-R1-0528'],
)
def test_deepseek_reasoning_effort_models(model):
    """Check both DeepSeek-R1 spellings report ``supports_reasoning_effort``."""
    assert get_features(model).supports_reasoning_effort is True
@pytest.mark.parametrize(
    'model',
    [
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3-5-haiku-20241022',
        'claude-3-haiku-20240307',
        'claude-3-opus-20240229',
        'claude-sonnet-4-latest',
    ],
)
def test_prompt_cache_models(model):
    """Check each listed Claude model reports ``supports_prompt_cache`` as True."""
    assert get_features(model).supports_prompt_cache is True
@pytest.mark.parametrize(
    ('model', 'expected'),
    [
        # Positive cases: exactly those supported on main
        ('o1', True),
        ('o1-2024-12-17', True),
        ('o3', True),
        ('o3-2025-04-16', True),
        ('o3-mini', True),
        ('o3-mini-2025-01-31', True),
        ('o4-mini', True),
        ('o4-mini-2025-04-16', True),
        ('gemini-2.5-flash', True),
        ('gemini-2.5-pro', True),
        ('gpt-5', True),
        ('gpt-5-2025-08-07', True),
        ('claude-opus-4-1-20250805', False),
        # DeepSeek
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', True),
        ('DeepSeek-R1-0528', True),
        # Negative cases: ensure we didn't unintentionally expand
        ('o1-mini', False),
        ('o1-preview', False),
        ('gemini-1.0-pro', False),
    ],
)
def test_reasoning_effort_parity_with_main(model, expected):
    """Check ``supports_reasoning_effort`` matches the expected flag per model."""
    supported = get_features(model).supports_reasoning_effort
    assert supported is expected
def test_prompt_cache_haiku_variants():
    """Check both the dashed and dotted 3.5 Haiku names support prompt caching."""
    for haiku_name in ('claude-3-5-haiku-20241022', 'claude-3.5-haiku-20241022'):
        assert get_features(haiku_name).supports_prompt_cache is True
def test_stop_words_grok_provider_prefixed():
    """Check grok-4-0709 lacks stop-word support with and without ``xai/``."""
    for grok_name in ('xai/grok-4-0709', 'grok-4-0709'):
        assert get_features(grok_name).supports_stop_words is False
@pytest.mark.parametrize(
    'model',
    [
        'o1-mini',
        'o1-2024-12-17',
        'xai/grok-4-0709',
        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
        'DeepSeek-R1-0528',
    ],
)
def test_supports_stop_words_false_models(model):
    """Check each listed model reports ``supports_stop_words`` as False."""
    assert get_features(model).supports_stop_words is False

View File

@@ -1,118 +0,0 @@
"""
Test cases for conversation ID validation to ensure proper error handling.
This addresses GitHub issue #10489 where long conversation IDs were returning
500 Internal Server Error instead of proper 4xx errors.
"""
import pytest
from fastapi import HTTPException, status
from openhands.server.utils import validate_conversation_id
class TestConversationIdValidation:
    """Tests for ``validate_conversation_id``: accepted IDs and 400 rejections."""

    @staticmethod
    def _assert_rejected(conversation_id, detail_fragment):
        """Assert the ID raises a 400 ``HTTPException`` mentioning *detail_fragment*."""
        with pytest.raises(HTTPException) as caught:
            validate_conversation_id(conversation_id)

        assert caught.value.status_code == status.HTTP_400_BAD_REQUEST
        assert detail_fragment in caught.value.detail

    def test_valid_conversation_id(self):
        """Valid conversation IDs are returned unchanged."""
        accepted_ids = (
            'a1b2c3d4e5f6789012345678901234ab',  # normal UUID hex (32 characters)
            'abc123',  # shorter valid ID
            'a1b2c3d4-e5f6-7890-1234-5678901234ab',  # hyphenated UUID format
        )
        for candidate in accepted_ids:
            assert validate_conversation_id(candidate) == candidate

    def test_long_conversation_id_rejected(self):
        """Very long conversation IDs are rejected with 400 Bad Request.

        This is the main test case for GitHub issue #10489.
        """
        # 1000-character ID, similar to the one in the reported issue.
        self._assert_rejected('a' * 1000, 'too long')

    def test_conversation_id_with_null_bytes_rejected(self):
        """Conversation IDs containing a null byte are rejected."""
        self._assert_rejected('valid\x00id', 'invalid characters')

    def test_conversation_id_with_path_traversal_rejected(self):
        """Conversation IDs that look like path traversal are rejected."""
        traversal_ids = [
            '../../../etc/passwd',
            'id/../other',
            'id\\..\\other',
            'id/with/slashes',
            'id\\with\\backslashes',
        ]
        for bad_id in traversal_ids:
            self._assert_rejected(bad_id, 'invalid path characters')

    def test_conversation_id_with_control_characters_rejected(self):
        """Conversation IDs containing control characters are rejected."""
        control_char_ids = [
            'id\nwith\nnewlines',
            'id\twith\ttabs',
            'id\rwith\rcarriage',
            'id\x01with\x02control',
        ]
        for bad_id in control_char_ids:
            self._assert_rejected(bad_id, 'control characters')

    def test_conversation_id_boundary_length(self):
        """IDs of exactly 100 characters pass; 101 characters are rejected."""
        at_limit = 'a' * 100
        assert validate_conversation_id(at_limit) == at_limit

        self._assert_rejected('a' * 101, 'too long')

    def test_empty_conversation_id(self):
        """An empty conversation ID passes validation unchanged."""
        # Empty string should pass validation (will fail later in business logic)
        assert validate_conversation_id('') == ''

    def test_conversation_id_with_spaces(self):
        """Conversation IDs containing spaces are allowed by validation."""
        # Spaces are printable characters (ASCII 32), so they should be allowed.
        # The business logic might reject them, but validation should pass.
        spaced_id = 'id with spaces'
        assert validate_conversation_id(spaced_id) == spaced_id

Some files were not shown because too many files have changed in this diff Show More