Compare commits

...

140 Commits

Author SHA1 Message Date
Engel Nyst
b1b875d4aa Merge branch 'main' into openhands-fix-issue-8199 2025-05-21 22:11:01 +02:00
Engel Nyst
637cb0726a specify condenser config for evals (#8177)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-21 22:08:57 +02:00
tofarr
2bd10de636 Revert "Fix for issue where initial env vars are not passed to runtime" (#8624) 2025-05-21 20:00:56 +00:00
dependabot[bot]
70322c8418 chore(deps): bump the version-all group across 1 directory with 8 updates (#8617)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>
2025-05-21 19:34:59 +00:00
Rohit Malhotra
8b08958efe [Fix]: make mcp config optional in settings (#8622) 2025-05-21 19:17:43 +00:00
dependabot[bot]
5b021ad1bb chore(deps): bump the version-all group across 1 directory with 2 updates (#8618)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-21 16:39:07 +00:00
Rohit Malhotra
890796cc9d [Feat]: Git mcp server to open PRs (#8348)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: Robert Brennan <accounts@rbren.io>
2025-05-21 11:48:02 -04:00
sp.wack
7305c8fb31 hotfix(frontend): Prevent merging conversation events when switching between conversations (#8614) 2025-05-21 15:12:04 +00:00
Xingyao Wang
f1897b8095 docs: add Devstral MLX link for local models documentation (#8615) 2025-05-21 15:04:50 +00:00
Engel Nyst
c26ef180f2 Fix unsupported MCP tools param (#8610) 2025-05-21 14:41:01 +00:00
Robert Brennan
37e9933092 Revert "Fix passing environment" (#8612) 2025-05-21 14:32:47 +00:00
Xingyao Wang
c353fb6e7e docs: update local llm documentation (#8609)
Co-authored-by: mamoodi <mamoodiha@gmail.com>
2025-05-21 14:05:21 +00:00
chuckbutkus
3280f450ac Update to login if session times out and return to previous conversation (#8587)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-20 22:10:28 +00:00
Graham Neubig
6335afb010 Fix environment variable casting for dict and list types (#8494)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-20 17:47:07 -04:00
mamoodi
40d9b0b13a Remove duplicated documentation from cognition (#8600) 2025-05-20 21:23:12 +00:00
Boxuan Li
005c5d6bde Document native windows support without WSL (#8567)
Co-authored-by: mamoodi <mamoodiha@gmail.com>
2025-05-20 20:54:32 +00:00
Rohit Malhotra
0deabd5935 [Feat]: add context msg to new conversation endpoint (#8586)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-20 20:47:15 +00:00
Erkin Alp Güney
6f5bb4341f Add a DeepWiki reference (#8350) 2025-05-20 16:20:56 -04:00
dependabot[bot]
a5daf0e3c1 chore(deps): bump the version-all group with 6 updates (#8596)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-20 21:35:22 +02:00
tofarr
3873d9f002 Fix for issue where initial env vars are not passed to runtime (#8597) 2025-05-20 10:58:43 -06:00
Robert Brennan
5e1039e4b5 Make chat auto-scroll functionality more explicit (#8562)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-20 12:58:05 -04:00
mamoodi
d648d249d8 Release 0.39.0 (#8539) 2025-05-20 11:39:55 -04:00
tofarr
aa55da27fa Fix unlocalized strings in frontend components (#8585)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-19 19:12:26 +00:00
tofarr
e69d6b3ef1 fix: add missing translation keys to declaration.ts (#8580)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-19 11:49:57 -06:00
tofarr
be1ddaa57d Add conversationUrl static variable with getter and setter methods (#8531)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-19 11:28:07 -06:00
Robert Brennan
7b59e81048 make scrollbar prettier (#8581) 2025-05-19 17:23:47 +00:00
dependabot[bot]
470687f826 chore(deps): bump the mcp-packages group with 2 updates (#8546)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-19 18:37:11 +02:00
tofarr
38b4d93237 Add Session API Key Authentication for Runtime Communication (#8550) 2025-05-19 09:59:22 -06:00
dependabot[bot]
872b97a3c8 chore(deps): bump the version-all group across 1 directory with 20 updates (#8545)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>
2025-05-19 15:49:53 +00:00
sp.wack
14334040f1 chore(frontend): Refactor chat interface-related event handling (#8403) 2025-05-19 15:15:09 +00:00
sp.wack
b244138ec5 fix(frontend): Prevent making too many calls to /git/changes on conversation load (#8579) 2025-05-19 18:57:18 +04:00
Xingyao Wang
4a3d2e6859 Fix #8551: Show images produced in Jupyter Notebook to LLM directly (#8552)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-19 14:14:00 +00:00
luolin101
1a3cb16ba6 add Visual SWE-bench benchmark (#7131)
Co-authored-by: tsukimi <yuailun@pku.edu.cn>
Co-authored-by: Ryan H. Tran <descience.thh10@gmail.com>
2025-05-19 12:08:46 +07:00
Xingyao Wang
2ecc39ffcc [eval]: disable MCP for SWE-Bench evaluation (#8574)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
2025-05-19 01:32:46 +00:00
Graham Neubig
0b26174d60 Add documentation microagent (#8563)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-18 16:34:15 -04:00
Calvin Smith
b0005d4e09 Limit size of browser events (#8559)
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
2025-05-18 11:35:09 -06:00
Graham Neubig
2dc7b37fe8 Fix flaky TestLocalFileStore tests (#8569)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-18 06:37:28 -04:00
Carlos Freund
27c18f5bdd build(makefile) Develop in OpenhandsCloud (#7440)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-17 14:40:46 +00:00
Graham Neubig
5077fea5c7 Fix: Run setup.sh script in GitHub resolver (#8548)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-17 09:52:34 -04:00
Justin Coffi
bf383b4881 Add SSH Microagent (#8436)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyaoww@gmail.com>
2025-05-17 05:56:51 +00:00
OpenHands
c17b0ebfc6 Fix issue #8304: [Bug]: Non-native tool use converter fails when builtin tools are disabled (#8310)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-05-17 06:37:45 +02:00
Xingyao Wang
1f390430e5 feat(MCP, microagent): MCP-support for Repo Microagent & add fetch as default tool (#8360)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-16 23:32:38 +00:00
Robert Brennan
819bad0777 Fix: Only show login modal for genuine 401 errors, not connection issues (#8540)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 13:33:06 -04:00
dependabot[bot]
2faed14139 chore(deps): bump the version-all group with 5 updates (#8541)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 17:17:38 +00:00
tofarr
4733270e3c Add event search endpoints with filtering and pagination (#8538)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 09:51:40 -06:00
Robert Brennan
21dd91de63 Add info logging for 401 Unauthorized responses (#8527)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 11:46:15 -04:00
Graham Neubig
25619c5a93 Fix #8510: Improve error messages for invalid microagent format (#8511)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 15:01:39 +00:00
Robert Brennan
15f71e7ed6 Update microagent message from "Activated" to "ready" (#8536)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 10:44:37 -04:00
mamoodi
5b583e5f27 Organization and user microagents (#8506) 2025-05-16 10:42:32 -04:00
Rohit Malhotra
c191a17afb [Fix]: don't access secrets if doesn't exist (#8535)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 14:30:16 +00:00
Robert Brennan
8ec5d0e043 Add WebSocket connection documentation (#8404)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 10:12:05 -04:00
Robert Brennan
f3f038bb60 Fix mypy error for pkg_resources import (#8537)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 14:06:41 +00:00
Robert Brennan
b8d3027cfe Always enable logout button regardless of GitHub connection status (#8529)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 09:09:33 -04:00
Rohit Malhotra
feb04dc65f Plumb custom secrets to runtime (#8330)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-15 20:06:30 -04:00
Engel Nyst
1f827170f4 Fix resolver test (#8530) 2025-05-15 21:57:02 +00:00
Engel Nyst
f7cb2d0f64 Restore previous conversation in CLI (#8431) 2025-05-15 23:47:41 +02:00
tofarr
033788c2d0 API Updates to facilitate nested runtimes. (#8525) 2025-05-15 15:38:09 -06:00
dependabot[bot]
21d0990be4 chore(deps): bump the version-all group with 7 updates (#8522)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-15 20:17:51 +00:00
tofarr
6227073cff Add missing Ukrainian translations for secrets-related keys (#8526)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-15 19:25:06 +00:00
Graham Neubig
4c38113cb7 Fix CI to check for missing translations (#8486)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-15 13:31:44 -04:00
Emmanuel Ferdman
fb516dfa0f Remove obsolete task.py file (#8517)
Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
2025-05-15 18:54:25 +02:00
sp.wack
04d585513c feat: secrets manager settings (#8068)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: rohitvinodmalhotra@gmail.com <rohitvinodmalhotra@gmail.com>
2025-05-15 11:30:10 -04:00
Robert Brennan
7a4ea23b9d Revert "Add Docker, Java, Golang, and other programming languages to runtime image" (#8518) 2025-05-15 14:29:15 +00:00
sp.wack
7490c1927f fix(frontend): Failing tests (#8519) 2025-05-15 14:20:52 +00:00
tofarr
8d2ac59909 Fix passing environment (#8483)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-15 07:23:21 -06:00
Ryan H. Tran
68e5f485aa fix: validation error when saving SSE MCP server url on the UI (#8502) 2025-05-15 06:35:06 +00:00
Graham Neubig
e4c284f96d Add timeout parameter to bash tool for hard timeout control (#8106)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-05-15 13:24:42 +08:00
Yueqi Song
3ca585b79f Update run_infer.py to incorporate selection of task based on repo (#8509) 2025-05-15 12:27:28 +08:00
tofarr
7e88d4195f Refactor event store cleanup (#8505)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-14 16:20:43 -06:00
Engel Nyst
f046efd53d Revert "Make str_replace_editor description more clear (#8434)" (#8501) 2025-05-15 04:08:16 +08:00
Engel Nyst
e4586432ad Add top_k (#8480) 2025-05-14 21:46:03 +02:00
sp.wack
d956abe56b fix(frontend): Show actions when idle (#8507) 2025-05-14 18:53:32 +00:00
Robert Brennan
6145552841 Add Docker, Java, Golang, and other programming languages to runtime image (#8026)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-14 13:46:06 -04:00
dependabot[bot]
b1dca48c8e chore(deps): bump the version-all group with 6 updates (#8504)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-14 16:39:01 +00:00
Robert Brennan
81ba80dde0 Fix issue #8327: Unable to delete the entire default branch name in input box (#8329)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-14 12:07:12 -04:00
mamoodi
08a790c4ca Update default model to sonnet 3.7 in all applicable places (#8489)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-14 10:55:34 -04:00
Robert Brennan
1b57fd4d1e Remove github_user_id in favor of user_id (#8406)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-14 10:17:35 -04:00
Robert Brennan
c36cbf6543 Fix padding on last paragraph in messages (#8491)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-05-14 10:24:21 +00:00
Xingyao Wang
67d438ea4f Fix workspace mount behavior with SANDBOX_VOLUMES (#8500) 2025-05-14 14:58:03 +08:00
Graham Neubig
154eed148f Fix typing in server directory (#8375)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>
2025-05-13 21:27:59 +00:00
Graham Neubig
f9b0fcd76e Add API documentation link to API Keys tab in settings (#8363)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-13 20:09:58 +00:00
mamoodi
0782aeb1c4 Update recommended models (#8488)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-13 15:46:22 -04:00
dependabot[bot]
55fbb65e05 chore(deps): bump the version-all group across 1 directory with 12 updates (#8478)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-13 21:20:50 +02:00
mamoodi
1abed30b44 Update MCP docs and LLM docs with more accurate information (#8482) 2025-05-13 13:09:25 -04:00
tofarr
1f29ec836b Fix missing translations in frontend i18n files (#8481)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-13 17:02:06 +00:00
Rohit Malhotra
81c754ec65 [Fix]: Don't allow endpoint to modify conversation trigger (#8396) 2025-05-13 11:57:32 -04:00
Rohit Malhotra
880ec57c78 [Fix]: Status icon regression (#8427) 2025-05-13 11:43:35 -04:00
mamoodi
e06aac7521 Remove unnecessary frontmatter from repo microagent (#8477) 2025-05-13 11:32:35 -04:00
Graham Neubig
60d9b519e0 Add proper typing to cli directory (#8374)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-13 09:55:44 -04:00
Graham Neubig
5ad11e73b8 Proposed update to resolver prompt (#8473) 2025-05-13 13:48:23 +00:00
Graham Neubig
3e5b16b348 Fix translation completeness issues (#8472)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-13 09:42:33 -04:00
Graham Neubig
f3d0ae3fbf Add type annotations to local runtime implementation (#8376)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-13 09:42:07 -04:00
Engel Nyst
dea3ddfcc6 Clean up codecov (#8465) 2025-05-13 14:51:47 +02:00
kotauchisunsun
31b2f3c9c2 [refactor]: Refactor sandbox configuration setup in IssueResolver class (#8414) 2025-05-13 13:37:15 +02:00
omahs
4bb6ec2ee5 Fix typos (#8469) 2025-05-13 09:34:21 +00:00
Engel Nyst
ae8ed49280 Make str_replace_editor description more clear (#8434) 2025-05-13 13:08:53 +07:00
mamoodi
786e21fb8a Add more run eval options (#8468) 2025-05-13 02:33:14 +00:00
Graham Neubig
f317c03b1b Fix inconsistent max_iterations in SWE-bench evaluation (#8467)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-13 02:07:57 +00:00
Chase
e72153629d fix #8424: change native_tool_calling semantics (#8463) 2025-05-12 19:21:51 -04:00
mamoodi
b127d5f656 Add exc_info to remote runtime log (#8457) 2025-05-12 15:45:58 -05:00
tofarr
f75fa50b80 Add number of connections to Conversation Info (#8456)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-12 13:57:52 -06:00
mamoodi
5a927c8651 Release 0.38.0 (#8446) 2025-05-12 15:41:14 -04:00
chuckbutkus
2693360ad0 Auth URL fix of on-prem (#8455) 2025-05-12 17:28:04 +00:00
sp.wack
1081f8091d improve(frontend): Changes tab status message logic (#8454) 2025-05-12 12:57:35 -04:00
sp.wack
8d0e5c6c34 hotfix: Don't handle git changes side effect too frequently (#8451) 2025-05-12 12:57:00 -04:00
Robert Brennan
0b897ff3dc Add 10px bottom padding to paragraph tags in markdown rendering (#8440)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-12 11:03:17 -04:00
Ryan H. Tran
c5ace563c4 fix: remove duplicate rendering of tab components (#8442)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-05-12 11:15:41 +00:00
Engel Nyst
9af132933c Fix log for clearing pending action to be at the same level (#8430) 2025-05-12 02:50:36 +02:00
Engel Nyst
10c56932af Fix: update pre-commit docs (#8433)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-12 02:13:33 +02:00
Robert Brennan
e9905115c4 Allow websocket connection to pass in Authorization header to conversation validator (#8405)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-11 16:27:02 -04:00
Emmanuel Ferdman
6b11fff735 Resolve warnings of logger library (#8432)
Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
2025-05-11 15:29:53 +00:00
Xingyao Wang
3d02c0c3a3 Fix issue #8372: Implement browser screenshot saving functionality (#8383)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
2025-05-11 15:51:18 +08:00
kotauchisunsun
a17c57d82e [refactor]: Refactored the initialization of issue_handler within IssueResolver (#8417)
Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>
2025-05-10 21:27:06 -04:00
Muly Oved
da637a0dad Update docker_runtime.py #8422 (#8423) 2025-05-11 00:08:14 +02:00
Polly
27c49471a8 Fix Bug #8425 - Enable prompt cache for OpenRouter model of calude-3.7-sonnet (#8426) 2025-05-11 00:07:31 +02:00
Robert Brennan
bffe8de597 Add support for user/org level microagents (#8402)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-10 09:34:34 -04:00
OpenHands
f0bb7de1c6 Fix issue #8145: Add docs about runtime tests (#8146) 2025-05-10 12:40:35 +02:00
Polly
90aab29bc0 Fix Issue #8413 max_output_tokens in openrouter/anthropic/claude-3.7-sonnet doesn't work correctly (#8415) 2025-05-10 08:29:39 +00:00
sp.wack
ade059bfba feat/fix(fontend): Get public repos via repo URL (#8223)
Co-authored-by: Robert Brennan <accounts@rbren.io>
Co-authored-by: rohitvinodmalhotra@gmail.com <rohitvinodmalhotra@gmail.com>
2025-05-09 23:45:33 +00:00
Robert Brennan
5073cee7ff Add CODEOWNERS file (#8359)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
2025-05-09 21:54:46 +00:00
Rohit Malhotra
dc4b06f96b [Docs]: fix param key for openhands api docs (#8399) 2025-05-09 19:11:51 +00:00
Engel Nyst
b75bad16e4 Merge branch 'main' into openhands-fix-issue-8199 2025-05-09 20:36:59 +02:00
Engel Nyst
226d1ecd9f Merge branch 'main' into openhands-fix-issue-8199 2025-05-09 20:06:55 +02:00
Engel Nyst
cf9e17e85a fix raise on git config 2025-05-02 18:56:37 +02:00
Engel Nyst
ec22a15b6b Merge branch 'main' into openhands-fix-issue-8199 2025-05-02 18:39:42 +02:00
Engel Nyst
d2466d2570 Merge branch 'main' into openhands-fix-issue-8199 2025-05-02 13:33:25 +02:00
Engel Nyst
6c66e18388 Merge branch 'main' into openhands-fix-issue-8199 2025-05-02 08:58:56 +02:00
Engel Nyst
74b60c4930 Update openhands/resolver/send_pull_request.py 2025-05-02 02:38:53 +02:00
openhands
560901262b Fix pr #8212: Fix the resolver to comment / finish without code changes 2025-05-02 00:34:24 +00:00
openhands
eec1fa9abf Fix pr #8212: Fix the resolver to comment / finish without code changes 2025-05-02 00:01:30 +00:00
Engel Nyst
11bd0289e0 Update openhands/resolver/interfaces/issue_definitions.py 2025-05-02 00:55:52 +02:00
Engel Nyst
23edbd56b8 Update openhands/resolver/interfaces/issue_definitions.py 2025-05-02 00:55:22 +02:00
Engel Nyst
2141473907 Update openhands/resolver/interfaces/issue_definitions.py 2025-05-02 00:54:54 +02:00
Engel Nyst
a6194ea990 Update tests/unit/resolver/github/test_send_pull_request.py 2025-05-02 00:53:47 +02:00
Engel Nyst
8c0dfdfe0a Update openhands/resolver/interfaces/issue_definitions.py 2025-05-02 00:50:23 +02:00
OpenHands Bot
2496b8592e 🤖 Auto-fix Python linting issues 2025-05-01 22:44:38 +00:00
openhands
8bf1db8cce Fix pr #8212: Fix issue #8199: [Bug]: Fix the resolver to comment / finish without code changes 2025-05-01 22:41:02 +00:00
openhands
ce2dc26b47 Fix pr #8212: Fix issue #8199: [Bug]: Fix the resolver to comment / finish without code changes 2025-05-01 22:28:46 +00:00
openhands
8c204936ee Fix pr #8212: Fix issue #8199: [Bug]: Fix the resolver to comment / finish without code changes 2025-05-01 21:28:07 +00:00
openhands
aeba03b0e7 Fix issue #8199: [Bug]: Fix the resolver to comment / finish without code changes 2025-05-01 20:07:03 +00:00
331 changed files with 14793 additions and 6607 deletions

19
.github/.codecov.yml vendored
View File

@@ -1,19 +0,0 @@
codecov:
notify:
wait_for_ci: true
# our project is large, so 6 builds are typically uploaded. this waits till 5/6
# See https://docs.codecov.com/docs/notifications#section-preventing-notifications-until-after-n-builds
after_n_builds: 5
coverage:
status:
patch:
default:
threshold: 100% # allow patch coverage to be lower than project coverage by any amount
project:
default:
threshold: 5% # allow project coverage to drop at most 5%
comment: false
github_checks:
annotations: false

11
.github/CODEOWNERS vendored Normal file
View File

@@ -0,0 +1,11 @@
# CODEOWNERS file for OpenHands repository
# See https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
# Frontend code owners
/frontend/ @rbren @amanape
# Evaluation code owners
/evaluation/ @xingyaoww @neubig
# Documentation code owners
/docs/ @mamoodi

View File

@@ -13,6 +13,10 @@ updates:
browsergym:
patterns:
- "browsergym*"
mcp-packages:
patterns:
- "mcp"
- "mcpm"
security-all:
applies-to: "security-updates"
patterns:

View File

@@ -42,7 +42,3 @@ jobs:
- name: Run tests and collect coverage
working-directory: ./frontend
run: npm run test:coverage
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

View File

@@ -312,11 +312,7 @@ jobs:
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
RUN_AS_OPENHANDS=false \
poetry run pytest -n 7 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
poetry run pytest -n 7 -raRs --reruns 2 --reruns-delay 5 -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
# Run unit tests with the Docker runtime Docker images as openhands user
test_runtime_oh:
@@ -381,11 +377,7 @@ jobs:
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
RUN_AS_OPENHANDS=true \
poetry run pytest -n 7 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
poetry run pytest -n 7 -raRs --reruns 2 --reruns-delay 5 -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
# The two following jobs (named identically) are to check whether all the runtime tests have passed as the
# "All Runtime Tests Passed" is a required job for PRs to merge

View File

@@ -30,11 +30,12 @@ jobs:
run: |
cd frontend
npm install --frozen-lockfile
- name: Lint and TypeScript compilation
- name: Lint, TypeScript compilation, and translation checks
run: |
cd frontend
npm run lint
npm run make-i18n && tsc
npm run check-translation-completeness
# Run lint on the python code
lint-python:

View File

@@ -48,11 +48,7 @@ jobs:
- name: Build Environment
run: make build
- name: Run Tests
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: poetry run pytest --forked -n auto -svv ./tests/unit
# Run specific Windows python tests
test-on-windows:

View File

@@ -9,7 +9,7 @@ on:
jobs:
trigger-job:
name: Trigger remote eval job
if: ${{ github.event.label.name == 'run-eval-xs' || github.event.label.name == 'run-eval-s' || github.event.label.name == 'run-eval-m' }}
if: ${{ github.event.label.name == 'run-eval-1' || github.event.label.name == 'run-eval-2' || github.event.label.name == 'run-eval-50' || github.event.label.name == 'run-eval-100' }}
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
@@ -26,12 +26,14 @@ jobs:
echo "Repository URL: $REPO_URL"
echo "PR Branch: $PR_BRANCH"
if [[ "${{ github.event.label.name }}" == "run-eval-xs" ]]; then
if [[ "${{ github.event.label.name }}" == "run-eval-1" ]]; then
EVAL_INSTANCES="1"
elif [[ "${{ github.event.label.name }}" == "run-eval-s" ]]; then
EVAL_INSTANCES="5"
elif [[ "${{ github.event.label.name }}" == "run-eval-m" ]]; then
EVAL_INSTANCES="30"
elif [[ "${{ github.event.label.name }}" == "run-eval-2" ]]; then
EVAL_INSTANCES="2"
elif [[ "${{ github.event.label.name }}" == "run-eval-50" ]]; then
EVAL_INSTANCES="50"
elif [[ "${{ github.event.label.name }}" == "run-eval-100" ]]; then
EVAL_INSTANCES="100"
fi
curl -X POST \

View File

@@ -0,0 +1,33 @@
---
name: documentation
type: knowledge
version: 1.0.0
agent: CodeActAgent
triggers:
- documentation
- docs
- document
---
# Documentation Guidelines
All documentation must be grounded in fact, so you must not make anything up without proper evidence. When you have finished writing documentation, tell the user which reference sources (web pages, source code, or other documentation) you relied on for each new fact in the documentation. If you cannot cite a source for a statement, do not include it in the pull request.
## Best Practices for Documentation
1. **Be Factual**: Only include information that can be verified from reliable sources.
2. **Cite Sources**: Always reference the source of information (code, web pages, official documentation).
3. **Be Clear and Concise**: Use simple language and avoid unnecessary jargon.
4. **Use Examples**: Include practical examples to illustrate concepts.
5. **Structure Properly**: Use headings, lists, and code blocks to organize information.
6. **Keep Updated**: Ensure documentation reflects the current state of the code or system.
## Documentation Process
1. Research and gather information from reliable sources
2. Draft documentation based on verified facts
3. Review for accuracy and completeness
4. Include references for all factual statements
5. Submit only when all information is properly sourced
Remember: If you cannot verify a piece of information, it's better to exclude it than to include potentially incorrect information.

View File

@@ -1,8 +1,3 @@
---
name: repo
type: repo
agent: CodeActAgent
---
This repository contains the code for OpenHands, an automated AI software engineer. It has a Python backend
(in the `openhands` directory) and React frontend (in the `frontend` directory).
@@ -14,7 +9,7 @@ IMPORTANT: Before making any changes to the codebase, ALWAYS run `make install-p
Before pushing any changes, you MUST ensure that any lint errors or simple test errors have been fixed.
* If you've made changes to the backend, you should run `pre-commit run --all-files --config ./dev_config/python/.pre-commit-config.yaml`
* If you've made changes to the backend, you should run `pre-commit run --config ./dev_config/python/.pre-commit-config.yaml` (this will run on staged files).
* If you've made changes to the frontend, you should run `cd frontend && npm run lint:fix && npm run build ; cd ..`
The pre-commit hooks MUST pass successfully before pushing any changes to the repository. This is a mandatory requirement to maintain code quality and consistency.

View File

@@ -1,8 +1,8 @@
# Development Guide
This guide is for people working on OpenHands and editing the source code.
If you wish to contribute your changes, check out the [CONTRIBUTING.md](https://github.com/All-Hands-AI/OpenHands/blob/main/CONTRIBUTING.md) on how to clone and setup the project initially before moving on.
Otherwise, you can clone the OpenHands project directly.
If you wish to contribute your changes, check out the [CONTRIBUTING.md](https://github.com/All-Hands-AI/OpenHands/blob/main/CONTRIBUTING.md) on how to clone and set up the project
initially before moving on. Otherwise, you can clone the OpenHands project directly.
## Start the Server for Development
@@ -21,7 +21,8 @@ Make sure you have all these dependencies installed before moving on to `make bu
#### Develop without sudo access
If you want to develop without system admin/sudo access to upgrade/install `Python` and/or `NodeJs`, you can use `conda` or `mamba` to manage the packages for you:
If you want to develop without system admin/sudo access to upgrade/install `Python` and/or `NodeJs`, you can use
`conda` or `mamba` to manage the packages for you:
```bash
# Download and install Mamba (a faster version of conda)
@@ -36,7 +37,8 @@ mamba install conda-forge::poetry
### 2. Build and Setup The Environment
Begin by building the project which includes setting up the environment and installing dependencies. This step ensures that OpenHands is ready to run on your system:
Begin by building the project which includes setting up the environment and installing dependencies. This step ensures
that OpenHands is ready to run on your system:
```bash
make build
@@ -45,8 +47,6 @@ make build
### 3. Configuring the Language Model
OpenHands supports a diverse array of Language Models (LMs) through the powerful [litellm](https://docs.litellm.ai) library.
By default, we've chosen Claude Sonnet 3.5 as our go-to model, but the world is your oyster! You can unleash the
potential of any other LM that piques your interest.
To configure the LM of your choice, run:
@@ -54,9 +54,12 @@ To configure the LM of your choice, run:
make setup-config
```
This command will prompt you to enter the LLM API key, model name, and other variables ensuring that OpenHands is tailored to your specific needs. Note that the model name will apply only when you run headless. If you use the UI, please set the model in the UI.
This command will prompt you to enter the LLM API key, model name, and other variables ensuring that OpenHands is
tailored to your specific needs. Note that the model name will apply only when you run headless. If you use the UI,
please set the model in the UI.
Note: If you have previously run OpenHands using the docker command, you may have already set some environmental variables in your terminal. The final configurations are set from highest to lowest priority:
Note: If you have previously run OpenHands using the docker command, you may have already set some environmental
variables in your terminal. The final configurations are set from highest to lowest priority:
Environment variables > config.toml variables > default variables
**Note on Alternative Models:**
@@ -74,13 +77,15 @@ make run
#### Option B: Individual Server Startup
- **Start the Backend Server:** If you prefer, you can start the backend server independently to focus on backend-related tasks or configurations.
- **Start the Backend Server:** If you prefer, you can start the backend server independently to focus on
backend-related tasks or configurations.
```bash
make start-backend
```
- **Start the Frontend Server:** Similarly, you can start the frontend server on its own to work on frontend-related components or interface enhancements.
- **Start the Frontend Server:** Similarly, you can start the frontend server on its own to work on frontend-related
components or interface enhancements.
```bash
make start-frontend
```
@@ -115,10 +120,10 @@ poetry run pytest ./tests/unit/test_*.py
### 9. Use existing Docker image
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker
container image by setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.37-nikolaik`
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.39-nikolaik`
## Develop inside Docker container

View File

@@ -3,17 +3,12 @@ These are the procedures and guidelines on how issues are triaged in this repo b
## General
* All issues must be tagged with **enhancement**, **bug** or **troubleshooting/help**.
* Issues may be tagged with what it relates to (**agent quality**, **frontend**, **resolver**, etc.).
* Issues may be tagged with what they relate to (**agent quality**, **resolver**, **CLI**, etc.).
## Severity
* **Low**: Minor issues or issues affecting a single user.
* **Medium**: Affecting multiple users.
* **High**: High visibility issues or affecting many users.
* **Critical**: Affecting all users or potential security issues.
## Effort
* Issues may be estimated with effort required (**small effort**, **medium effort**, **large effort**).
## Difficulty
* Issues with low implementation difficulty may be tagged with **good first issue**.

View File

@@ -5,6 +5,7 @@ SHELL=/usr/bin/env bash
BACKEND_HOST ?= "127.0.0.1"
BACKEND_PORT = 3000
BACKEND_HOST_PORT = "$(BACKEND_HOST):$(BACKEND_PORT)"
FRONTEND_HOST ?= "127.0.0.1"
FRONTEND_PORT = 3001
DEFAULT_WORKSPACE_DIR = "./workspace"
DEFAULT_MODEL = "gpt-4o"
@@ -288,6 +289,15 @@ setup-config-prompts:
@read -p "Enter your LLM base URL [mostly used for local LLMs, leave blank if not needed - example: http://localhost:5001/v1/]: " llm_base_url; \
if [[ ! -z "$$llm_base_url" ]]; then echo "base_url=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; fi
setup-config-basic:
@printf '%s\n' \
'[core]' \
'workspace_base="./workspace"' \
> config.toml
@echo "$(GREEN)config.toml created.$(RESET)"
openhands-cloud-run:
@$(MAKE) run BACKEND_HOST="0.0.0.0" BACKEND_PORT="12000" FRONTEND_HOST="0.0.0.0" FRONTEND_PORT="12001"
# Develop in container
docker-dev:
@@ -322,5 +332,4 @@ help:
@echo " $(GREEN)help$(RESET) - Display this help message, providing information on available targets."
# Phony targets
.PHONY: build check-dependencies check-python check-npm check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint start-backend start-frontend run run-wsl setup-config setup-config-prompts help
.PHONY: docker-dev docker-run
.PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help

View File

@@ -51,17 +51,17 @@ system requirements and more information.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.39
```
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
@@ -92,6 +92,7 @@ If you want to modify the OpenHands source code, check out [Development.md](http
Having issues? The [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting) can help.
## 📖 Documentation
<a href="https://deepwiki.com/All-Hands-AI/OpenHands"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki" title="Autogenerated Documentation by DeepWiki"></a>
To learn more about the project, and for tips on using OpenHands,
check out our [documentation](https://docs.all-hands.dev/modules/usage/getting-started).

View File

@@ -11,7 +11,7 @@ services:
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
- SANDBOX_API_HOSTNAME=host.docker.internal
#
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.37-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.39-nikolaik}
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:

View File

@@ -7,7 +7,7 @@ services:
image: openhands:latest
container_name: openhands-app-${DATE:-}
environment:
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik}
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:

View File

@@ -69,7 +69,7 @@ data = {
response = requests.post(url, headers=headers, json=data)
conversation = response.json()
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['id']}")
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['conversation_id']}")
print(f"Status: {conversation['status']}")
```
</details>

View File

@@ -37,7 +37,7 @@ Pour exécuter OpenHands en mode CLI avec Docker :
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -46,7 +46,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.cli
```

View File

@@ -34,7 +34,7 @@ Pour exécuter OpenHands en mode Headless avec Docker :
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -44,7 +44,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -58,17 +58,17 @@ Un système avec un processeur moderne et un minimum de **4 Go de RAM** est reco
La façon la plus simple d'exécuter OpenHands est dans Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.39
```
Vous trouverez OpenHands en cours d'exécution à l'adresse http://localhost:3000 !

View File

@@ -69,7 +69,7 @@ data = {
response = requests.post(url, headers=headers, json=data)
conversation = response.json()
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['id']}")
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['conversation_id']}")
print(f"Status: {conversation['status']}")
```
</details>

View File

@@ -36,7 +36,7 @@ DockerでOpenHandsをCLIモードで実行するには
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.cli
```

View File

@@ -33,7 +33,7 @@ DockerでヘッドレスモードでOpenHandsを実行するには
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -58,17 +58,17 @@ OpenHandsを実行するには、最新のプロセッサと最低**4GB RAM**を
OpenHandsを実行する最も簡単な方法はDockerを使用することです。
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.39
```
OpenHandsは http://localhost:3000 で実行されています!

View File

@@ -69,7 +69,7 @@ data = {
response = requests.post(url, headers=headers, json=data)
conversation = response.json()
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['id']}")
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['conversation_id']}")
print(f"Status: {conversation['status']}")
```
</details>

View File

@@ -37,7 +37,7 @@ Para executar o OpenHands no modo CLI com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -46,7 +46,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.cli
```

View File

@@ -34,7 +34,7 @@ Para executar o OpenHands em modo Headless com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -44,7 +44,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -58,17 +58,17 @@
A maneira mais fácil de executar o OpenHands é no Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.39
```
Você encontrará o OpenHands rodando em http://localhost:3000!

View File

@@ -69,7 +69,7 @@ data = {
response = requests.post(url, headers=headers, json=data)
conversation = response.json()
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['id']}")
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['conversation_id']}")
print(f"Status: {conversation['status']}")
```
</details>

View File

@@ -36,7 +36,7 @@ poetry run python -m openhands.core.cli
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.cli
```

View File

@@ -33,7 +33,7 @@ poetry run python -m openhands.core.main -t "write a bash script that prints hi"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -58,17 +58,17 @@
运行 OpenHands 最简单的方法是使用 Docker。
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.39
```
OpenHands 将在 http://localhost:3000 运行!

View File

@@ -70,7 +70,7 @@ data = {
response = requests.post(url, headers=headers, json=data)
conversation = response.json()
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['id']}")
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['conversation_id']}")
print(f"Status: {conversation['status']}")
```
</details>

View File

@@ -31,7 +31,7 @@ This command opens an interactive prompt where you can type tasks or commands an
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -40,7 +40,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.cli.main
```

View File

@@ -31,7 +31,7 @@ To run OpenHands in Headless mode with Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -41,7 +41,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.39 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -0,0 +1,181 @@
---
sidebar_position: 9
---
# Connecting to the WebSocket
This guide explains how to connect to the OpenHands WebSocket API to receive real-time events and send actions to the agent.
## Overview
OpenHands uses [Socket.IO](https://socket.io/) for WebSocket communication between the client and server. The WebSocket connection allows you to:
1. Receive real-time events from the agent
2. Send user actions to the agent
3. Maintain a persistent connection for ongoing conversations
## Connecting to the WebSocket
### Connection Parameters
When connecting to the WebSocket, you need to provide the following query parameters:
- `conversation_id`: The ID of the conversation you want to join
- `latest_event_id`: The ID of the latest event you've received (use `-1` for a new connection)
- `providers_set`: (Optional) A comma-separated list of provider types
### Connection Example
Here's a basic example of connecting to the WebSocket using JavaScript:
```javascript
import { io } from "socket.io-client";
const socket = io("http://localhost:3000", {
transports: ["websocket"],
query: {
conversation_id: "your-conversation-id",
latest_event_id: -1,
providers_set: "github,gitlab" // Optional
}
});
socket.on("connect", () => {
console.log("Connected to OpenHands WebSocket");
});
socket.on("oh_event", (event) => {
console.log("Received event:", event);
});
socket.on("connect_error", (error) => {
console.error("Connection error:", error);
});
socket.on("disconnect", (reason) => {
console.log("Disconnected:", reason);
});
```
## Sending Actions to the Agent
To send an action to the agent, use the `oh_user_action` event:
```javascript
// Send a user message to the agent
socket.emit("oh_user_action", {
type: "message",
source: "user",
message: "Hello, can you help me with my project?"
});
```
## Receiving Events from the Agent
The server emits events using the `oh_event` event type. Here are some common event types you might receive:
- User messages (`source: "user", type: "message"`)
- Agent messages (`source: "agent", type: "message"`)
- File edits (`action: "edit"`)
- File writes (`action: "write"`)
- Command executions (`action: "run"`)
Example event handler:
```javascript
socket.on("oh_event", (event) => {
if (event.source === "agent" && event.type === "message") {
console.log("Agent says:", event.message);
} else if (event.action === "run") {
console.log("Command executed:", event.args.command);
console.log("Result:", event.result);
}
});
```
## Using Websocat for Testing
[Websocat](https://github.com/vi/websocat) is a command-line tool for interacting with WebSockets. It's useful for testing your WebSocket connection without writing a full client application.
### Installation
```bash
# On macOS
brew install websocat
# On Linux
curl -L https://github.com/vi/websocat/releases/download/v1.11.0/websocat.x86_64-unknown-linux-musl > websocat
chmod +x websocat
sudo mv websocat /usr/local/bin/
```
### Connecting to the WebSocket
```bash
# Connect to the WebSocket and print all received messages
echo "40{}" | \
websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
```
### Sending a Message
```bash
# Send a message to the agent
echo '42["oh_user_action",{"type":"message","source":"user","message":"Hello, agent!"}]' | \
websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
```
### Complete Example with Websocat
Here's a complete example of connecting to the WebSocket, sending a message, and receiving events:
```bash
# Start a persistent connection
websocat -v "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
# In another terminal, send a message
echo '42["oh_user_action",{"type":"message","source":"user","message":"Can you help me with my project?"}]' | \
websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
```
## Event Structure
Events sent and received through the WebSocket follow a specific structure:
```typescript
interface OpenHandsEvent {
id: string; // Unique event ID
source: string; // "user" or "agent"
timestamp: string; // ISO timestamp
message?: string; // For message events
type?: string; // Event type (e.g., "message")
action?: string; // Action type (e.g., "run", "edit", "write")
args?: any; // Action arguments
result?: any; // Action result
}
```
## Best Practices
1. **Handle Reconnection**: Implement reconnection logic in your client to handle network interruptions.
2. **Track Event IDs**: Store the latest event ID you've received and use it when reconnecting to avoid duplicate events.
3. **Error Handling**: Implement proper error handling for connection errors and failed actions.
4. **Rate Limiting**: Avoid sending too many actions in a short period to prevent overloading the server.
## Troubleshooting
### Connection Issues
- Verify that the OpenHands server is running and accessible
- Check that you're providing the correct conversation ID
- Ensure your WebSocket URL is correctly formatted
### Authentication Issues
- Make sure you have the necessary authentication cookies if required
- Verify that you have permission to access the specified conversation
### Event Handling Issues
- Check that you're correctly parsing the event data
- Verify that your event handlers are properly registered

View File

@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
The easiest way to run OpenHands is in Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.39
```
You'll find OpenHands running at http://localhost:3000!

View File

@@ -1,6 +1,7 @@
# Azure
OpenHands uses LiteLLM to make calls to Azure's chat models. You can find their documentation on using Azure as a provider [here](https://docs.litellm.ai/docs/providers/azure).
OpenHands uses LiteLLM to make calls to Azure's chat models. You can find their documentation on using Azure as a
provider [here](https://docs.litellm.ai/docs/providers/azure).
## Azure OpenAI Configuration
@@ -18,7 +19,7 @@ docker run -it --pull=always \
...
```
Then in the OpenHands UI Settings:
Then in the OpenHands UI Settings under the `LLM` tab:
:::note
You will need your ChatGPT deployment name which can be found on the deployments page in Azure. This is referenced as

View File

@@ -7,10 +7,11 @@ OpenHands uses LiteLLM to make calls to Google's chat models. You can find their
## Gemini - Google AI Studio Configs
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings under the `LLM` tab:
- `LLM Provider` to `Gemini`
- `LLM Model` to the model you will be using.
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. gemini/&lt;model-name&gt; like `gemini/gemini-2.0-flash`).
If the model is not in the list, enable `Advanced` options, and enter it in `Custom Model`
(e.g. gemini/&lt;model-name&gt; like `gemini/gemini-2.0-flash`).
- `API Key` to your Gemini API key
## VertexAI - Google Cloud Platform Configs
@@ -24,7 +25,8 @@ VERTEXAI_PROJECT="<your-gcp-project-id>"
VERTEXAI_LOCATION="<your-gcp-location>"
```
Then set the following in the OpenHands UI through the Settings:
Then set the following in the OpenHands UI through the Settings under the `LLM` tab:
- `LLM Provider` to `VertexAI`
- `LLM Model` to the model you will be using.
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. vertex_ai/&lt;model-name&gt;).
If the model is not in the list, enable `Advanced` options, and enter it in `Custom Model`
(e.g. vertex_ai/&lt;model-name&gt;).

View File

@@ -1,22 +1,21 @@
# Groq
OpenHands uses LiteLLM to make calls to chat models on Groq. You can find their documentation on using Groq as a provider [here](https://docs.litellm.ai/docs/providers/groq).
OpenHands uses LiteLLM to make calls to chat models on Groq. You can find their documentation on using Groq as a
provider [here](https://docs.litellm.ai/docs/providers/groq).
## Configuration
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings under the `LLM` tab:
- `LLM Provider` to `Groq`
- `LLM Model` to the model you will be using. [Visit here to see the list of
models that Groq hosts](https://console.groq.com/docs/models). If the model is not in the list, toggle
`Advanced` options, and enter it in `Custom Model` (e.g. groq/&lt;model-name&gt; like `groq/llama3-70b-8192`).
models that Groq hosts](https://console.groq.com/docs/models). If the model is not in the list,
enable `Advanced` options, and enter it in `Custom Model` (e.g. groq/&lt;model-name&gt; like `groq/llama3-70b-8192`).
- `API key` to your Groq API key. To find or create your Groq API Key, [see here](https://console.groq.com/keys).
## Using Groq as an OpenAI-Compatible Endpoint
The Groq endpoint for chat completion is [mostly OpenAI-compatible](https://console.groq.com/docs/openai). Therefore, you can access Groq models as you
would access any OpenAI-compatible endpoint. In the OpenHands UI through the Settings:
would access any OpenAI-compatible endpoint. In the OpenHands UI through the Settings under the `LLM` tab:
1. Enable `Advanced` options
2. Set the following:
- `Custom Model` to the prefix `openai/` + the model you will be using (e.g. `openai/llama3-70b-8192`)

View File

@@ -7,7 +7,7 @@ OpenHands supports using the [LiteLLM proxy](https://docs.litellm.ai/docs/proxy/
To use LiteLLM proxy with OpenHands, you need to:
1. Set up a LiteLLM proxy server (see [LiteLLM documentation](https://docs.litellm.ai/docs/proxy/quick_start))
2. When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
2. When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings under the `LLM` tab:
* Enable `Advanced` options
* `Custom Model` to the prefix `litellm_proxy/` + the model you will be using (e.g. `litellm_proxy/anthropic.claude-3-5-sonnet-20241022-v2:0`)
* `Base URL` to your LiteLLM proxy URL (e.g. `https://your-litellm-proxy.com`)
@@ -15,6 +15,7 @@ To use LiteLLM proxy with OpenHands, you need to:
## Supported Models
The supported models depend on your LiteLLM proxy configuration. OpenHands supports any model that your LiteLLM proxy is configured to handle.
The supported models depend on your LiteLLM proxy configuration. OpenHands supports any model that your LiteLLM proxy
is configured to handle.
Refer to your LiteLLM proxy configuration for the list of available models and their names.

View File

@@ -11,14 +11,12 @@ OpenHands can connect to any LLM supported by LiteLLM. However, it requires a po
Based on our evaluations of language models for coding tasks (using the SWE-bench dataset), we can provide some
recommendations for model selection. Our latest benchmarking results can be found in [this spreadsheet](https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0).
Based on these findings and community feedback, the following models have been verified to work reasonably well with OpenHands:
Based on these findings and community feedback, these are the latest models that have been verified to work reasonably well with OpenHands:
- [anthropic/claude-3-7-sonnet-20250219](https://www.anthropic.com/api) (recommended)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
- [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
- [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
- [openai/o3-mini](https://openai.com/index/openai-o3-mini/)
- [openai/o3](https://openai.com/index/introducing-o3-and-o4-mini/)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
- [all-hands/openhands-lm-32b-v0.1](https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model) -- available through [OpenRouter](https://openrouter.ai/all-hands/openhands-lm-32b-v0.1)
@@ -27,8 +25,8 @@ OpenHands will issue many prompts to the LLM you configure. Most of these LLMs c
limits and monitor usage.
:::
If you have successfully run OpenHands with specific LLMs not in the list, please add them to the verified list. We
also encourage you to open a PR to share your setup process to help others using the same provider and LLM!
If you have successfully run OpenHands with specific providers, we encourage you to open a PR to share your setup process
to help others using the same provider!
For a full list of the providers and models available, please consult the
[litellm documentation](https://docs.litellm.ai/docs/providers).

View File

@@ -1,4 +1,4 @@
# Local LLM with SGLang or vLLM
# Local LLMs
:::warning
When using a Local LLM, OpenHands may have limited functionality.
@@ -7,10 +7,91 @@ It is highly recommended that you use GPUs to serve local models for optimal exp
## News
- 2025/05/21: We collaborated with Mistral AI and released [Devstral Small](https://mistral.ai/news/devstral) that achieves [46.8% on SWE-Bench Verified](https://github.com/SWE-bench/experiments/pull/228)!
- 2025/03/31: We released an open model OpenHands LM v0.1 32B that achieves 37.1% on SWE-Bench Verified
([blog](https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model), [model](https://huggingface.co/all-hands/openhands-lm-32b-v0.1)).
## Download the Model from Huggingface
## Quickstart: Running OpenHands on Your Macbook
### Serve the model on your Macbook
We recommend using [LMStudio](https://lmstudio.ai/) for serving these models locally.
1. Download [LM Studio](https://lmstudio.ai/) and install it
2. Download the model:
- Option 1: Directly download the LLM from [this link](https://lmstudio.ai/model/devstral-small-2505-mlx) or by searching for the name `Devstral-Small-2505` in LM Studio
- Option 2: Download an LLM in GGUF format. For example, to download [Devstral Small 2505 GGUF](https://huggingface.co/mistralai/Devstral-Small-2505_gguf), run `huggingface-cli download mistralai/Devstral-Small-2505_gguf --local-dir mistralai/Devstral-Small-2505_gguf`. Then, in a bash terminal, run `lms import {model_name}` in the directory where you've downloaded the model checkpoint (e.g. run `lms import devstralQ4_K_M.gguf` in `mistralai/Devstral-Small-2505_gguf`).
3. Open the LM Studio application, switch to `power user` mode, and then open the developer tab:
![image](./screenshots/1_select_power_user.png)
4. Then click `Select a model to load` on top of the application:
![image](./screenshots/2_select_model.png)
5. And choose the model you want to use, holding `option` on mac to enable advanced loading options:
![image](./screenshots/3_select_devstral.png)
6. You should then pick an appropriate context window for OpenHands based on your hardware configuration (larger than 32768 is recommended for using OpenHands, but too large may cause you to run out of memory); Flash attention is also recommended if it works on your machine.
![image](./screenshots/4_set_context_window.png)
7. And you should start the server (if it is not already in `Running` status), un-toggle `Serve on Local Network` and remember the port number of the LMStudio URL (`1234` is the port number for `http://127.0.0.1:1234` in this example):
![image](./screenshots/5_copy_url.png)
8. Finally, you can click the `copy` button near model name to copy the model name (`imported-models/uncategorized/devstralq4_k_m.gguf` in this example):
![image](./screenshots/6_copy_to_get_model_name.png)
### Start OpenHands with locally served model
Check [the installation guide](https://docs.all-hands.dev/modules/usage/installation) to make sure you have all the prerequisites for running OpenHands.
```bash
export LMSTUDIO_MODEL_NAME="imported-models/uncategorized/devstralq4_k_m.gguf" # <- Replace this with the model name you copied from LMStudio
export LMSTUDIO_URL="http://host.docker.internal:1234" # <- Replace this with the port from LMStudio
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik
mkdir -p ~/.openhands-state && echo '{"language":"en","agent":"CodeActAgent","max_iterations":null,"security_analyzer":null,"confirmation_mode":false,"llm_model":"lm_studio/'$LMSTUDIO_MODEL_NAME'","llm_api_key":"dummy","llm_base_url":"'$LMSTUDIO_URL/v1'","remote_runtime_resource_factor":null,"github_token":null,"enable_default_condenser":true,"user_consents_to_analytics":true}' > ~/.openhands-state/settings.json
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.39-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.39
```
Once your server is running -- you can visit `http://localhost:3000` in your browser to use OpenHands with local Devstral model:
```
Digest: sha256:e72f9baecb458aedb9afc2cd5bc935118d1868719e55d50da73190d3a85c674f
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.39
Starting OpenHands...
Running OpenHands as root
14:22:13 - openhands:INFO: server_config.py:50 - Using config class None
INFO: Started server process [8]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:3000 (Press CTRL+C to quit)
```
## Advanced: Serving LLM on GPUs
### Download model checkpoints
:::note
The model checkpoints downloaded here should NOT be in GGUF format.
:::
For example, to download [OpenHands LM 32B v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1):
@@ -18,9 +99,7 @@ For example, to download [OpenHands LM 32B v0.1](https://huggingface.co/all-hand
huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir all-hands/openhands-lm-32b-v0.1
```
## Create an OpenAI-Compatible Endpoint With a Model Serving Framework
### Serving with SGLang
### Create an OpenAI-Compatible Endpoint With SGLang
- Install SGLang following [the official documentation](https://docs.sglang.ai/start/install.html).
- Example launch command for OpenHands LM 32B (with at least 2 GPUs):
@@ -35,7 +114,7 @@ SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
--api-key mykey --context-length 131072
```
### Serving with vLLM
### Create an OpenAI-Compatible Endpoint with vLLM
- Install vLLM following [the official documentation](https://docs.vllm.ai/en/latest/getting_started/installation.html).
- Example launch command for OpenHands LM 32B (with at least 2 GPUs):
@@ -49,7 +128,7 @@ vllm serve all-hands/openhands-lm-32b-v0.1 \
--enable-prefix-caching
```
## Run and Configure OpenHands
## Advanced: Run and Configure OpenHands
### Run OpenHands
@@ -75,7 +154,7 @@ Start OpenHands using `make run`.
### Configure OpenHands
Once OpenHands is running, you'll need to set the following in the OpenHands UI through the Settings:
Once OpenHands is running, you'll need to set the following in the OpenHands UI through the Settings under the `LLM` tab:
1. Enable `Advanced` options.
2. Set the following:
- `Custom Model` to `openai/<served-model-name>` (e.g. `openai/openhands-lm-32b-v0.1`)

View File

@@ -1,14 +1,15 @@
# OpenAI
OpenHands uses LiteLLM to make calls to OpenAI's chat models. You can find their documentation on using OpenAI as a provider [here](https://docs.litellm.ai/docs/providers/openai).
OpenHands uses LiteLLM to make calls to OpenAI's chat models. You can find their documentation on using OpenAI as a
provider [here](https://docs.litellm.ai/docs/providers/openai).
## Configuration
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings under the `LLM` tab:
* `LLM Provider` to `OpenAI`
* `LLM Model` to the model you will be using.
[Visit here to see a full list of OpenAI models that LiteLLM supports.](https://docs.litellm.ai/docs/providers/openai#openai-chat-completion-models)
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. openai/&lt;model-name&gt; like `openai/gpt-4o`).
If the model is not in the list, enable `Advanced` options, and enter it in `Custom Model` (e.g. openai/&lt;model-name&gt; like `openai/gpt-4o`).
* `API Key` to your OpenAI API key. To find or create your OpenAI Project API Key, [see here](https://platform.openai.com/api-keys).
## Using OpenAI-Compatible Endpoints
@@ -17,7 +18,7 @@ Just as for OpenAI Chat completions, we use LiteLLM for OpenAI-compatible endpoi
## Using an OpenAI Proxy
If you're using an OpenAI proxy, in the OpenHands UI through the Settings:
If you're using an OpenAI proxy, in the OpenHands UI through the Settings under the `LLM` tab:
1. Enable `Advanced` options
2. Set the following:
- `Custom Model` to openai/&lt;model-name&gt; (e.g. `openai/gpt-4o` or openai/&lt;proxy-prefix&gt;/&lt;model-name&gt;)

View File

@@ -1,12 +1,14 @@
# OpenRouter
OpenHands uses LiteLLM to make calls to chat models on OpenRouter. You can find their documentation on using OpenRouter as a provider [here](https://docs.litellm.ai/docs/providers/openrouter).
OpenHands uses LiteLLM to make calls to chat models on OpenRouter. You can find their documentation on using
OpenRouter as a provider [here](https://docs.litellm.ai/docs/providers/openrouter).
## Configuration
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings under the `LLM` tab:
* `LLM Provider` to `OpenRouter`
* `LLM Model` to the model you will be using.
[Visit here to see a full list of OpenRouter models](https://openrouter.ai/models).
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. openrouter/&lt;model-name&gt; like `openrouter/anthropic/claude-3.5-sonnet`).
If the model is not in the list, enable `Advanced` options, and enter it in
`Custom Model` (e.g. openrouter/&lt;model-name&gt; like `openrouter/anthropic/claude-3.5-sonnet`).
* `API Key` to your OpenRouter API key.

Binary file not shown.

After

Width:  |  Height:  |  Size: 228 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 420 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 558 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 646 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 93 KiB

View File

@@ -13,9 +13,11 @@ or custom tools. MCP is based on the open standard defined at [modelcontextproto
## Configuration
MCP configuration is defined in the `[mcp]` section of your `config.toml` file.
MCP configuration can be defined in:
* The OpenHands UI through the Settings under the `MCP` tab.
* The `config.toml` file under the `[mcp]` section if not using the UI.
### Configuration Example
### Configuration Example via config.toml
```toml
[mcp]
@@ -82,7 +84,7 @@ Stdio servers are configured using an object with the following properties:
When OpenHands starts, it:
1. Reads the MCP configuration from `config.toml`.
1. Reads the MCP configuration.
2. Connects to any configured SSE servers.
3. Starts any configured stdio servers.
4. Registers the tools provided by these servers with the agent.

View File

@@ -0,0 +1,23 @@
# Organization and User Microagents
## Purpose
Organizations and users can define microagents that apply to all repositories belonging to the organization or user.
## Usage
These microagents can be [any type of microagent](./microagents-overview#microagent-types) and will be loaded
accordingly. However, they are applied to all repositories belonging to the organization or user.
Add a `.openhands` repository under the organization or user and create a `microagents` directory and place the
microagents in that directory.
## Example
General microagent file example for organization `Great-Co` located inside the `.openhands` repository:
`microagents/org-microagent.md`:
```
* Use type hints and error boundaries; validate inputs at system boundaries and fail with meaningful error messages.
* Document interfaces and public APIs; use implementation comments only for non-obvious logic.
* Follow the same naming convention for variables, classes, constants, etc. already used in each repository.
```

View File

@@ -7,7 +7,7 @@ They provide expert guidance, automate common tasks, and ensure consistent pract
Currently OpenHands supports the following types of microagents:
- [General Repository Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
- [General Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
- [Keyword-Triggered Microagents](./microagents-keyword): Guidelines activated by specific keywords in prompts.
To customize OpenHands' behavior, create a .openhands/microagents/ directory in the root of your repository and
@@ -24,7 +24,7 @@ Example repository structure:
some-repository/
└── .openhands/
└── microagents/
└── repo.md # General repository guidelines
└── repo.md # General guidelines
└── trigger_this.md # Microagent triggered by specific keywords
└── trigger_that.md # Microagent triggered by specific keywords
```
@@ -34,7 +34,7 @@ some-repository/
Each microagent file may include frontmatter that provides additional information. In some cases, this frontmatter
is required:
| Microagent Type | Required |
|----------------------------------|----------|
| `General Repository Microagents` | No |
| `Keyword-Triggered Microagents` | Yes |
| Microagent Type | Required |
|---------------------------------|----------|
| `General Microagents` | No |
| `Keyword-Triggered Microagents` | Yes |

View File

@@ -1,4 +1,4 @@
# General Repository Microagents
# General Microagents
## Purpose
@@ -20,7 +20,7 @@ Frontmatter should be enclosed in triple dashes (---) and may include the follow
## Example
General repository microagent file example located at `.openhands/microagents/repo.md`:
General microagent file example located at `.openhands/microagents/repo.md`:
```
This project is a TODO application that allows users to track TODO items.
@@ -28,4 +28,4 @@ To set it up, you can run `npm run build`.
Always make sure the tests are passing before committing changes. You can run the tests by running `npm run test`.
```
[See more examples of general repository microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)
[See more examples of general microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)

View File

@@ -13,14 +13,16 @@ files on your machine. Only use this runtime in controlled environments or when
Before using the Local Runtime, ensure that:
1. You can run OpenHands using the [Development workflow](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).
2. tmux is available on your system.
2. For Linux and Mac, tmux is available on your system.
3. For Windows, PowerShell is available on your system.
- Only [CLI mode](../how-to/cli-mode) and [headless mode](../how-to/headless-mode) are supported in Windows with Local Runtime.
## Configuration
To use the Local Runtime, besides required configurations like the LLM provider, model and API key, you'll need to set
the following options via environment variables or the [config.toml file](https://github.com/All-Hands-AI/OpenHands/blob/main/config.template.toml) when starting OpenHands:
Via environment variables:
Via environment variables (please use PowerShell syntax for Windows PowerShell):
```bash
# Required
@@ -65,4 +67,4 @@ The Local Runtime is particularly useful for:
- CI/CD pipelines where Docker is not available.
- Testing and development of OpenHands itself.
- Environments where container usage is restricted.
- Environments where container usage is restricted (e.g. native Windows).

View File

@@ -70,7 +70,7 @@ const sidebars: SidebarsConfig = {
},
{
type: 'doc',
label: 'General Repository Microagents',
label: 'General Microagents',
id: 'usage/prompting/microagents-repo',
},
{
@@ -78,6 +78,11 @@ const sidebars: SidebarsConfig = {
label: 'Keyword-Triggered Microagents',
id: 'usage/prompting/microagents-keyword',
},
{
type: 'doc',
label: 'Organization and User Microagents',
id: 'usage/prompting/microagents-org',
},
{
type: 'doc',
label: 'Global Microagents',
@@ -267,6 +272,11 @@ const sidebars: SidebarsConfig = {
label: 'Evaluation',
id: 'usage/how-to/evaluation-harness',
},
{
type: 'doc',
label: 'WebSocket Connection',
id: 'usage/how-to/websocket-connection',
},
],
},
{

View File

@@ -876,6 +876,11 @@
"type": "string",
"nullable": true
},
"conversation_instructions": {
"type": "string",
"nullable": true,
"description": "Optional instructions the agent must follow throughout the conversation while addressing the user's initial task"
},
"image_urls": {
"type": "array",
"items": {

View File

@@ -42,6 +42,37 @@ api_key = "XXX"
temperature = 0.0
```
### Configuring Condensers for Evaluation
For benchmarks that support condenser configuration (like SWE-Bench), you can define multiple condenser configurations in your `config.toml` file. A condenser is responsible for managing conversation history to maintain context while staying within token limits - you can learn more about how it works [here](https://www.all-hands.dev/blog/openhands-context-condensensation-for-more-efficient-ai-agents):
```toml
# LLM-based summarizing condenser for evaluation
[condenser.summarizer_for_eval]
type = "llm"
llm_config = "haiku" # Reference to an LLM config to use for summarization
keep_first = 2 # Number of initial events to always keep
max_size = 100 # Maximum size of history before triggering summarization
# Recent events condenser for evaluation
[condenser.recent_for_eval]
type = "recent"
keep_first = 2 # Number of initial events to always keep
max_events = 50 # Maximum number of events to keep in history
```
You can then specify which condenser configuration to use when running evaluation scripts, for example:
```bash
EVAL_CONDENSER=summarizer_for_eval \
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 500 100 1 princeton-nlp/SWE-bench_Verified test
```
The name is up to you, but should match a name defined in your `config.toml` file. The `EVAL_CONDENSER` environment variable specifies the condenser configuration to use. In this case, `summarizer_for_eval` is used, which refers to the LLM-based summarizing condenser as defined above.
If no condenser configuration is specified, the 'noop' condenser will be used by default, which keeps the full conversation history.
For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark.
## Supported Benchmarks

View File

@@ -17,7 +17,7 @@ RUN git checkout 4eddc7db6449a5ade3e37285747c8b208cd54ce7
RUN micromamba create -n sci-agent python=3.10 pip setuptools wheel
RUN micromamba run -n sci-agent pip install -r requirements.txt
# Replace all occurence of conda with micromamba under the /workspace
# Replace all occurrences of conda with micromamba under the /workspace
RUN find ./ -type f -exec sed -i 's/conda/micromamba/g' {} \;
# pushd evaluation/scienceagentbench

View File

@@ -45,7 +45,7 @@ For example, for instance ID `django_django-11011`, it will try to pull our pre-
This image will be used to create an OpenHands runtime image in which the agent will operate.
```bash
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split]
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split] [n_runs] [mode]
# Example
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 500 100 1 princeton-nlp/SWE-bench_Verified test
@@ -63,19 +63,26 @@ to `CodeActAgent`.
default, the script evaluates the entire SWE-bench_Lite test set (300 issues). Note:
in order to use `eval_limit`, you must also set `agent`.
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
default, it is set to 60.
default, it is set to 100.
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
default, it is set to 1.
- `dataset`, a huggingface dataset name. e.g. `princeton-nlp/SWE-bench`, `princeton-nlp/SWE-bench_Lite`, `princeton-nlp/SWE-bench_Verified`, or `princeton-nlp/SWE-bench_Multimodal`, specifies which dataset to evaluate on.
- `dataset_split`, split for the huggingface dataset. e.g., `test`, `dev`. Default to `test`.
- `n_runs`, e.g. `3`, is the number of times to run the evaluation. Default is 1.
- `mode`, e.g. `swt`, `swt-ci`, or `swe`, specifies the evaluation mode. Default is `swe`.
> [!CAUTION]
> Setting `num_workers` larger than 1 is not officially tested, YMMV.
There is also one optional environment variable you can set.
There are also optional environment variables you can set:
```bash
export USE_HINT_TEXT=true # if you want to use hint text in the evaluation. Default to false. Ignore this if you are not sure.
# Use hint text in the evaluation (default: false)
export USE_HINT_TEXT=true # Ignore this if you are not sure.
# Specify a condenser configuration for memory management (default: NoOpCondenser)
export EVAL_CONDENSER=summarizer_for_eval # Name of the condenser config group in config.toml
```
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
@@ -102,9 +109,9 @@ Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZj
```bash
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split]
# Example - This runs evaluation on CodeActAgent for 300 instances on "princeton-nlp/SWE-bench_Lite"'s test set, with max 30 iteration per instances, with 16 number of workers running in parallel
# Example - This runs evaluation on CodeActAgent for 300 instances on "princeton-nlp/SWE-bench_Lite"'s test set, with a maximum of 100 iterations per instance and 16 workers running in parallel
ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300 30 16 "princeton-nlp/SWE-bench_Lite" test
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300 100 16 "princeton-nlp/SWE-bench_Lite" test
```
To clean-up all existing runtime you've already started, run:
@@ -176,7 +183,7 @@ Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZj
# Example - This evaluates patches generated by CodeActAgent on Llama-3.1-70B-Instruct-Turbo on "princeton-nlp/SWE-bench_Lite"'s test set, with 16 number of workers running in parallel
ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe-bench-lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_30_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe-bench-lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_100_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
```
To clean-up all existing runtimes that you've already started, run:

View File

@@ -44,6 +44,8 @@ from openhands.core.config import (
get_llm_config_arg,
get_parser,
)
from openhands.core.config.utils import get_condenser_config_arg
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.critic import AgentFinishedCritic
@@ -261,6 +263,7 @@ def get_config(
enable_jupyter=False,
enable_browsing=RUN_WITH_BROWSING,
enable_llm_editor=False,
enable_mcp=False,
condenser=metadata.condenser_config,
enable_prompt_extensions=False,
)
@@ -714,6 +717,19 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
subset = dataset[dataset[filter_column].isin(selected_ids)]
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
return subset
if 'selected_repos' in data:
# repos for the swe-bench instances:
# ['astropy/astropy', 'django/django', 'matplotlib/matplotlib', 'mwaskom/seaborn', 'pallets/flask', 'psf/requests', 'pydata/xarray', 'pylint-dev/pylint', 'pytest-dev/pytest', 'scikit-learn/scikit-learn', 'sphinx-doc/sphinx', 'sympy/sympy']
selected_repos = data['selected_repos']
if isinstance(selected_repos, str): selected_repos = [selected_repos]
assert isinstance(selected_repos, list)
logger.info(
f'Filtering {selected_repos} tasks from "selected_repos"...'
)
subset = dataset[dataset["repo"].isin(selected_repos)]
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
return subset
skip_ids = os.environ.get('SKIP_IDS', '').split(',')
if len(skip_ids) > 0:
logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
@@ -742,6 +758,7 @@ if __name__ == '__main__':
choices=['swe', 'swt', 'swt-ci'],
help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
)
args, _ = parser.parse_known_args()
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
@@ -778,6 +795,19 @@ if __name__ == '__main__':
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
# Get condenser config from environment variable
condenser_name = os.environ.get('EVAL_CONDENSER')
if condenser_name:
condenser_config = get_condenser_config_arg(condenser_name)
if condenser_config is None:
raise ValueError(
f'Could not find Condenser config: EVAL_CONDENSER={condenser_name}'
)
else:
# If no specific condenser config is provided via env var, default to NoOpCondenser
condenser_config = NoOpCondenserConfig()
logger.debug('No Condenser config provided via EVAL_CONDENSER, using NoOpCondenser.')
details = {'mode': args.mode}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
@@ -792,6 +822,7 @@ if __name__ == '__main__':
args.eval_note,
args.eval_output_dir,
details=details,
condenser_config=condenser_config,
)
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')

View File

@@ -14,6 +14,7 @@ SPLIT=$8
N_RUNS=$9
MODE=${10}
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
echo "Number of workers not specified, use default $NUM_WORKERS"
@@ -26,8 +27,8 @@ if [ -z "$AGENT" ]; then
fi
if [ -z "$MAX_ITER" ]; then
echo "MAX_ITER not specified, use default 60"
MAX_ITER=60
echo "MAX_ITER not specified, use default 100"
MAX_ITER=100
fi
if [ -z "$RUN_WITH_BROWSING" ]; then
@@ -51,6 +52,12 @@ if [ -z "$MODE" ]; then
echo "MODE not specified, use default $MODE"
fi
if [ -n "$EVAL_CONDENSER" ]; then
echo "Using Condenser Config: $EVAL_CONDENSER"
else
echo "No Condenser Config provided via EVAL_CONDENSER, use default (NoOpCondenser)."
fi
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
@@ -65,6 +72,7 @@ echo "MAX_ITER: $MAX_ITER"
echo "NUM_WORKERS: $NUM_WORKERS"
echo "COMMIT_HASH: $COMMIT_HASH"
echo "MODE: $MODE"
echo "EVAL_CONDENSER: $EVAL_CONDENSER"
# Default to NOT use Hint
if [ -z "$USE_HINT_TEXT" ]; then
@@ -88,6 +96,10 @@ fi
if [ "$MODE" != "swe" ]; then
EVAL_NOTE="${EVAL_NOTE}-${MODE}"
fi
# Add condenser config to eval note if provided
if [ -n "$EVAL_CONDENSER" ]; then
EVAL_NOTE="${EVAL_NOTE}-${EVAL_CONDENSER}"
fi
function run_eval() {
local eval_note="${1}"
@@ -101,6 +113,8 @@ function run_eval() {
--split $SPLIT \
--mode $MODE"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"

View File

@@ -0,0 +1,172 @@
# Visual SWE-Bench Evaluation with Docker Image
This folder contains the evaluation harness that we built on top of the original [Visual SWE-Bench benchmark](https://multi-swe-bench.github.io/#/) ([paper](https://arxiv.org/abs/2412.17315)).
The evaluation consists of three steps:
1. Environment setup: [install python environment](../../README.md#development-environment), [configure LLM config](../../README.md#configure-openhands-and-your-llm), and [pull docker](#openhands-visual-swe-bench-instance-level-docker-support).
2. [Run inference](#run-inference-on-visual-swe-bench-instances): Generate an edit patch for each GitHub issue.
3. [Evaluate patches using Visual SWE-Bench docker](#evaluate-generated-patches).
## Setup Environment and LLM Configuration
Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
## OpenHands Visual SWE-Bench Instance-level Docker Support
OpenHands now supports using the official evaluation docker for both **[inference](#run-inference-on-visual-swe-bench-instances) and [evaluation](#evaluate-generated-patches)**.
This is now the default behavior.
## Run Inference on Visual SWE-Bench Instances
Make sure your Docker daemon is running, and you have ample disk space for the [instance-level docker image](#openhands-visual-swe-bench-instance-level-docker-support).
When the `run_infer.sh` script is started, it will automatically pull the relevant Visual SWE-Bench images. For example, for instance ID `networkx__networkx-6503`, it will try to pull our pre-built docker image `sweb.eval.x86_64.networkx_s_networkx-6503` from DockerHub. This image will be used to create an OpenHands runtime image in which the agent will operate.
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers]
# Example
./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 133 30 1
```
where `model_config` is mandatory, and the rest are optional.
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting
to `CodeActAgent`.
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
default, the script evaluates the entire Visual SWE-bench set (133 issues). Note:
in order to use `eval_limit`, you must also set `agent`.
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
default, it is set to 30.
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
default, it is set to 1.
There are also two optional environment variables you can set.
```bash
export USE_HINT_TEXT=true # if you want to use hint text in the evaluation. Default to false. Ignore this if you are not sure.
export USE_INSTANCE_IMAGE=true # if you want to use instance-level docker images. Default to true
```
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
then your command would be:
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 10
```
### Specify a subset of tasks to run infer
If you would like to specify a list of tasks you'd like to benchmark on, you could
create a `config.toml` under `./evaluation/benchmarks/visual_swe_bench/` folder, and put a list
attribute named `selected_ids`, e.g.
```toml
selected_ids = ['astropy__astropy-13838', 'matplotlib__matplotlib-21617', 'plotly__plotly.py-1966']
```
Then only these tasks (rows whose `instance_id` is in the above list) will be evaluated.
In this case, `eval_limit` option applies to tasks that are in the `selected_ids` list.
After running the inference, you will obtain an `output.jsonl` (by default it will be saved to `evaluation/evaluation_outputs`).
## Evaluate Generated Patches
### Download Docker Images
**(Recommended for reproducibility)** If you have extra local space (e.g., 200GB), you can try pulling the instance-level docker images we've prepared by running:
```bash
evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh instance
```
If you want to save disk space a bit, while speeding up the image pre-build process, you can pull the environment-level docker images:
```bash
evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh env
```
If you want to evaluate on the full SWE-Bench test set:
```bash
evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh instance full
```
### Run evaluation
With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patches, and produce a fine-grained report.
**This evaluation is performed using the official dockerized evaluation harness.**
> If you want to evaluate existing results, you should first run this to clone existing outputs
>
>```bash
>git clone https://huggingface.co/spaces/OpenHands/evaluation evaluation/evaluation_outputs
>```
NOTE, you should have already pulled the instance-level OR env-level docker images following [this section](#openhands-visual-swe-bench-instance-level-docker-support).
Then you can run the following:
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id]
# Example
./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/luolin101__Visual-SWE-bench-test/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
```
The script accepts one optional argument:
- `instance_id`: Specify a single instance to evaluate (optional)
For example, to evaluate a specific instance:
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL instance_123
```
> You can also pass in a JSONL with SWE-Bench format to `./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh`, where each line is a JSON of `{"model_patch": "XXX", "model_name_or_path": "YYY", "instance_id": "ZZZ"}`.
The final results will be saved to `evaluation/evaluation_outputs/outputs/visual_swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/` with the following files/directory:
- `README.md`: a report showing what are the instances that passed, failed, etc.
- `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
- `logs/`: a directory of test logs
## Visualize Results
First you need to clone `https://huggingface.co/spaces/OpenHands/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
```bash
git clone https://huggingface.co/spaces/OpenHands/evaluation
```
**(optional) setup streamlit environment with conda**:
```bash
cd evaluation
conda create -n streamlit python=3.10
conda activate streamlit
pip install -r requirements.txt
```
**run the visualizer**:
Then, in a separate Python environment with `streamlit` library, you can run the following:
```bash
# Make sure you are inside the cloned `evaluation` repo
conda activate streamlit # if you follow the optional conda env setup above
streamlit run app.py --server.port 8501 --server.address 0.0.0.0
```
Then you can access the SWE-Bench trajectory visualizer at `localhost:8501`.
## Submit your evaluation results
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

View File

@@ -0,0 +1,641 @@
import asyncio
import json
import os
import tempfile
from typing import Any
import pandas as pd
import toml
from datasets import load_dataset
import openhands.agenthub
from evaluation.benchmarks.swe_bench.resource.mapping import (
get_instance_resource_factor,
)
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
assert_and_raise,
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
AppConfig,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.utils.shutdown_listener import sleep_if_should_continue
# Boolean feature toggles, read from the environment once at import time.
# A flag is enabled only when its value is the string 'true' (case-insensitive).
USE_HINT_TEXT = os.getenv('USE_HINT_TEXT', 'false').lower() == 'true'
RUN_WITH_BROWSING = os.getenv('RUN_WITH_BROWSING', 'false').lower() == 'true'

# Agent class name -> function used to produce the simulated user replies
# for that agent.
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = dict(CodeActAgent=codeact_user_response)
def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
return f'{instance.repo}__{instance.version}'.replace('/', '__')
def get_instruction(instance: pd.Series, metadata: EvalMetadata):
    """Build the initial prompt sent to the agent for one instance.

    Args:
        instance: Dataset row; ``problem_statement`` and ``base_commit`` are
            embedded in the prompt, and ``repo``/``version`` determine the
            workspace directory name.
        metadata: Evaluation metadata (not used when building the prompt).

    Returns:
        The prompt text. When ``RUN_WITH_BROWSING`` is set, a trailing note
        forbidding web browsing is appended.
    """
    repo_dir = _get_swebench_workspace_dir_name(instance)
    # Instruction based on Anthropic's official trajectory
    # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
    segments = [
        '<uploaded_files>\n',
        f'/workspace/{repo_dir}\n',
        '</uploaded_files>\n',
        f"I've uploaded a python code repository in the directory {repo_dir}. Consider the following issue description:\n\n",
        '<issue_description>\n',
        f'{instance.problem_statement}\n',
        '</issue_description>\n\n',
        'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n',
        "I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n",
        "Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n",
        'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n',
        'Follow these steps to resolve the issue:\n',
        '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n',
        '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n',
        '3. Edit the sourcecode of the repo to resolve the issue\n',
        '4. Rerun your reproduce script and confirm that the error is fixed!\n',
        '5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well\n',
        f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n',
        ' - The issue you are fixing\n',
        ' - The files you modified\n',
        ' - The functions you changed\n',
        ' Make sure all these tests pass with your changes.\n',
        "Your thinking should be thorough and so it's fine if it's very long.\n",
    ]
    prompt = ''.join(segments)

    if RUN_WITH_BROWSING:
        prompt += '<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
    return prompt
# TODO: migrate all swe-bench docker to ghcr.io/openhands
# Registry/namespace prefix prepended to per-instance image names; can be
# overridden through the EVAL_DOCKER_IMAGE_PREFIX environment variable.
DOCKER_IMAGE_PREFIX = os.getenv('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
# Instance IDs whose images are resolved under docker.io/luolin101 rather than
# under DOCKER_IMAGE_PREFIX.
_LUOLIN101_INSTANCE_IDS = (
    'plotly__plotly.py-4083',
    'plotly__plotly.py-2600',
    'plotly__plotly.py-2591',
    'plotly__plotly.py-1966',
    'networkx__networkx-6503',
    'networkx__networkx-6098',
    'networkx__networkx-5616',
    'networkx__networkx-5354',
    'networkx__networkx-5058',
    'networkx__networkx-4378',
    'networkx__networkx-3764',
    'vega__altair-2785',
    'vega__altair-1092',
    'vega__altair-974',
    'vega__altair-830',
    'matplotlib__matplotlib-27754',
    'matplotlib__matplotlib-26926',
    'matplotlib__matplotlib-26788',
    'matplotlib__matplotlib-26586',
    'sympy__sympy-26941',
    'mwaskom__seaborn-3458',
    'mwaskom__seaborn-3454',
)


def get_instance_docker_image(instance_id: str, official_image: bool = False) -> str:
    """Return the lower-cased docker image reference for an instance.

    Args:
        instance_id: Benchmark instance ID, e.g. ``networkx__networkx-6503``.
        official_image: Accepted for call compatibility; it does not affect
            the result.

    Returns:
        ``<prefix>/sweb.eval.x86_64.<instance_id>`` with ``__`` rewritten to
        ``_s_``, where the prefix is ``docker.io/luolin101`` for the IDs
        listed in ``_LUOLIN101_INSTANCE_IDS`` and ``DOCKER_IMAGE_PREFIX``
        (trailing slashes removed) otherwise.
    """
    # '__' is rewritten to comply with docker image naming convention.
    image_name = ('sweb.eval.x86_64.' + instance_id).replace('__', '_s_')
    if instance_id in _LUOLIN101_INSTANCE_IDS:
        prefix = 'docker.io/luolin101'
    else:
        prefix = DOCKER_IMAGE_PREFIX.rstrip('/')
    return (prefix + '/' + image_name).lower()
def get_config(
    instance: pd.Series,
    metadata: EvalMetadata,
) -> AppConfig:
    """Assemble the application config used to run the agent on one instance.

    Args:
        instance: Dataset row; its ``instance_id`` selects the container image,
            the resource factor and the completions-logging location.
        metadata: Evaluation metadata supplying the dataset name, agent class,
            iteration limit, LLM config, condenser config and output directory.

    Returns:
        An ``AppConfig`` with the sandbox, LLM and agent configs attached.
    """
    instance_id = instance['instance_id']
    dataset_name = metadata.dataset.lower()

    # We use a different instance image for the each instance of swe-bench eval
    use_official_image = 'verified' in dataset_name or 'lite' in dataset_name
    base_container_image = get_instance_docker_image(instance_id, use_official_image)
    logger.info(
        f'Using instance container image: {base_container_image}. '
        f'Please make sure this image exists. '
        f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
    )

    sandbox_config = get_default_sandbox_config_for_eval()
    sandbox_config.base_container_image = base_container_image
    sandbox_config.enable_auto_lint = True
    sandbox_config.use_host_network = False
    # Add platform to the sandbox config to solve issue 4401
    sandbox_config.platform = 'linux/amd64'
    sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
        dataset_name=metadata.dataset,
        instance_id=instance_id,
    )

    config = AppConfig(
        default_agent=metadata.agent_class,
        run_as_openhands=False,
        max_iterations=metadata.max_iterations,
        runtime=os.environ.get('RUNTIME', 'docker'),
        sandbox=sandbox_config,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
    )

    llm_config = update_llm_config_for_completions_logging(
        metadata.llm_config, metadata.eval_output_dir, instance_id
    )
    config.set_llm_config(llm_config)

    config.set_agent_config(
        AgentConfig(
            enable_jupyter=False,
            enable_browsing=RUN_WITH_BROWSING,
            enable_llm_editor=False,
            condenser=metadata.condenser_config,
            enable_prompt_extensions=False,
        )
    )
    return config
def initialize_runtime(
    runtime: Runtime,
    instance: pd.Series,  # this argument is not required
):
    """Prepare the sandbox before the agent is started.

    Steps, in order: append ``SWE_INSTANCE_ID``, ``PIP_CACHE_DIR`` and a
    ``git --no-pager`` alias to ``~/.bashrc``; export ``USER``; copy the
    instance JSON to ``/swe_util/eval_data/instances/`` and
    ``instance_swe_entry.sh`` to ``/swe_util/``; source both ``~/.bashrc`` and
    the entry script; ``cd`` into the workspace; ``git reset --hard``; remove
    all git remotes; and check that ``which python`` mentions ``testbed``.
    Every step is validated through ``assert_and_raise``.
    """

    def _run(command):
        # Run one shell command with a 600s hard timeout, logging the action
        # and the resulting observation.
        cmd_action = CmdRunAction(command=command)
        cmd_action.set_hard_timeout(600)
        logger.info(cmd_action, extra={'msg_type': 'ACTION'})
        result = runtime.run_action(cmd_action)
        logger.info(result, extra={'msg_type': 'OBSERVATION'})
        return result

    banner = '-' * 30
    logger.info(banner)
    logger.info('BEGIN Runtime Initialization Fn')
    logger.info(banner)
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)

    # Set instance id
    obs = _run(
        f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc"""
    )
    assert_and_raise(
        obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
    )

    obs = _run("""export USER=$(whoami); echo USER=${USER} """)
    assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')

    # Directory of this script; the entry script is located relative to it.
    script_dir = os.path.dirname(__file__)

    # inject the instance info
    obs = _run('mkdir -p /swe_util/eval_data/instances')
    assert_and_raise(
        obs.exit_code == 0,
        f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
    )

    swe_instance_json_name = 'swe-bench-instance.json'
    with tempfile.TemporaryDirectory() as temp_dir:
        # The file must carry the desired name, so it is written into a
        # temporary directory rather than created as an anonymous temp file.
        temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
        payload = [instance] if isinstance(instance, dict) else [instance.to_dict()]
        with open(temp_file_path, 'w') as f:
            json.dump(payload, f)

        # Copy the file to the desired location
        runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')

    # inject the instance swe entry
    runtime.copy_to(
        str(os.path.join(script_dir, 'scripts/setup/instance_swe_entry.sh')),
        '/swe_util/',
    )

    obs = _run('cat ~/.bashrc')
    assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')

    obs = _run('source ~/.bashrc')
    if isinstance(obs, ErrorObservation):
        logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
    assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')

    obs = _run('source /swe_util/instance_swe_entry.sh')
    assert_and_raise(
        obs.exit_code == 0,
        f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}',
    )

    obs = _run(f'cd /workspace/{workspace_dir_name}')
    assert_and_raise(
        obs.exit_code == 0,
        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
    )

    obs = _run('git reset --hard')
    assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')

    obs = _run(
        'for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
    )
    assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')

    obs = _run('which python')
    assert_and_raise(
        obs.exit_code == 0 and 'testbed' in obs.content,
        f'Expected to find python interpreter from testbed, but got: {str(obs)}',
    )

    logger.info(banner)
    logger.info('END Runtime Initialization Fn')
    logger.info(banner)
def complete_runtime(
    runtime: Runtime,
    instance: pd.Series,  # this argument is not required, but it is used to get the workspace_dir_name
) -> dict[str, Any]:
    """Collect the agent's changes from the sandbox as a git patch.

    This is invoked once the agent has finished (see ``process_instance``).
    It enters the workspace, disables the git pager, deletes nested ``.git``
    directories, stages everything and diffs the index against
    ``instance["base_commit"]``, trying the diff up to five times.

    Returns:
        ``{'git_patch': <diff text>}``.
    """

    def _run(command, timeout=600):
        # Run one shell command with the given hard timeout, logging the
        # action and the resulting observation.
        cmd_action = CmdRunAction(command=command)
        cmd_action.set_hard_timeout(timeout)
        logger.info(cmd_action, extra={'msg_type': 'ACTION'})
        result = runtime.run_action(cmd_action)
        logger.info(result, extra={'msg_type': 'OBSERVATION'})
        return result

    def _succeeded(result):
        return isinstance(result, CmdOutputObservation) and result.exit_code == 0

    banner = '-' * 30
    logger.info(banner)
    logger.info('BEGIN Runtime Completion Fn')
    logger.info(banner)
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
    cd_command = f'cd /workspace/{workspace_dir_name}'

    obs = _run(cd_command)
    if obs.exit_code == -1:
        # The previous command is still running
        # We need to kill previous command
        logger.info('The previous command is still running, trying to kill it...')
        # The interrupt is sent without a hard timeout and without an ACTION log entry.
        obs = runtime.run_action(CmdRunAction(command='C-c'))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Then run the command again
        obs = _run(cd_command)
    assert_and_raise(
        _succeeded(obs),
        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
    )

    obs = _run('git config --global core.pager ""')
    assert_and_raise(
        _succeeded(obs),
        f'Failed to git config --global core.pager "": {str(obs)}',
    )

    # First check for any git repositories in subdirectories
    obs = _run('find . -type d -name .git -not -path "./.git"')
    assert_and_raise(
        _succeeded(obs),
        f'Failed to find git repositories: {str(obs)}',
    )

    # Remove all .git directories in subdirectories
    nested_git_dirs = [p for p in obs.content.strip().split('\n') if p]
    for git_dir in nested_git_dirs:
        obs = _run(f'rm -rf "{git_dir}"')
        assert_and_raise(
            _succeeded(obs),
            f'Failed to remove git directory {git_dir}: {str(obs)}',
        )

    # add all files
    obs = _run('git add -A')
    assert_and_raise(
        _succeeded(obs),
        f'Failed to git add -A: {str(obs)}',
    )

    git_patch = None
    for attempt in range(5):
        obs = _run(
            f'git diff --no-color --cached {instance["base_commit"]}',
            timeout=max(300 + 100 * attempt, 600),
        )
        if isinstance(obs, CmdOutputObservation):
            if obs.exit_code == 0:
                git_patch = obs.content.strip()
                break
            logger.info('Failed to get git diff, retrying...')
            sleep_if_should_continue(10)
        elif isinstance(obs, ErrorObservation):
            logger.error(f'Error occurred: {obs.content}. Retrying...')
            sleep_if_should_continue(10)
        else:
            assert_and_raise(False, f'Unexpected observation type: {str(obs)}')

    assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')

    logger.info(banner)
    logger.info('END Runtime Completion Fn')
    logger.info(banner)
    return {'git_patch': git_patch}
def process_instance(
    instance: pd.Series,
    metadata: EvalMetadata,
    reset_logger: bool = True,
    runtime_failure_count: int = 0,
) -> EvalOutput:
    """Run the agent on a single instance and package the result.

    Args:
        instance: Dataset row for the instance to solve.
        metadata: Evaluation metadata (agent class, output directory, ...).
        reset_logger: When true, redirect logging to a per-instance file under
            ``<eval_output_dir>/infer_logs``.
        runtime_failure_count: Number of earlier runtime failures for this
            instance. When positive, the remote runtime resource factor is
            multiplied by ``2 ** runtime_failure_count`` and capped at 8.

    Returns:
        An ``EvalOutput`` holding the git patch, event history and metrics.

    Raises:
        ValueError: If the controller returns no state.
        EvalException: If the final state carries a fatal evaluation error.
    """
    config = get_config(instance, metadata)

    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
    if reset_logger:
        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
        reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
    else:
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')

    # Increase resource_factor with increasing attempt_id
    if runtime_failure_count > 0:
        config.sandbox.remote_runtime_resource_factor = min(
            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
            8,
        )
        logger.warning(
            f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
        )

    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)

    try:
        initialize_runtime(runtime, instance)

        instruction = get_instruction(instance, metadata)

        # Here's how you can run the agent (similar to the `main` function) and get the final task state
        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=MessageAction(content=instruction),
                runtime=runtime,
                fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                    metadata.agent_class
                ],
            )
        )

        # Validate the state before its first use; otherwise a missing state
        # surfaces as an AttributeError on `state.last_error` below.
        if state is None:
            raise ValueError('State should not be None.')

        # if fatal error, throw EvalError to trigger re-run
        if is_fatal_evaluation_error(state.last_error):
            raise EvalException('Fatal error detected: ' + state.last_error)

        # ======= THIS IS SWE-Bench specific =======
        # Get git patch
        return_val = complete_runtime(runtime, instance)
        git_patch = return_val['git_patch']
        logger.info(
            f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
        )
    finally:
        # Always release the runtime, including when a step above raised.
        runtime.close()
    # ==========================================

    # ======= Attempt to evaluate the agent's edits =======
    # we use eval_infer.sh to evaluate the agent's edits, not here
    # because the agent may alter the environment / testcases
    test_result = {
        'git_patch': git_patch,
    }

    # If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
    # You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.

    # NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
    histories = [event_to_dict(event) for event in state.history]
    metrics = get_metrics(state)

    # Save the output
    output = EvalOutput(
        instance_id=instance.instance_id,
        instruction=instruction,
        instance=instance.to_dict(),  # SWE Bench specific
        test_result=test_result,
        metadata=metadata,
        history=histories,
        metrics=metrics,
        error=state.last_error if state.last_error else None,
    )
    return output
def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
    """Restrict the dataset to the tasks that should be evaluated.

    Precedence:
      1. If a ``config.toml`` next to this file defines ``selected_ids``, only
         rows whose ``filter_column`` value is in that list are kept.
      2. Otherwise, if the ``SKIP_IDS`` environment variable lists any
         comma-separated IDs, rows with those IDs are dropped.
      3. Otherwise the dataset is returned unchanged.

    Args:
        dataset: Full benchmark dataset.
        filter_column: Name of the column holding the instance IDs.

    Returns:
        The filtered (or original) DataFrame.
    """
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
    if os.path.exists(file_path):
        with open(file_path, 'r') as file:
            data = toml.load(file)
        if 'selected_ids' in data:
            selected_ids = data['selected_ids']
            logger.info(
                f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
            )
            subset = dataset[dataset[filter_column].isin(selected_ids)]
            logger.info(f'Retained {subset.shape[0]} tasks after filtering')
            return subset

    # ''.split(',') yields [''], so blank entries must be discarded; otherwise
    # an unset SKIP_IDS is reported as one ID to skip.
    raw_skip_ids = os.environ.get('SKIP_IDS', '').split(',')
    skip_ids = [item.strip() for item in raw_skip_ids if item.strip()]
    if skip_ids:
        logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
        return dataset[~dataset[filter_column].isin(skip_ids)]
    return dataset
# A list of instances that are known to be tricky to infer
# (will cause runtime failure even with resource factor = 8).
# These IDs are removed from the task set only when the dataset name
# contains 'SWE-Gym' (see the `__main__` section below).
SWEGYM_EXCLUDE_IDS = [
'dask__dask-10422',
'pandas-dev__pandas-50548',
'pandas-dev__pandas-53672',
'pandas-dev__pandas-54174',
'pandas-dev__pandas-55518',
'pandas-dev__pandas-58383',
'pydata__xarray-6721',
'pytest-dev__pytest-10081',
'pytest-dev__pytest-7236',
]
if __name__ == '__main__':
    # Command-line entry point: load the dataset, resolve the LLM config,
    # build the evaluation metadata and run the agent over the instances.
    parser = get_parser()
    parser.add_argument(
        '--dataset',
        type=str,
        default='princeton-nlp/SWE-bench',
        help='data set to evaluate on, either full-test or lite-test',
    )
    parser.add_argument(
        '--split',
        type=str,
        default='test',
        help='split to evaluate on',
    )
    args, _ = parser.parse_known_args()

    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
    # so we don't need to manage file uploading to OpenHands's repo
    dataset = load_dataset(args.dataset, split=args.split)
    swe_bench_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
    logger.info(
        f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_bench_tests)} tasks'
    )
    if 'SWE-Gym' in args.dataset:
        swe_bench_tests = swe_bench_tests[
            ~swe_bench_tests['instance_id'].isin(SWEGYM_EXCLUDE_IDS)
        ]
        logger.info(
            f'{len(swe_bench_tests)} tasks left after excluding SWE-Gym excluded tasks'
        )

    # Resolve the LLM config first and validate it before touching its
    # attributes, so a missing/unknown config produces the ValueError below
    # rather than an AttributeError on None.
    llm_config = get_llm_config_arg(args.llm_config) if args.llm_config else None
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
    llm_config.log_completions = True
    # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
    llm_config.modify_params = False

    details = {}
    # Looked up for its side effect only: the result is not used further.
    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)

    dataset_description = (
        args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
    )
    metadata = make_metadata(
        llm_config,
        dataset_description,
        args.agent_cls,
        args.max_iterations,
        args.eval_note,
        args.eval_output_dir,
        details=details,
    )

    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
    print(f'### OUTPUT FILE: {output_file} ###')
    instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

    # If the first PASS_TO_PASS value is not already a string, stringify both
    # test-list columns.
    if len(instances) > 0 and not isinstance(
        instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
    ):
        for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
            instances[col] = instances[col].apply(lambda x: str(x))

    run_evaluation(
        instances,
        metadata,
        output_file,
        args.eval_num_workers,
        process_instance,
        timeout_seconds=8 * 60 * 60,  # 8 hour PER instance should be more than enough
        max_retries=5,
    )

View File

@@ -0,0 +1,157 @@
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-11693:latest
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-13838:latest
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-14295:latest
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-8292:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13908:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13980:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13983:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13984:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-14043:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-14623:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-19763:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20470:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20518:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20584:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20761:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20826:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21443:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21490:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21550:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21568:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21617:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22865:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22871:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22931:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23047:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23111:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23412:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24088:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24177:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24189:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24570:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24691:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24749:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24768:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24849:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24870:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24971:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25287:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25334:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25340:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25346:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25405:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25499:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25565:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25640:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25667:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25779:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-26078:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-26466:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2576:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2846:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2979:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3180:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3187:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3202:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3216:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3217:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3276:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3394:latest
xingyaoww/sweb.eval.x86_64.pydata_s_xarray-4182:latest
xingyaoww/sweb.eval.x86_64.pydata_s_xarray-5682:latest
xingyaoww/sweb.eval.x86_64.pylint-dev_s_pylint-4551:latest
xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-13087:latest
xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-13618:latest
xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-14067:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10048:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10097:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10191:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10435:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-11266:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-11502:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-7615:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-7757:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8028:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8056:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8075:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8120:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8265:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8278:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8620:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8621:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8638:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8658:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9229:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9230:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9289:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9320:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9350:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9464:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9673:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9698:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9797:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9982:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9987:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9997:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9999:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-11787:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-11788:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-13264:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-13840:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15151:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15304:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15625:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15976:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-16003:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-17067:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-17115:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-18922:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-21769:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-24723:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-4083:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-2600:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-2591:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-1966:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-6503:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-6098:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-5616:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-5354:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-5058:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-4378:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-3764:latest
luolin101/sweb.eval.x86_64.vega_s_altair-2785:latest
luolin101/sweb.eval.x86_64.vega_s_altair-1092:latest
luolin101/sweb.eval.x86_64.vega_s_altair-974:latest
luolin101/sweb.eval.x86_64.vega_s_altair-830:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-27754:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26926:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26788:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26586:latest
luolin101/sweb.eval.x86_64.sympy_s_sympy-26941:latest
luolin101/sweb.eval.x86_64.mwaskom_s_seaborn-3458:latest
luolin101/sweb.eval.x86_64.mwaskom_s_seaborn-3454:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25631:latest
xingyaoww/sweb.env.x86_64.428468730904ff6b4232aa:latest
xingyaoww/sweb.env.x86_64.89a9e6df7ab7bcb9e010c8:latest
xingyaoww/sweb.env.x86_64.15374367de368534f261e3:latest
xingyaoww/sweb.env.x86_64.6b007979cf533f0f3016e8:latest
xingyaoww/sweb.env.x86_64.b382c45e0a94d34ef0fc86:latest
xingyaoww/sweb.env.x86_64.7037e8c448a4b8ebfe9b13:latest
xingyaoww/sweb.env.x86_64.31244378a92e3bcce809ac:latest
xingyaoww/sweb.env.x86_64.efa6065ed5bf204410fd53:latest
xingyaoww/sweb.env.x86_64.a0efca7a0fe6719dbf65c2:latest
xingyaoww/sweb.env.x86_64.502d8fc6ebccd881244091:latest
luolin101/sweb.env.x86_64.eb002359cfcbe2edb56088:latest
xingyaoww/sweb.env.x86_64.d905bb51fb68acc5d4221b:latest
xingyaoww/sweb.env.x86_64.aa92880033da20ca313928:latest
luolin101/sweb.env.x86_64.c6d251a05e0af7688b64fd:latest
xingyaoww/sweb.env.x86_64.c795f4b88616b8462021ed:latest
luolin101/sweb.env.x86_64.1e5a06e76ee016d067d77e:latest
luolin101/sweb.env.x86_64.2e03d8e4d4bd373937a9ef:latest
luolin101/sweb.env.x86_64.4c16026920d27ea78f3b7a:latest
luolin101/sweb.env.x86_64.d15120dfdbda9831e9646b:latest
luolin101/sweb.env.x86_64.c581ba273c3275679773dd:latest
luolin101/sweb.env.x86_64.dc800a1bbe275c5de0c4aa:latest
luolin101/sweb.env.x86_64.59bd7d84a0939c7caba7e6:latest
xingyaoww/sweb.env.x86_64.0d80c7dec81ee2f2f513e2:latest
xingyaoww/sweb.base.x86_64:latest

View File

@@ -0,0 +1,62 @@
#!/bin/bash
# Pull pre-built Visual SWE-bench docker images and re-tag them locally.
#
# Usage: $0 <cache_level> [set]
#   cache_level: base | env | instance
#                Each level also pulls the images of the levels listed before it.
#   set:         lite | full (defaults to lite when omitted)
#
# The images to pull are read from a text file located next to this script,
# one image reference per line.
set -e

LEVEL=$1
# three levels:
# - base, keyword "sweb.base"
# - env, keyword "sweb.env"
# - instance, keyword "sweb.eval"
SET=$2

if [ -z "$LEVEL" ]; then
    echo "Usage: $0 <cache_level> <set>"
    echo "cache_level: base, env, or instance"
    echo "set: lite, full"
    exit 1
fi

if [ -z "$SET" ]; then
    echo "Usage: $0 <cache_level> <set>"
    echo "cache_level: base, env, or instance"
    echo "set: lite, full, default is lite"
    SET="lite"
fi

# NOTE(review): both sets currently resolve to the same image list; confirm
# whether a dedicated "lite" list is intended.
if [ "$SET" == "full" ]; then
    IMAGE_FILE="$(dirname "$0")/all-visualswebench-full-instance-images.txt"
else
    IMAGE_FILE="$(dirname "$0")/all-visualswebench-full-instance-images.txt"
fi

# Fail loudly when the list is missing: grep's failure would otherwise be
# hidden by the pipeline below and the script would "succeed" pulling nothing.
if [ ! -f "$IMAGE_FILE" ]; then
    echo "Image file not found: $IMAGE_FILE" >&2
    exit 1
fi

# Define a pattern based on the level
case $LEVEL in
    base)
        PATTERN="sweb.base"
        ;;
    env)
        PATTERN="sweb.base\|sweb.env"
        ;;
    instance)
        PATTERN="sweb.base\|sweb.env\|sweb.eval"
        ;;
    *)
        echo "Invalid cache level: $LEVEL"
        echo "Valid levels are: base, env, instance"
        exit 1
        ;;
esac

echo "Pulling docker images for [$LEVEL] level"
echo "Pattern: $PATTERN"
echo "Image file: $IMAGE_FILE"

# Read each line from the file, filter by pattern, and pull the docker image
grep "$PATTERN" "$IMAGE_FILE" | while IFS= read -r image; do
    # Local tag: drop everything up to the last "/" and replace _s_ with __
    renamed_image=$(echo "$image" | sed 's|.*/||; s/_s_/__/g')
    echo "Pulling $image into $renamed_image"
    docker pull "$image"
    docker tag "$image" "$renamed_image"
done

View File

@@ -0,0 +1,141 @@
#!/bin/bash
# Evaluate an inference output file with the Visual SWE-bench harness.
#
# Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]
#   output_file:  JSONL file to evaluate (required, must exist)
#   instance_id:  evaluate only this instance; empty means the whole file
#   dataset_name: defaults to luolin101/Visual-SWE-bench
#   split:        defaults to test

PROCESS_FILEPATH=$1
if [ -z "$PROCESS_FILEPATH" ]; then
    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
    exit 1
fi

# Quoted so that a path containing spaces is tested as a single operand.
if [ ! -f "$PROCESS_FILEPATH" ]; then
    echo "Error: $PROCESS_FILEPATH is not a file"
    exit 1
fi

# If instance_id is empty, it means we want to eval on the whole $PROCESS_FILEPATH
# otherwise, we want to eval on the instance_id
INSTANCE_ID=$2
DATASET_NAME=${3:-"luolin101/Visual-SWE-bench"}
SPLIT=${4:-"test"}

echo "INSTANCE_ID: $INSTANCE_ID"
echo "DATASET_NAME: $DATASET_NAME"
echo "SPLIT: $SPLIT"

# Work with the absolute path from here on.
PROCESS_FILEPATH=$(realpath "$PROCESS_FILEPATH")
FILE_DIR=$(dirname "$PROCESS_FILEPATH")
FILE_NAME=$(basename "$PROCESS_FILEPATH")

echo "Evaluating $FILE_NAME @ $FILE_DIR"

# ================================================
# detect whether PROCESS_FILEPATH is in OH format or in SWE-bench format
echo "=============================================================="
echo "Detecting whether PROCESS_FILEPATH is in OH format or in SWE-bench format"
echo "=============================================================="
# SWE-bench format is a JSONL where every line has three fields: model_name_or_path, instance_id, and model_patch
#
# Checks whether a JSONL file looks like SWE-bench format by inspecting its
# first line only.
# Arguments:
#   $1 - path of the file to inspect; falls back to $PROCESS_FILEPATH when omitted
# Returns:
#   0 if the first line has all three required fields, 1 otherwise
function is_swebench_format() {
    local jsonl_path="${1:-$PROCESS_FILEPATH}"
    local first_line

    # Read the first line of the file
    read -r first_line < "$jsonl_path"

    # jq -e exits non-zero when the expression is false or the input is not
    # valid JSON; every failure is normalized to 1 for the caller.
    if echo "$first_line" | jq -e '. | has("model_name_or_path") and has("instance_id") and has("model_patch")' > /dev/null; then
        return 0
    fi
    return 1
}
# Call the function with the file path
is_swebench_format "$PROCESS_FILEPATH"
IS_SWEBENCH_FORMAT=$?

# Use the result in an if-else statement
if [ $IS_SWEBENCH_FORMAT -eq 0 ]; then
    echo "The file IS in SWE-bench format."
    SWEBENCH_FORMAT_JSONL=$PROCESS_FILEPATH
else
    echo "The file IS NOT in SWE-bench format."

    # ==== Convert OH format to SWE-bench format ====
    echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
    poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py "$PROCESS_FILEPATH"
    # replace .jsonl with .swebench.jsonl in filename
    SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
    echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
    # assert that the file exists
    if [ ! -f "$SWEBENCH_FORMAT_JSONL" ]; then
        echo "Error: $SWEBENCH_FORMAT_JSONL does not exist. There is probably an error in the conversion process."
        exit 1
    fi
    SWEBENCH_FORMAT_JSONL=$(realpath "$SWEBENCH_FORMAT_JSONL")
fi
# ================================================

echo "=============================================================="
echo "Running SWE-bench evaluation"
echo "=============================================================="

# RUN_ID is a timestamp; it is used below to locate the harness logs and report.
RUN_ID=$(date +"%Y%m%d_%H%M%S")
N_PROCESS=16

if [ -z "$INSTANCE_ID" ]; then
    echo "Running SWE-bench evaluation on the whole input file..."
    # Dataset and split come from $DATASET_NAME / $SPLIT set earlier in this script
    # change `--dataset_name` and `--split` to alter dataset

    poetry run python -m visualswebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path "$SWEBENCH_FORMAT_JSONL" \
        --timeout 1800 \
        --cache_level instance \
        --max_workers "$N_PROCESS" \
        --run_id "$RUN_ID"

    # get the "model_name_or_path" from the first line of the SWEBENCH_FORMAT_JSONL
    MODEL_NAME_OR_PATH=$(jq -r '.model_name_or_path' "$SWEBENCH_FORMAT_JSONL" | head -n 1)
    echo "MODEL_NAME_OR_PATH: $MODEL_NAME_OR_PATH"

    RESULT_OUTPUT_DIR=$(dirname "$SWEBENCH_FORMAT_JSONL")
    echo "RESULT_OUTPUT_DIR: $RESULT_OUTPUT_DIR"

    # move the eval results to the target directory
    mkdir -p "$RESULT_OUTPUT_DIR"
    # rm eval_outputs directory if it exists
    if [ -d "$RESULT_OUTPUT_DIR/eval_outputs" ]; then
        rm -rf "$RESULT_OUTPUT_DIR/eval_outputs"
    fi

    # Move the per-model log directory next to the predictions, then rename it.
    mv "logs/run_evaluation/$RUN_ID/$MODEL_NAME_OR_PATH" "$RESULT_OUTPUT_DIR"
    mv "$RESULT_OUTPUT_DIR/$MODEL_NAME_OR_PATH" "$RESULT_OUTPUT_DIR/eval_outputs"
    echo "RUN_ID: $RUN_ID" > "$RESULT_OUTPUT_DIR/run_id.txt"

    # move report file
    REPORT_PATH="$MODEL_NAME_OR_PATH.$RUN_ID.json"
    if [ -f "$REPORT_PATH" ]; then
        # check if $RESULT_OUTPUT_DIR/report.json exists
        if [ -f "$RESULT_OUTPUT_DIR/report.json" ]; then
            echo "Report file $RESULT_OUTPUT_DIR/report.json already exists. Overwriting..."
            # Keep exactly one backup of the previous report.
            if [ -f "$RESULT_OUTPUT_DIR/report.json.bak" ]; then
                rm "$RESULT_OUTPUT_DIR/report.json.bak"
            fi
            mv "$RESULT_OUTPUT_DIR/report.json" "$RESULT_OUTPUT_DIR/report.json.bak"
        fi

        mv "$REPORT_PATH" "$RESULT_OUTPUT_DIR/report.json"
    fi

    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py "$PROCESS_FILEPATH"

else
    echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
    # NOTE(review): $INSTANCE_ID is left unquoted so that a space-separated list
    # still splits into several ids; confirm whether that is intended.
    poetry run python -m visualswebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path "$SWEBENCH_FORMAT_JSONL" \
        --timeout 1800 \
        --instance_ids $INSTANCE_ID \
        --cache_level instance \
        --max_workers "$N_PROCESS" \
        --run_id "$RUN_ID"
fi

View File

@@ -0,0 +1,117 @@
#!/bin/bash
# Configure a Visual SWE-bench inference run.
#
# Positional arguments:
#   $1 MODEL_CONFIG  $2 COMMIT_HASH  $3 AGENT   $4 EVAL_LIMIT  $5 MAX_ITER
#   $6 NUM_WORKERS   $7 DATASET      $8 SPLIT   $9 N_RUNS
# Environment toggles read here: USE_INSTANCE_IMAGE, RUN_WITH_BROWSING,
# USE_HINT_TEXT, EXP_NAME.
set -eo pipefail

# NOTE(review): checkout_eval_branch / get_openhands_version are presumably
# defined by this sourced file - confirm.
source "evaluation/utils/version_control.sh"

MODEL_CONFIG=$1
COMMIT_HASH=$2
AGENT=$3
EVAL_LIMIT=$4
MAX_ITER=$5
NUM_WORKERS=$6
DATASET=$7
SPLIT=$8
N_RUNS=$9

if [[ -z $NUM_WORKERS ]]; then
    NUM_WORKERS=1
    echo "Number of workers not specified, use default $NUM_WORKERS"
fi
checkout_eval_branch

# Fill in defaults for everything the caller left empty.
if [[ -z $AGENT ]]; then
    echo "Agent not specified, use default CodeActAgent"
    AGENT="CodeActAgent"
fi

if [[ -z $MAX_ITER ]]; then
    echo "MAX_ITER not specified, use default 100"
    MAX_ITER=100
fi

if [[ -z $USE_INSTANCE_IMAGE ]]; then
    echo "USE_INSTANCE_IMAGE not specified, use default true"
    USE_INSTANCE_IMAGE=true
fi

if [[ -z $RUN_WITH_BROWSING ]]; then
    echo "RUN_WITH_BROWSING not specified, use default false"
    RUN_WITH_BROWSING=false
fi

if [[ -z $DATASET ]]; then
    echo "DATASET not specified, use default luolin101/Visual-SWE-bench"
    DATASET="luolin101/Visual-SWE-bench"
fi

if [[ -z $SPLIT ]]; then
    echo "SPLIT not specified, use default test"
    SPLIT="test"
fi

# Make the two toggles visible to child processes.
export USE_INSTANCE_IMAGE
echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
export RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"

get_openhands_version

echo "AGENT: $AGENT"
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"

# Hints are off unless the caller opted in.
if [[ -z $USE_HINT_TEXT ]]; then
    export USE_HINT_TEXT=false
fi
echo "USE_HINT_TEXT: $USE_HINT_TEXT"

# Build the eval note: version, then one suffix per active option.
EVAL_NOTE="$OPENHANDS_VERSION"
if [[ $USE_HINT_TEXT == false ]]; then
    EVAL_NOTE+="-no-hint"
fi

if [[ $RUN_WITH_BROWSING == true ]]; then
    EVAL_NOTE+="-with-browsing"
fi

if [[ -n $EXP_NAME ]]; then
    EVAL_NOTE+="-$EXP_NAME"
fi
# Run one inference pass of evaluation/benchmarks/visual_swe_bench/run_infer.py.
#
# Arguments:
#   $1 - eval note passed through as --eval-note
# Reads: AGENT, MODEL_CONFIG, MAX_ITER, NUM_WORKERS, DATASET, SPLIT and the
#   optional EVAL_LIMIT (adds --eval-n-limit when non-empty).
#
# The command is assembled as an argv array instead of a string passed to
# `eval`, so each value reaches the program as exactly one argument and is
# never re-parsed by the shell.
function run_eval() {
    local eval_note=$1
    local -a cmd=(
        poetry run python evaluation/benchmarks/visual_swe_bench/run_infer.py
        --agent-cls "$AGENT"
        --llm-config "$MODEL_CONFIG"
        --max-iterations "$MAX_ITER"
        --eval-num-workers "$NUM_WORKERS"
        --eval-note "$eval_note"
        --dataset "$DATASET"
        --split "$SPLIT"
    )

    if [ -n "$EVAL_LIMIT" ]; then
        echo "EVAL_LIMIT: $EVAL_LIMIT"
        cmd+=(--eval-n-limit "$EVAL_LIMIT")
    fi

    # Run the command
    "${cmd[@]}"
}
# Drop the GitHub token so the agent cannot use it to push.
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push

if [[ -z $N_RUNS ]]; then
    N_RUNS=1
    echo "N_RUNS not specified, use default $N_RUNS"
fi

# One inference pass per run index, each tagged with its own eval note.
for ((i = 1; i <= N_RUNS; i++)); do
    current_eval_note="$EVAL_NOTE-run_$i"
    echo "EVAL_NOTE: $current_eval_note"
    run_eval $current_eval_note
done

checkout_original_branch

View File

@@ -0,0 +1,40 @@
#!/bin/bash
# Prepare /workspace for one benchmark instance.
#
# Requires SWE_INSTANCE_ID in the environment. Looks the instance up in
# $SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json, copies /testbed to
# /workspace/<repo>__<version> (with "/" replaced by "__"), then activates the
# "testbed" conda environment.

source ~/.bashrc
SWEUTIL_DIR=/swe_util

# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
    echo "Error: SWE_INSTANCE_ID is not set." >&2
    exit 1
fi

# Read swe-bench-instance.json and extract the item whose instance_id matches
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' "$SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json")

if [[ -z "$item" ]]; then
    echo "No item found for the provided instance ID."
    exit 1
fi

# Workspace directory name: "<repo>__<version>" with every "/" turned into "__"
WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')

echo "WORKSPACE_NAME: $WORKSPACE_NAME"

# Clear the workspace
if [ -d /workspace ]; then
    rm -rf /workspace/*
else
    mkdir /workspace
fi
# Copy repo to workspace
# The glob above skips dot-entries, so remove the target explicitly if present.
if [ -d "/workspace/$WORKSPACE_NAME" ]; then
    rm -rf "/workspace/$WORKSPACE_NAME"
fi
mkdir -p /workspace
cp -r /testbed "/workspace/$WORKSPACE_NAME"

# Activate instance-specific environment
. /opt/miniconda3/etc/profile.d/conda.sh
conda activate testbed

View File

@@ -2,6 +2,7 @@
echo "Running frontend checks..."
cd frontend
npm run check-unlocalized-strings
npm run check-translation-completeness
npx lint-staged
# Run backend pre-commit

View File

@@ -61,7 +61,7 @@ make build
# Start the application
make run
```
Or to run backend and frontend seperately.
Or to run backend and frontend separately.
```sh
# Start the backend from the root directory

View File

@@ -10,11 +10,7 @@ describe("ChatMessage", () => {
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it("should render an assistant message", () => {
render(<ChatMessage type="assistant" message="Hello, World!" />);
expect(screen.getByTestId("assistant-message")).toBeInTheDocument();
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it.todo("should render an assistant message");
it.skip("should support code syntax highlighting", () => {
const code = "```js\nconsole.log('Hello, World!')\n```";
@@ -66,10 +62,7 @@ describe("ChatMessage", () => {
it("should apply correct styles to inline code", () => {
render(
<ChatMessage
type="assistant"
message="Here is some `inline code` text"
/>,
<ChatMessage type="agent" message="Here is some `inline code` text" />,
);
const codeElement = screen.getByText("inline code");

View File

@@ -4,6 +4,7 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { ActionSuggestions } from "#/components/features/chat/action-suggestions";
import OpenHands from "#/api/open-hands";
import { MOCK_DEFAULT_USER_SETTINGS } from "#/mocks/handlers";
import { ConversationProvider } from "#/context/conversation-context";
// Mock dependencies
vi.mock("posthog-js", () => ({
@@ -38,12 +39,20 @@ vi.mock("react-i18next", () => ({
}),
}));
vi.mock("react-router", () => ({
useParams: () => ({
conversationId: "test-conversation-id",
}),
}));
const renderActionSuggestions = () =>
render(<ActionSuggestions onSuggestionsClick={() => {}} />, {
wrapper: ({ children }) => (
<QueryClientProvider client={new QueryClient()}>
{children}
</QueryClientProvider>
<ConversationProvider>
<QueryClientProvider client={new QueryClient()}>
{children}
</QueryClientProvider>
</ConversationProvider>
),
});
@@ -65,6 +74,11 @@ describe("ActionSuggestions", () => {
});
it("should render both GitHub buttons when GitHub token is set and repository is selected", async () => {
const getConversationSpy = vi.spyOn(OpenHands, "getConversation");
// @ts-expect-error - only required for testing
getConversationSpy.mockResolvedValue({
selected_repository: "test-repo",
});
renderActionSuggestions();
// Find all buttons with data-testid="suggestion"

View File

@@ -1,11 +1,9 @@
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import { act, screen, waitFor, within } from "@testing-library/react";
import { screen, waitFor, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import type { Message } from "#/message";
import { addUserMessage } from "#/state/chat-slice";
import { SUGGESTIONS } from "#/utils/suggestions";
import * as ChatSlice from "#/state/chat-slice";
import { WsClientProviderStatus } from "#/context/ws-client-provider";
import { ChatInterface } from "#/components/features/chat/chat-interface";
@@ -42,51 +40,10 @@ describe("Empty state", () => {
vi.clearAllMocks();
});
it("should render suggestions if empty", () => {
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
expect(screen.getByTestId("suggestions")).toBeInTheDocument();
act(() => {
store.dispatch(
addUserMessage({
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
pending: true,
}),
);
});
expect(screen.queryByTestId("suggestions")).not.toBeInTheDocument();
});
it.todo("should render suggestions if empty");
it("should render the default suggestions", () => {
renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
renderWithProviders(<ChatInterface />);
const suggestions = screen.getByTestId("suggestions");
const repoSuggestions = Object.keys(SUGGESTIONS.repo);
@@ -110,21 +67,8 @@ describe("Empty state", () => {
status: WsClientProviderStatus.CONNECTED,
isLoadingMessages: false,
}));
const addUserMessageSpy = vi.spyOn(ChatSlice, "addUserMessage");
const user = userEvent.setup();
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
renderWithProviders(<ChatInterface />);
const suggestions = screen.getByTestId("suggestions");
const displayedSuggestions = within(suggestions).getAllByRole("button");
@@ -133,9 +77,7 @@ describe("Empty state", () => {
await user.click(displayedSuggestions[0]);
// user message loaded to input
expect(addUserMessageSpy).not.toHaveBeenCalled();
expect(screen.queryByTestId("suggestions")).toBeInTheDocument();
expect(store.getState().chat.messages).toHaveLength(0);
expect(input).toHaveValue(displayedSuggestions[0].textContent);
},
);
@@ -149,19 +91,7 @@ describe("Empty state", () => {
isLoadingMessages: false,
}));
const user = userEvent.setup();
const { rerender } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
const { rerender } = renderWithProviders(<ChatInterface />);
const suggestions = screen.getByTestId("suggestions");
const displayedSuggestions = within(suggestions).getAllByRole("button");

View File

@@ -20,7 +20,6 @@ describe("AccountSettingsContextMenu", () => {
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
@@ -35,7 +34,6 @@ describe("AccountSettingsContextMenu", () => {
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
@@ -45,19 +43,18 @@ describe("AccountSettingsContextMenu", () => {
expect(onLogoutMock).toHaveBeenCalledOnce();
});
test("onLogout should be disabled if the user is not logged in", async () => {
test("logout button is always enabled", async () => {
render(
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn={false}
/>,
);
const logoutOption = screen.getByText("ACCOUNT_SETTINGS$LOGOUT");
await user.click(logoutOption);
expect(onLogoutMock).not.toHaveBeenCalled();
expect(onLogoutMock).toHaveBeenCalledOnce();
});
it("should call onClose when clicking outside of the element", async () => {
@@ -65,7 +62,6 @@ describe("AccountSettingsContextMenu", () => {
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);

View File

@@ -45,6 +45,8 @@ describe("ConversationPanel", () => {
last_updated_at: "2021-10-01T12:00:00Z",
created_at: "2021-10-01T12:00:00Z",
status: "STOPPED" as const,
url: null,
session_api_key: null,
},
{
conversation_id: "2",
@@ -53,6 +55,8 @@ describe("ConversationPanel", () => {
last_updated_at: "2021-10-02T12:00:00Z",
created_at: "2021-10-02T12:00:00Z",
status: "STOPPED" as const,
url: null,
session_api_key: null,
},
{
conversation_id: "3",
@@ -61,6 +65,8 @@ describe("ConversationPanel", () => {
last_updated_at: "2021-10-03T12:00:00Z",
created_at: "2021-10-03T12:00:00Z",
status: "STOPPED" as const,
url: null,
session_api_key: null,
},
];
@@ -143,6 +149,8 @@ describe("ConversationPanel", () => {
last_updated_at: "2021-10-01T12:00:00Z",
created_at: "2021-10-01T12:00:00Z",
status: "STOPPED" as const,
url: null,
session_api_key: null,
},
{
conversation_id: "2",
@@ -151,6 +159,8 @@ describe("ConversationPanel", () => {
last_updated_at: "2021-10-02T12:00:00Z",
created_at: "2021-10-02T12:00:00Z",
status: "STOPPED" as const,
url: null,
session_api_key: null,
},
{
conversation_id: "3",
@@ -159,6 +169,8 @@ describe("ConversationPanel", () => {
last_updated_at: "2021-10-03T12:00:00Z",
created_at: "2021-10-03T12:00:00Z",
status: "STOPPED" as const,
url: null,
session_api_key: null,
},
];

View File

@@ -1,89 +0,0 @@
import { screen } from "@testing-library/react";
import { describe, expect, it, vi } from "vitest";
import { renderWithProviders } from "test-utils";
import { GitRepositorySelector } from "#/components/features/git/git-repo-selector";
import OpenHands from "#/api/open-hands";
import { Provider } from "#/types/settings";
describe("GitRepositorySelector", () => {
const onInputChangeMock = vi.fn();
const onSelectMock = vi.fn();
it("should render the search input", () => {
renderWithProviders(
<GitRepositorySelector
onInputChange={onInputChangeMock}
onSelect={onSelectMock}
publicRepositories={[]}
userRepositories={[]}
/>,
);
expect(
screen.getByPlaceholderText("LANDING$SELECT_GIT_REPO"),
).toBeInTheDocument();
});
it("should show the GitHub login button in OSS mode", () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue({
APP_MODE: "oss",
APP_SLUG: "openhands",
GITHUB_CLIENT_ID: "test-client-id",
POSTHOG_CLIENT_KEY: "test-posthog-key",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
});
renderWithProviders(
<GitRepositorySelector
onInputChange={onInputChangeMock}
onSelect={onSelectMock}
publicRepositories={[]}
userRepositories={[]}
/>,
);
expect(screen.getByTestId("github-repo-selector")).toBeInTheDocument();
});
it("should show the search results", () => {
const mockSearchedRepos = [
{
id: 1,
full_name: "test/repo1",
git_provider: "github" as Provider,
stargazers_count: 100,
is_public: true,
pushed_at: "2023-01-01T00:00:00Z",
},
{
id: 2,
full_name: "test/repo2",
git_provider: "github" as Provider,
stargazers_count: 200,
is_public: true,
pushed_at: "2023-01-02T00:00:00Z",
},
];
const searchPublicRepositoriesSpy = vi.spyOn(
OpenHands,
"searchGitRepositories",
);
searchPublicRepositoriesSpy.mockResolvedValue(mockSearchedRepos);
renderWithProviders(
<GitRepositorySelector
onInputChange={onInputChangeMock}
onSelect={onSelectMock}
publicRepositories={[]}
userRepositories={[]}
/>,
);
expect(screen.getByTestId("github-repo-selector")).toBeInTheDocument();
});
});

View File

@@ -43,7 +43,6 @@ describe("HomeHeader", () => {
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
"gui",
undefined,
undefined,
undefined,

View File

@@ -22,7 +22,7 @@ const renderRepoConnector = () => {
path: "/conversations/:conversationId",
},
{
Component: Outlet,
Component: () => <Outlet />,
path: "/settings",
children: [
{
@@ -173,7 +173,6 @@ describe("RepoConnector", () => {
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
"gui",
"rbren/polaris",
"github",
undefined,

View File

@@ -0,0 +1,259 @@
import { render, screen } from "@testing-library/react";
import { describe, expect, vi, beforeEach, it } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { RepositorySelectionForm } from "../../../../src/components/features/home/repo-selection-form";
import OpenHands from "#/api/open-hands";
import { GitRepository } from "#/types/git";
// Hook mocks, each created together with its default return value.
// The vi.mock factories below only call them lazily, from inside the hook
// replacements, so individual tests can still override a return value.
const mockUseUserRepositories = vi.fn().mockReturnValue({
  data: [],
  isLoading: false,
  isError: false,
});

const mockUseCreateConversation = vi.fn().mockReturnValue({
  mutate: vi.fn(),
  isPending: false,
  isSuccess: false,
});

const mockUseIsCreatingConversation = vi.fn().mockReturnValue(false);

// Translation stub: t() echoes the key, so tests assert on i18n keys.
const mockUseTranslation = vi.fn().mockReturnValue({
  t: (key: string) => key,
});

// Auth stub: an authenticated user with providers configured.
const mockUseAuth = vi.fn().mockReturnValue({
  isAuthenticated: true,
  isLoading: false,
  providersAreSet: true,
  user: {
    id: 1,
    login: "testuser",
    avatar_url: "https://example.com/avatar.png",
    name: "Test User",
    email: "test@example.com",
    company: "Test Company",
  },
  login: vi.fn(),
  logout: vi.fn(),
});

// Module replacements: every hook delegates to its mock at call time.
vi.mock("#/hooks/mutation/use-create-conversation", () => ({
  useCreateConversation: () => mockUseCreateConversation(),
}));

vi.mock("#/hooks/use-is-creating-conversation", () => ({
  useIsCreatingConversation: () => mockUseIsCreatingConversation(),
}));

vi.mock("react-i18next", () => ({
  useTranslation: () => mockUseTranslation(),
}));

vi.mock("#/context/auth-context", () => ({
  useAuth: () => mockUseAuth(),
}));

// Debounce is replaced by the identity so the input value is used immediately.
vi.mock("#/hooks/use-debounce", () => ({
  useDebounce: (value: string) => value,
}));
const mockOnRepoSelection = vi.fn();
/**
 * Renders `RepositorySelectionForm` wired to `mockOnRepoSelection`, wrapped in
 * a React Query provider.
 *
 * A new `QueryClient` is created for every call so that no cached query data
 * leaks between tests. The client is built once, outside the wrapper
 * component, so that a re-render of the wrapper keeps using the same cache
 * instead of silently swapping in an empty one.
 *
 * @returns The Testing Library render result.
 */
const renderForm = () => {
  const queryClient = new QueryClient({
    defaultOptions: {
      queries: {
        // Fail fast: the error-state test should not wait for retries.
        retry: false,
      },
    },
  });

  return render(
    <RepositorySelectionForm onRepoSelection={mockOnRepoSelection} />,
    {
      wrapper: ({ children }) => (
        <QueryClientProvider client={queryClient}>
          {children}
        </QueryClientProvider>
      ),
    },
  );
};
describe("RepositorySelectionForm", () => {
beforeEach(() => {
vi.clearAllMocks();
});
it("shows loading indicator when repositories are being fetched", () => {
const MOCK_REPOS: GitRepository[] = [
{
id: 1,
full_name: "user/repo1",
git_provider: "github",
is_public: true,
},
{
id: 2,
full_name: "user/repo2",
git_provider: "github",
is_public: true,
},
];
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_REPOS);
renderForm();
// Check if loading indicator is displayed
expect(screen.getByTestId("repo-dropdown-loading")).toBeInTheDocument();
expect(screen.getByText("HOME$LOADING_REPOSITORIES")).toBeInTheDocument();
});
it("shows dropdown when repositories are loaded", async () => {
  // Repositories the mocked API resolves with.
  const userRepos: GitRepository[] = [
    { id: 1, full_name: "user/repo1", git_provider: "github", is_public: true },
    { id: 2, full_name: "user/repo2", git_provider: "github", is_public: true },
  ];
  vi.spyOn(OpenHands, "retrieveUserGitRepositories").mockResolvedValue(
    userRepos,
  );

  renderForm();

  // The dropdown only appears once the fetch has resolved, hence findBy*.
  const dropdown = await screen.findByTestId("repo-dropdown");
  expect(dropdown).toBeInTheDocument();
});
it("shows error message when repository fetch fails", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockRejectedValue(
new Error("Failed to load"),
);
renderForm();
expect(
await screen.findByTestId("repo-dropdown-error"),
).toBeInTheDocument();
expect(
screen.getByText("HOME$FAILED_TO_LOAD_REPOSITORIES"),
).toBeInTheDocument();
});
// Typing a repository URL into the dropdown should query the search API with
// the "owner/name" part of the URL and replace the user's own repositories
// with the search results.
it("should call the search repos API when searching a URL", async () => {
  const MOCK_REPOS: GitRepository[] = [
    {
      id: 1,
      full_name: "user/repo1",
      git_provider: "github",
      is_public: true,
    },
    {
      id: 2,
      full_name: "user/repo2",
      git_provider: "github",
      is_public: true,
    },
  ];
  const MOCK_SEARCH_REPOS: GitRepository[] = [
    {
      id: 3,
      full_name: "kubernetes/kubernetes",
      git_provider: "github",
      is_public: true,
    },
  ];

  const searchGitReposSpy = vi.spyOn(OpenHands, "searchGitRepositories");
  const retrieveUserGitRepositoriesSpy = vi.spyOn(
    OpenHands,
    "retrieveUserGitRepositories",
  );
  searchGitReposSpy.mockResolvedValue(MOCK_SEARCH_REPOS);
  retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_REPOS);

  renderForm();

  const input = await screen.findByTestId("repo-dropdown");
  await userEvent.click(input);

  // Before any search only the user's own repositories are listed.
  for (const repo of MOCK_REPOS) {
    expect(screen.getByText(repo.full_name)).toBeInTheDocument();
  }
  expect(
    screen.queryByText(MOCK_SEARCH_REPOS[0].full_name),
  ).not.toBeInTheDocument();
  expect(searchGitReposSpy).not.toHaveBeenCalled();

  await userEvent.type(input, "https://github.com/kubernetes/kubernetes");

  // NOTE(review): the second argument (3) is presumably a result limit —
  // confirm against OpenHands.searchGitRepositories.
  expect(searchGitReposSpy).toHaveBeenLastCalledWith(
    "kubernetes/kubernetes",
    3,
  );

  // The search result arrives through a resolved promise, so wait for it to
  // be rendered rather than asserting synchronously.
  expect(
    await screen.findByText(MOCK_SEARCH_REPOS[0].full_name),
  ).toBeInTheDocument();
  for (const repo of MOCK_REPOS) {
    expect(screen.queryByText(repo.full_name)).not.toBeInTheDocument();
  }
});
it("should call onRepoSelection when a searched repository is selected", async () => {
const MOCK_SEARCH_REPOS: GitRepository[] = [
{
id: 3,
full_name: "kubernetes/kubernetes",
git_provider: "github",
is_public: true,
},
];
const searchGitReposSpy = vi.spyOn(OpenHands, "searchGitRepositories");
searchGitReposSpy.mockResolvedValue(MOCK_SEARCH_REPOS);
renderForm();
const input = await screen.findByTestId("repo-dropdown");
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
"kubernetes/kubernetes",
3,
);
const searchedRepo = screen.getByText(MOCK_SEARCH_REPOS[0].full_name);
expect(searchedRepo).toBeInTheDocument();
await userEvent.click(searchedRepo);
expect(mockOnRepoSelection).toHaveBeenCalledWith(
MOCK_SEARCH_REPOS[0].full_name,
);
});
});

View File

@@ -85,7 +85,6 @@ describe("TaskCard", () => {
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledWith(
"suggested_task",
MOCK_RESPOSITORIES[0].full_name,
MOCK_RESPOSITORIES[0].git_provider,
undefined,

View File

@@ -11,7 +11,7 @@ import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
const renderTaskSuggestions = () => {
const RouterStub = createRoutesStub([
{
Component: TaskSuggestions,
Component: () => <TaskSuggestions />,
path: "/",
},
{

View File

@@ -0,0 +1,59 @@
import { render, screen } from "@testing-library/react";
import { describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { ApiKeysManager } from "#/components/features/settings/api-keys-manager";
// Mock the react-i18next
vi.mock("react-i18next", async () => {
const actual = await vi.importActual<typeof import("react-i18next")>("react-i18next");
return {
...actual,
useTranslation: () => ({
t: (key: string) => key,
}),
Trans: ({ i18nKey, components }: { i18nKey: string; components: Record<string, React.ReactNode> }) => {
// Simplified Trans component that renders the link
if (i18nKey === "SETTINGS$API_KEYS_DESCRIPTION") {
return (
<span>
API keys allow you to authenticate with the OpenHands API programmatically.
Keep your API keys secure; anyone with your API key can access your account.
For more information on how to use the API, see our {components.a}
</span>
);
}
return <span>{i18nKey}</span>;
},
};
});
// Mock the API keys hook
vi.mock("#/hooks/query/use-api-keys", () => ({
useApiKeys: () => ({
data: [],
isLoading: false,
error: null,
}),
}));
describe("ApiKeysManager", () => {
  /** Mounts ApiKeysManager inside a fresh React Query provider. */
  function mountManager() {
    return render(
      <QueryClientProvider client={new QueryClient()}>
        <ApiKeysManager />
      </QueryClientProvider>,
    );
  }

  it("should render the API documentation link", () => {
    mountManager();

    // getByRole throws unless exactly one link is present, so this also
    // checks that the docs link is the only link rendered.
    const docsLink = screen.getByRole("link");
    expect(docsLink).toBeInTheDocument();

    // The link must point at the cloud API docs and open in a new tab.
    const expectedAttributes: [string, string][] = [
      ["href", "https://docs.all-hands.dev/modules/usage/cloud/cloud-api"],
      ["target", "_blank"],
      ["rel", "noopener noreferrer"],
    ];
    for (const [attribute, value] of expectedAttributes) {
      expect(docsLink).toHaveAttribute(attribute, value);
    }
  });
});

View File

@@ -1,92 +1,11 @@
import { render, screen } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import { Messages } from "#/components/features/chat/messages";
import type { Message } from "#/message";
import { renderWithProviders } from "test-utils";
// Mock the useParams hook to provide a conversationId
vi.mock("react-router", async () => {
const actual = await vi.importActual<typeof import("react-router")>("react-router");
return {
...actual,
useParams: () => ({ conversationId: "test-conversation-id" }),
};
});
import { describe, it } from "vitest";
describe("File Operations Messages", () => {
it("should show success indicator for successful file read operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "read_file_contents",
content: "Successfully read file contents",
success: true,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
it.todo("should show success indicator for successful file read operation");
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
it.todo("should show failure indicator for failed file read operation");
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-success");
});
it.todo("should show success indicator for successful file edit operation");
it("should show failure indicator for failed file read operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "read_file_contents",
content: "Failed to read file contents",
success: false,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-danger");
});
it("should show success indicator for successful file edit operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "edit_file_contents",
content: "Successfully edited file contents",
success: true,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-success");
});
it("should show failure indicator for failed file edit operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "edit_file_contents",
content: "Failed to edit file contents",
success: false,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-danger");
});
it.todo("should show failure indicator for failed file edit operation");
});

View File

@@ -57,7 +57,7 @@ describe("UserActions", () => {
).not.toBeInTheDocument();
});
test("onLogout should not be called when the user is not logged in", async () => {
test("logout button is always enabled", async () => {
render(<UserActions onLogout={onLogoutMock} />);
const userAvatar = screen.getByTestId("user-avatar");
@@ -66,6 +66,6 @@ describe("UserActions", () => {
const logoutOption = screen.getByText("ACCOUNT_SETTINGS$LOGOUT");
await user.click(logoutOption);
expect(onLogoutMock).not.toHaveBeenCalled();
expect(onLogoutMock).toHaveBeenCalledOnce();
});
});

View File

@@ -2,7 +2,6 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
import { render, waitFor } from "@testing-library/react";
import React from "react";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import * as ChatSlice from "#/state/chat-slice";
import {
updateStatusWhenErrorMessagePresent,
WsClientProvider,
@@ -11,42 +10,15 @@ import {
describe("Propagate error message", () => {
it("should do nothing when no message was passed from server", () => {
const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
updateStatusWhenErrorMessagePresent(null);
updateStatusWhenErrorMessagePresent(undefined);
updateStatusWhenErrorMessagePresent({});
updateStatusWhenErrorMessagePresent({ message: null });
expect(addErrorMessageSpy).not.toHaveBeenCalled();
});
it("should display error to user when present", () => {
const message = "We have a problem!";
const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
updateStatusWhenErrorMessagePresent({ message });
it.todo("should display error to user when present");
expect(addErrorMessageSpy).toHaveBeenCalledWith({
message,
status_update: true,
type: "error",
});
});
it("should display error including translation id when present", () => {
const message = "We have a problem!";
const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
updateStatusWhenErrorMessagePresent({
message,
data: { msg_id: "..id.." },
});
expect(addErrorMessageSpy).toHaveBeenCalledWith({
message,
id: "..id..",
status_update: true,
type: "error",
});
});
it.todo("should display error including translation id when present");
});
// Create a mock for socket.io-client
@@ -84,6 +56,19 @@ function TestComponent() {
describe("WsClientProvider", () => {
// Resets recorded mock calls before every test and stubs the
// useUserConversation hook with a fixed, stopped conversation.
// NOTE(review): Vitest hoists vi.mock() calls to the top of the file, so this
// registration presumably takes effect once for the whole module rather than
// being re-applied before each test — consider moving it to module scope.
beforeEach(() => {
vi.clearAllMocks();
vi.mock("#/hooks/query/use-user-conversation", () => ({
useUserConversation: () => {
// Only the `data` field of the hook result is provided.
return { data: {
conversation_id: "1",
title: "Conversation 1",
selected_repository: null,
last_updated_at: "2021-10-01T12:00:00Z",
created_at: "2021-10-01T12:00:00Z",
status: "STOPPED" as const,
url: null,
session_api_key: null,
}}},
}));
});
it("should emit oh_user_action event when send is called", async () => {

View File

@@ -11,7 +11,6 @@ describe("Translations", () => {
<AccountSettingsContextMenu
onLogout={() => {}}
onClose={() => {}}
isLoggedIn
/>,
);
expect(

View File

@@ -48,7 +48,7 @@ describe("Content", () => {
await waitFor(() => {
expect(provider).toHaveValue("Anthropic");
expect(model).toHaveValue("claude-3-5-sonnet-20241022");
expect(model).toHaveValue("claude-3-7-sonnet-20250219");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "");
@@ -135,7 +135,7 @@ describe("Content", () => {
);
const condensor = screen.getByTestId("enable-memory-condenser-switch");
expect(model).toHaveValue("anthropic/claude-3-5-sonnet-20241022");
expect(model).toHaveValue("anthropic/claude-3-7-sonnet-20250219");
expect(baseUrl).toHaveValue("");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "");
@@ -542,7 +542,7 @@ describe("Form submission", () => {
// select model
await userEvent.click(model);
const modelOption = screen.getByText("claude-3-5-sonnet-20241022");
const modelOption = screen.getByText("claude-3-7-sonnet-20250219");
await userEvent.click(modelOption);
const submitButton = screen.getByTestId("submit-button");
@@ -550,7 +550,7 @@ describe("Form submission", () => {
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_model: "anthropic/claude-3-5-sonnet-20241022",
llm_model: "anthropic/claude-3-7-sonnet-20250219",
llm_base_url: "",
confirmation_mode: false,
}),

View File

@@ -0,0 +1,565 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { createRoutesStub, Outlet } from "react-router";
import SecretsSettingsScreen from "#/routes/secrets-settings";
import { SecretsService } from "#/api/secrets-service";
import { GetSecretsResponse } from "#/api/secrets-service.types";
import OpenHands from "#/api/open-hands";
import { MOCK_DEFAULT_USER_SETTINGS } from "#/mocks/handlers";
const MOCK_GET_SECRETS_RESPONSE: GetSecretsResponse["custom_secrets"] = [
{
name: "My_Secret_1",
description: "My first secret",
},
{
name: "My_Secret_2",
description: "My second secret",
},
];
const RouterStub = createRoutesStub([
{
Component: () => <Outlet />,
path: "/settings",
children: [
{
Component: SecretsSettingsScreen,
path: "/settings/secrets",
},
{
Component: () => <div data-testid="git-settings-screen" />,
path: "/settings/git",
},
],
},
]);
/**
 * Renders the router stub at `/settings/secrets` inside a React Query
 * provider.
 *
 * A new `QueryClient` is created for every call so cached secrets/settings do
 * not leak between tests. It is built once, outside the wrapper component, so
 * a re-render of the wrapper keeps the same cache (including optimistic
 * updates) instead of replacing it with an empty one.
 *
 * @returns The Testing Library render result.
 */
const renderSecretsSettings = () => {
  const queryClient = new QueryClient({
    // Fail fast so error-path tests do not wait for retries.
    defaultOptions: { queries: { retry: false } },
  });

  return render(<RouterStub initialEntries={["/settings/secrets"]} />, {
    wrapper: ({ children }) => (
      <QueryClientProvider client={queryClient}>
        {children}
      </QueryClientProvider>
    ),
  });
};
// Default for every test in this file: the config endpoint reports the "oss"
// app mode. Individual tests override this by re-mocking getConfig.
beforeEach(() => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return the config we need
getConfigSpy.mockResolvedValue({
APP_MODE: "oss",
});
});
describe("Content", () => {
it("should render the secrets settings screen", () => {
renderSecretsSettings();
screen.getByTestId("secrets-settings-screen");
});
it("should NOT render a button to connect with git if they havent already in oss", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
// @ts-expect-error - only return the config we need
getConfigSpy.mockResolvedValue({
APP_MODE: "oss",
});
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {},
});
renderSecretsSettings();
expect(getConfigSpy).toHaveBeenCalled();
await waitFor(() => expect(getSecretsSpy).toHaveBeenCalled());
expect(screen.queryByTestId("connect-git-button")).not.toBeInTheDocument();
});
it("should render a button to connect with git if they havent already in saas", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
// @ts-expect-error - only return the config we need
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
});
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {},
});
renderSecretsSettings();
expect(getSecretsSpy).not.toHaveBeenCalled();
await waitFor(() =>
expect(screen.queryByTestId("add-secret-button")).not.toBeInTheDocument(),
);
const button = await screen.findByTestId("connect-git-button");
await userEvent.click(button);
screen.getByTestId("git-settings-screen");
});
it("should render a message if there are no existing secrets", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
getSecretsSpy.mockResolvedValue([]);
renderSecretsSettings();
await screen.findByTestId("no-secrets-message");
});
it("should render existing secrets", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
renderSecretsSettings();
const secrets = await screen.findAllByTestId("secret-item");
expect(secrets).toHaveLength(2);
expect(screen.queryByTestId("no-secrets-message")).not.toBeInTheDocument();
});
});
describe("Secret actions", () => {
it("should create a new secret", async () => {
const createSecretSpy = vi.spyOn(SecretsService, "createSecret");
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
createSecretSpy.mockResolvedValue(true);
renderSecretsSettings();
// render form & hide items
expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
const button = await screen.findByTestId("add-secret-button");
await userEvent.click(button);
const secretForm = screen.getByTestId("add-secret-form");
const secrets = screen.queryAllByTestId("secret-item");
expect(screen.queryByTestId("add-secret-button")).not.toBeInTheDocument();
expect(secretForm).toBeInTheDocument();
expect(secrets).toHaveLength(0);
// enter details
const nameInput = within(secretForm).getByTestId("name-input");
const valueInput = within(secretForm).getByTestId("value-input");
const descriptionInput =
within(secretForm).getByTestId("description-input");
const submitButton = within(secretForm).getByTestId("submit-button");
vi.clearAllMocks(); // reset mocks to check for upcoming calls
await userEvent.type(nameInput, "My_Custom_Secret");
await userEvent.type(valueInput, "my-custom-secret-value");
await userEvent.type(descriptionInput, "My custom secret description");
await userEvent.click(submitButton);
// make POST request
expect(createSecretSpy).toHaveBeenCalledWith(
"My_Custom_Secret",
"my-custom-secret-value",
"My custom secret description",
);
// hide form & render items
expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
expect(getSecretsSpy).toHaveBeenCalled();
});
it("should edit a secret", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
const updateSecretSpy = vi.spyOn(SecretsService, "updateSecret");
getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
updateSecretSpy.mockResolvedValue(true);
renderSecretsSettings();
// render edit button within a secret list item
const secrets = await screen.findAllByTestId("secret-item");
const firstSecret = within(secrets[0]);
const editButton = firstSecret.getByTestId("edit-secret-button");
await userEvent.click(editButton);
// render edit form
const editForm = screen.getByTestId("edit-secret-form");
expect(screen.queryByTestId("add-secret-button")).not.toBeInTheDocument();
expect(editForm).toBeInTheDocument();
expect(screen.queryAllByTestId("secret-item")).toHaveLength(0);
// enter details
const nameInput = within(editForm).getByTestId("name-input");
const descriptionInput = within(editForm).getByTestId("description-input");
const submitButton = within(editForm).getByTestId("submit-button");
// should not show value input
const valueInput = within(editForm).queryByTestId("value-input");
expect(valueInput).not.toBeInTheDocument();
expect(nameInput).toHaveValue("My_Secret_1");
expect(descriptionInput).toHaveValue("My first secret");
await userEvent.clear(nameInput);
await userEvent.type(nameInput, "My_Edited_Secret");
await userEvent.clear(descriptionInput);
await userEvent.type(descriptionInput, "My edited secret description");
await userEvent.click(submitButton);
// make POST request
expect(updateSecretSpy).toHaveBeenCalledWith(
"My_Secret_1",
"My_Edited_Secret",
"My edited secret description",
);
// hide form
expect(screen.queryByTestId("edit-secret-form")).not.toBeInTheDocument();
// optimistic update
const updatedSecrets = await screen.findAllByTestId("secret-item");
expect(updatedSecrets).toHaveLength(2);
expect(updatedSecrets[0]).toHaveTextContent(/my_edited_secret/i);
});
it("should be able to cancel the create or edit form", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
renderSecretsSettings();
// render form & hide items
expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
const button = await screen.findByTestId("add-secret-button");
await userEvent.click(button);
const secretForm = screen.getByTestId("add-secret-form");
expect(secretForm).toBeInTheDocument();
// cancel button
const cancelButton = within(secretForm).getByTestId("cancel-button");
await userEvent.click(cancelButton);
expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
expect(screen.queryByTestId("add-secret-button")).toBeInTheDocument();
// render edit button within a secret list item
const secrets = await screen.findAllByTestId("secret-item");
const firstSecret = within(secrets[0]);
const editButton = firstSecret.getByTestId("edit-secret-button");
await userEvent.click(editButton);
// render edit form
const editForm = screen.getByTestId("edit-secret-form");
expect(editForm).toBeInTheDocument();
expect(screen.queryAllByTestId("secret-item")).toHaveLength(0);
// cancel button
const cancelEditButton = within(editForm).getByTestId("cancel-button");
await userEvent.click(cancelEditButton);
expect(screen.queryByTestId("edit-secret-form")).not.toBeInTheDocument();
expect(screen.queryAllByTestId("secret-item")).toHaveLength(2);
});
// When updateSecret rejects, the list must end up showing the original secret
// name again rather than the edited one.
it("should undo the optimistic update if the request fails", async () => {
  const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
  const updateSecretSpy = vi.spyOn(SecretsService, "updateSecret");
  getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
  updateSecretSpy.mockRejectedValue(new Error("Failed to update secret"));
  renderSecretsSettings();

  // render edit button within a secret list item
  const secrets = await screen.findAllByTestId("secret-item");
  const firstSecret = within(secrets[0]);
  const editButton = firstSecret.getByTestId("edit-secret-button");
  await userEvent.click(editButton);

  // render edit form
  const editForm = screen.getByTestId("edit-secret-form");
  expect(editForm).toBeInTheDocument();
  expect(screen.queryAllByTestId("secret-item")).toHaveLength(0);

  // enter details
  const nameInput = within(editForm).getByTestId("name-input");
  const submitButton = within(editForm).getByTestId("submit-button");

  // should not show value input
  const valueInput = within(editForm).queryByTestId("value-input");
  expect(valueInput).not.toBeInTheDocument();

  await userEvent.clear(nameInput);
  await userEvent.type(nameInput, "My_Edited_Secret");
  await userEvent.click(submitButton);

  // the update is attempted with the old name, the new name and the
  // untouched description
  expect(updateSecretSpy).toHaveBeenCalledWith(
    "My_Secret_1",
    "My_Edited_Secret",
    "My first secret",
  );

  // hide form
  expect(screen.queryByTestId("edit-secret-form")).not.toBeInTheDocument();

  // The edited name contains underscores, so the pattern must too; the
  // previous /my edited secret/i could never match and made this check
  // vacuous. Re-query inside waitFor so a transient optimistic render does
  // not race the rollback.
  await waitFor(() => {
    const updatedSecrets = screen.getAllByTestId("secret-item");
    expect(updatedSecrets).toHaveLength(2);
    expect(updatedSecrets[0]).not.toHaveTextContent(/my_edited_secret/i);
    expect(updatedSecrets[0]).toHaveTextContent(/my_secret_1/i);
  });
});
it("should remove the secret from the list after deletion", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
const deleteSecretSpy = vi.spyOn(SecretsService, "deleteSecret");
getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
deleteSecretSpy.mockResolvedValue(true);
renderSecretsSettings();
// render delete button within a secret list item
const secrets = await screen.findAllByTestId("secret-item");
const secondSecret = within(secrets[1]);
const deleteButton = secondSecret.getByTestId("delete-secret-button");
await userEvent.click(deleteButton);
// confirmation modal
const confirmationModal = screen.getByTestId("confirmation-modal");
const confirmButton =
within(confirmationModal).getByTestId("confirm-button");
await userEvent.click(confirmButton);
// make DELETE request
expect(deleteSecretSpy).toHaveBeenCalledWith("My_Secret_2");
expect(screen.queryByTestId("confirmation-modal")).not.toBeInTheDocument();
// optimistic update
expect(screen.queryAllByTestId("secret-item")).toHaveLength(1);
expect(screen.queryByText("My_Secret_2")).not.toBeInTheDocument();
});
it("should be able to cancel the delete confirmation modal", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
const deleteSecretSpy = vi.spyOn(SecretsService, "deleteSecret");
getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
deleteSecretSpy.mockResolvedValue(true);
renderSecretsSettings();
// render delete button within a secret list item
const secrets = await screen.findAllByTestId("secret-item");
const secondSecret = within(secrets[1]);
const deleteButton = secondSecret.getByTestId("delete-secret-button");
await userEvent.click(deleteButton);
// confirmation modal
const confirmationModal = screen.getByTestId("confirmation-modal");
const cancelButton = within(confirmationModal).getByTestId("cancel-button");
await userEvent.click(cancelButton);
// no DELETE request
expect(deleteSecretSpy).not.toHaveBeenCalled();
expect(screen.queryByTestId("confirmation-modal")).not.toBeInTheDocument();
expect(screen.queryAllByTestId("secret-item")).toHaveLength(2);
});
// When deleteSecret rejects, the secret must still be listed afterwards.
it("should revert the optimistic update if the request fails", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
const deleteSecretSpy = vi.spyOn(SecretsService, "deleteSecret");
getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
deleteSecretSpy.mockRejectedValue(new Error("Failed to delete secret"));
renderSecretsSettings();
// render delete button within a secret list item
const secrets = await screen.findAllByTestId("secret-item");
const secondSecret = within(secrets[1]);
const deleteButton = secondSecret.getByTestId("delete-secret-button");
await userEvent.click(deleteButton);
// confirmation modal
const confirmationModal = screen.getByTestId("confirmation-modal");
const confirmButton =
within(confirmationModal).getByTestId("confirm-button");
await userEvent.click(confirmButton);
// deleteSecret is invoked for the second secret and the modal closes
expect(deleteSecretSpy).toHaveBeenCalledWith("My_Secret_2");
expect(screen.queryByTestId("confirmation-modal")).not.toBeInTheDocument();
// the removal is rolled back: both secrets are still listed
expect(screen.queryAllByTestId("secret-item")).toHaveLength(2);
expect(screen.queryByText("My_Secret_2")).toBeInTheDocument();
});
it("should hide the no items message when in form view", async () => {
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
getSecretsSpy.mockResolvedValue([]);
renderSecretsSettings();
// render form & hide items
expect(screen.queryByTestId("no-secrets-message")).not.toBeInTheDocument();
const button = await screen.findByTestId("add-secret-button");
await userEvent.click(button);
const secretForm = screen.getByTestId("add-secret-form");
expect(secretForm).toBeInTheDocument();
expect(screen.queryByTestId("no-secrets-message")).not.toBeInTheDocument();
});
it("should not allow spaces in secret names", async () => {
const createSecretSpy = vi.spyOn(SecretsService, "createSecret");
renderSecretsSettings();
// render form & hide items
expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
const button = await screen.findByTestId("add-secret-button");
await userEvent.click(button);
const secretForm = screen.getByTestId("add-secret-form");
expect(secretForm).toBeInTheDocument();
// enter details
const nameInput = within(secretForm).getByTestId("name-input");
const valueInput = within(secretForm).getByTestId("value-input");
const submitButton = within(secretForm).getByTestId("submit-button");
await userEvent.type(nameInput, "My Custom Secret With Spaces");
await userEvent.type(valueInput, "my-custom-secret-value");
await userEvent.click(submitButton);
// make POST request
expect(createSecretSpy).not.toHaveBeenCalled();
await userEvent.clear(nameInput);
await userEvent.type(nameInput, "MyCustomSecret");
await userEvent.click(submitButton);
expect(createSecretSpy).toHaveBeenCalledWith(
"MyCustomSecret",
"my-custom-secret-value",
undefined,
);
});
it("should not allow existing secret names", async () => {
const createSecretSpy = vi.spyOn(SecretsService, "createSecret");
const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE.slice(0, 1));
renderSecretsSettings();
// render form & hide items
expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
const button = await screen.findByTestId("add-secret-button");
await userEvent.click(button);
const secretForm = screen.getByTestId("add-secret-form");
expect(secretForm).toBeInTheDocument();
// enter details
const nameInput = within(secretForm).getByTestId("name-input");
const valueInput = within(secretForm).getByTestId("value-input");
const submitButton = within(secretForm).getByTestId("submit-button");
await userEvent.type(nameInput, "My_Secret_1");
await userEvent.type(valueInput, "my-custom-secret-value");
await userEvent.click(submitButton);
// make POST request
expect(createSecretSpy).not.toHaveBeenCalled();
expect(screen.queryByText(/secret already exists/i)).toBeInTheDocument();
await userEvent.clear(nameInput);
await userEvent.type(nameInput, "My_Custom_Secret");
await userEvent.clear(valueInput);
await userEvent.type(valueInput, "my-custom-secret-value");
await userEvent.click(submitButton);
expect(createSecretSpy).toHaveBeenCalledWith(
"My_Custom_Secret",
"my-custom-secret-value",
undefined,
);
expect(
screen.queryByText("SECRETS$SECRET_VALUE_REQUIRED"),
).not.toBeInTheDocument();
});
it("should not submit whitespace secret names or values", async () => {
const createSecretSpy = vi.spyOn(SecretsService, "createSecret");
renderSecretsSettings();
// render form & hide items
expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
const button = await screen.findByTestId("add-secret-button");
await userEvent.click(button);
const secretForm = screen.getByTestId("add-secret-form");
expect(secretForm).toBeInTheDocument();
// enter details
const nameInput = within(secretForm).getByTestId("name-input");
const valueInput = within(secretForm).getByTestId("value-input");
const submitButton = within(secretForm).getByTestId("submit-button");
await userEvent.type(nameInput, " ");
await userEvent.type(valueInput, "my-custom-secret-value");
await userEvent.click(submitButton);
// make POST request
expect(createSecretSpy).not.toHaveBeenCalled();
await userEvent.clear(nameInput);
await userEvent.type(nameInput, "My_Custom_Secret");
await userEvent.clear(valueInput);
await userEvent.type(valueInput, " ");
await userEvent.click(submitButton);
expect(createSecretSpy).not.toHaveBeenCalled();
expect(
screen.queryByText("SECRETS$SECRET_VALUE_REQUIRED"),
).toBeInTheDocument();
});
// A submit rejected by validation (here: duplicate secret name) must leave
// what the user typed in the form untouched.
it("should not reset input values on an invalid submit", async () => {
  const getSecretsSpy = vi.spyOn(SecretsService, "getSecrets");
  const createSecretSpy = vi.spyOn(SecretsService, "createSecret");
  getSecretsSpy.mockResolvedValue(MOCK_GET_SECRETS_RESPONSE);
  renderSecretsSettings();

  // render form & hide items
  expect(screen.queryByTestId("add-secret-form")).not.toBeInTheDocument();
  const button = await screen.findByTestId("add-secret-button");
  await userEvent.click(button);

  const secretForm = screen.getByTestId("add-secret-form");
  expect(secretForm).toBeInTheDocument();

  // enter details, reusing the name of a secret that already exists
  const nameInput = within(secretForm).getByTestId("name-input");
  const valueInput = within(secretForm).getByTestId("value-input");
  const submitButton = within(secretForm).getByTestId("submit-button");

  await userEvent.type(nameInput, MOCK_GET_SECRETS_RESPONSE[0].name);
  await userEvent.type(valueInput, "my-custom-secret-value");
  await userEvent.click(submitButton);

  // createSecret is not called and the duplicate-name error is shown
  expect(createSecretSpy).not.toHaveBeenCalled();
  expect(screen.queryByText(/secret already exists/i)).toBeInTheDocument();

  // both fields still hold the values that were typed
  expect(nameInput).toHaveValue(MOCK_GET_SECRETS_RESPONSE[0].name);
  expect(valueInput).toHaveValue("my-custom-secret-value");
});
});

View File

@@ -79,7 +79,7 @@ describe("Settings Screen", () => {
};
it("should render the navbar", async () => {
const sectionsToInclude = ["llm", "git", "application"];
const sectionsToInclude = ["llm", "git", "application", "secrets"];
const sectionsToExclude = ["api keys", "credits"];
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return app mode
@@ -110,7 +110,13 @@ describe("Settings Screen", () => {
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
});
const sectionsToInclude = ["git", "application", "credits", "api keys"];
const sectionsToInclude = [
"git",
"application",
"credits",
"secrets",
"api keys",
];
const sectionsToExclude = ["llm"];
renderSettingsScreen();

View File

@@ -1,146 +0,0 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { handleStatusMessage, handleActionMessage } from "#/services/actions";
import store from "#/store";
import { trackError } from "#/utils/error-handler";
import ActionType from "#/types/action-type";
import { ActionMessage } from "#/types/message";
// Mock dependencies
vi.mock("#/utils/error-handler", () => ({
trackError: vi.fn(),
}));
vi.mock("#/store", () => ({
default: {
dispatch: vi.fn(),
},
}));
describe("Actions Service", () => {
beforeEach(() => {
vi.clearAllMocks();
});
describe("handleStatusMessage", () => {
  it("should dispatch info messages to status state", () => {
    const infoStatus = {
      type: "info",
      message: "Runtime is not available",
      id: "runtime.unavailable",
      status_update: true as const,
    };

    handleStatusMessage(infoStatus);

    // The dispatched action must carry the status message as its payload.
    const actionWithPayload = expect.objectContaining({ payload: infoStatus });
    expect(store.dispatch).toHaveBeenCalledWith(actionWithPayload);
  });

  it("should log error messages and display them in chat", () => {
    const errorStatus = {
      type: "error",
      message: "Runtime connection failed",
      id: "runtime.connection.failed",
      status_update: true as const,
    };

    handleStatusMessage(errorStatus);

    // Error statuses are reported through trackError with the "chat" source...
    const expectedReport = {
      message: "Runtime connection failed",
      source: "chat",
      metadata: { msgId: "runtime.connection.failed" },
    };
    expect(trackError).toHaveBeenCalledWith(expectedReport);

    // ...and are also dispatched to the store like any other status message.
    const actionWithPayload = expect.objectContaining({ payload: errorStatus });
    expect(store.dispatch).toHaveBeenCalledWith(actionWithPayload);
  });
});
describe("handleActionMessage", () => {
  it("should use first-person perspective for task completion messages", () => {
    // One row per FINISH outcome: the `task_completed` argument sent with the
    // action and the bold phrase expected in the resulting assistant message.
    const finishCases = [
      { id: 1, taskCompleted: "partial", outcome: "completed partially" },
      { id: 2, taskCompleted: "false", outcome: "not completed" },
      { id: 3, taskCompleted: "true", outcome: "completed successfully" },
    ] as const;

    finishCases.forEach(({ id, taskCompleted, outcome }) => {
      const finishMessage: ActionMessage = {
        id,
        action: ActionType.FINISH,
        source: "agent",
        message: "",
        timestamp: new Date().toISOString(),
        args: {
          final_thought: "",
          task_completed: taskCompleted,
          outputs: "",
          thought: "",
        },
      };

      // Capture only the assistant chat message that mentions this outcome;
      // any other action dispatched while handling the message is ignored.
      let capturedMessage = "";
      (store.dispatch as any).mockImplementation((action: any) => {
        if (
          action.type === "chat/addAssistantMessage" &&
          action.payload.includes(`believe that the task was **${outcome}**`)
        ) {
          capturedMessage = action.payload;
        }
      });

      handleActionMessage(finishMessage);

      // The message must be phrased in the first person ("I believe ...").
      expect(capturedMessage).toContain(
        `I believe that the task was **${outcome}**`,
      );
    });
  });
});
});

View File

@@ -1,51 +0,0 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { handleObservationMessage } from "#/services/observations";
import store from "#/store";
import { ObservationMessage } from "#/types/message";
// Mock dependencies
vi.mock("#/store", () => ({
default: {
dispatch: vi.fn(),
},
}));
describe("Observations Service", () => {
  beforeEach(() => {
    // Reset call history of the mocked store between tests.
    vi.clearAllMocks();
  });

  describe("handleObservationMessage", () => {
    /** Builds a fresh "error" observation fixture for each test. */
    function createErrorMessage(): ObservationMessage {
      return {
        id: 14,
        timestamp: "2025-04-14T13:37:54.451843",
        message: "The action has not been executed.",
        cause: 12,
        observation: "error",
        content: "The action has not been executed.",
        extras: {
          error_id: "",
          metadata: {},
        },
      };
    }

    it("should dispatch error messages exactly once", () => {
      const observation = createErrorMessage();

      handleObservationMessage(observation);

      // Exactly one action may reach the store for a single error observation.
      expect(store.dispatch).toHaveBeenCalledTimes(1);

      const expectedAction = {
        type: "chat/addAssistantObservation",
        payload: expect.objectContaining({
          observation: "error",
          content: "The action has not been executed.",
          source: "user",
          extras: {
            error_id: "",
          },
        }),
      };
      expect(store.dispatch).toHaveBeenCalledWith(expectedAction);
    });
  });
});

View File

@@ -1,8 +1,4 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { handleObservationMessage } from "#/services/observations";
import { setScreenshotSrc, setUrl } from "#/state/browser-slice";
import ObservationType from "#/types/observation-type";
import store from "#/store";
import { describe, it, vi, beforeEach, afterEach } from "vitest";
// Mock the store module
vi.mock("#/store", () => ({
@@ -20,43 +16,9 @@ describe("handleObservationMessage", () => {
vi.resetAllMocks();
});
it("updates browser state when receiving a browse observation", () => {
const message = {
id: "test-id",
cause: "test-cause",
observation: ObservationType.BROWSE,
content: "test content",
message: "test message",
extras: {
url: "https://example.com",
screenshot: "base64-screenshot-data",
},
};
handleObservationMessage(message);
it.todo("updates browser state when receiving a browse observation");
// Check that setScreenshotSrc and setUrl were called with the correct values
expect(store.dispatch).toHaveBeenCalledWith(setScreenshotSrc("base64-screenshot-data"));
expect(store.dispatch).toHaveBeenCalledWith(setUrl("https://example.com"));
});
it("updates browser state when receiving a browse_interactive observation", () => {
const message = {
id: "test-id",
cause: "test-cause",
observation: ObservationType.BROWSE_INTERACTIVE,
content: "test content",
message: "test message",
extras: {
url: "https://example.com",
screenshot: "base64-screenshot-data",
},
};
handleObservationMessage(message);
// Check that setScreenshotSrc and setUrl were called with the correct values
expect(store.dispatch).toHaveBeenCalledWith(setScreenshotSrc("base64-screenshot-data"));
expect(store.dispatch).toHaveBeenCalledWith(setUrl("https://example.com"));
});
});
it.todo(
"updates browser state when receiving a browse_interactive observation",
);
});

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.37.0",
"version": "0.39.0",
"private": true,
"type": "module",
"engines": {
@@ -8,30 +8,30 @@
},
"dependencies": {
"@heroui/react": "2.7.8",
"@microlink/react-json-view": "^1.26.1",
"@microlink/react-json-view": "^1.26.2",
"@monaco-editor/react": "^4.7.0-rc.0",
"@react-router/node": "^7.5.3",
"@react-router/serve": "^7.5.3",
"@react-types/shared": "^3.29.0",
"@reduxjs/toolkit": "^2.7.0",
"@react-router/node": "^7.6.0",
"@react-router/serve": "^7.6.0",
"@react-types/shared": "^3.29.1",
"@reduxjs/toolkit": "^2.8.2",
"@stripe/react-stripe-js": "^3.7.0",
"@stripe/stripe-js": "^7.3.0",
"@tanstack/react-query": "^5.75.4",
"@tanstack/react-query": "^5.76.1",
"@vitejs/plugin-react": "^4.4.0",
"@xterm/addon-fit": "^0.10.0",
"@xterm/xterm": "^5.4.0",
"axios": "^1.9.0",
"clsx": "^2.1.1",
"eslint-config-airbnb-typescript": "^18.0.0",
"framer-motion": "^12.10.0",
"i18next": "^25.1.1",
"framer-motion": "^12.12.1",
"i18next": "^25.1.3",
"i18next-browser-languagedetector": "^8.1.0",
"i18next-http-backend": "^3.0.2",
"isbot": "^5.1.27",
"isbot": "^5.1.28",
"jose": "^6.0.11",
"lucide-react": "^0.507.0",
"lucide-react": "^0.511.0",
"monaco-editor": "^0.52.2",
"posthog-js": "^1.239.1",
"posthog-js": "^1.245.1",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-highlight": "^0.15.0",
@@ -40,15 +40,15 @@
"react-icons": "^5.5.0",
"react-markdown": "^10.1.0",
"react-redux": "^9.2.0",
"react-router": "^7.5.3",
"react-router": "^7.6.0",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.9",
"remark-gfm": "^4.0.1",
"sirv-cli": "^3.0.1",
"socket.io-client": "^4.8.1",
"tailwind-merge": "^3.2.0",
"tailwind-merge": "^3.3.0",
"vite": "^6.3.5",
"web-vitals": "^3.5.2",
"web-vitals": "^5.0.1",
"ws": "^8.18.2"
},
"scripts": {
@@ -68,7 +68,8 @@
"lint:fix": "eslint src --ext .ts,.tsx,.js --fix && prettier --write src/**/*.{ts,tsx}",
"prepare": "cd .. && husky frontend/.husky",
"typecheck": "react-router typegen && tsc",
"check-unlocalized-strings": "node scripts/check-unlocalized-strings.cjs"
"check-unlocalized-strings": "node scripts/check-unlocalized-strings.cjs",
"check-translation-completeness": "node scripts/check-translation-completeness.cjs"
},
"lint-staged": {
"src/**/*.{ts,tsx,js}": [
@@ -82,28 +83,28 @@
"@babel/types": "^7.27.0",
"@mswjs/socket.io-binding": "^0.1.1",
"@playwright/test": "^1.52.0",
"@react-router/dev": "^7.5.3",
"@react-router/dev": "^7.6.0",
"@tailwindcss/typography": "^0.5.16",
"@tanstack/eslint-plugin-query": "^5.74.7",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/node": "^22.15.12",
"@types/react": "^19.1.3",
"@types/react-dom": "^19.1.3",
"@types/node": "^22.15.21",
"@types/react": "^19.1.5",
"@types/react-dom": "^19.1.5",
"@types/react-highlight": "^0.12.8",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/ws": "^8.18.1",
"@typescript-eslint/eslint-plugin": "^7.18.0",
"@typescript-eslint/parser": "^7.18.0",
"@vitest/coverage-v8": "^3.1.3",
"@vitest/coverage-v8": "^3.1.4",
"autoprefixer": "^10.4.21",
"cross-env": "^7.0.3",
"eslint": "^8.57.0",
"eslint-config-airbnb": "^19.0.4",
"eslint-config-airbnb-typescript": "^18.0.0",
"eslint-config-prettier": "^10.1.3",
"eslint-config-prettier": "^10.1.5",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-jsx-a11y": "^6.10.2",
"eslint-plugin-prettier": "^5.4.0",
@@ -112,11 +113,11 @@
"eslint-plugin-unused-imports": "^4.1.4",
"husky": "^9.1.7",
"jsdom": "^26.1.0",
"lint-staged": "^15.5.2",
"lint-staged": "^16.0.0",
"msw": "^2.6.6",
"postcss": "^8.5.2",
"prettier": "^3.5.3",
"stripe": "^18.1.0",
"stripe": "^18.1.1",
"tailwindcss": "^3.4.17",
"typescript": "^5.8.3",
"vite-plugin-svgr": "^4.2.0",

View File

@@ -0,0 +1,88 @@
#!/usr/bin/env node
/**
 * Pre-commit hook script to check for translation completeness.
 *
 * Every key in src/i18n/translation.json must have an entry for each language
 * code found in src/i18n/index.ts, and must not have entries for language
 * codes that are not listed there.
 *
 * Exits with status 1 (after printing a report to stderr) when any key has
 * missing or extra languages, or when no language codes could be extracted.
 */
const fs = require('fs');
const path = require('path');

// Load the translation file
const translationJsonPath = path.join(__dirname, '../src/i18n/translation.json');
const translationJson = require(translationJsonPath);

// Load the available languages from the i18n index file
const i18nIndexPath = path.join(__dirname, '../src/i18n/index.ts');
const i18nIndexContent = fs.readFileSync(i18nIndexPath, 'utf8');

// Extract the language codes from the AvailableLanguages array.
// This is a purely textual match on entries shaped exactly like
// `{ label: "...", value: "..." }`, so it depends on that file's formatting.
const languageCodesRegex = /\{ label: "[^"]+", value: "([^"]+)" \}/g;
const supportedLanguageCodes = Array.from(
  i18nIndexContent.matchAll(languageCodesRegex),
  (match) => match[1]
);

// If the pattern matched nothing (e.g. the index file was reformatted), every
// language of every key would be reported as "extra". Fail with a clear
// message instead of producing a misleading report.
if (supportedLanguageCodes.length === 0) {
  console.error('\x1b[31m%s\x1b[0m', 'ERROR: No supported languages detected');
  console.error(`Could not extract any language codes from ${i18nIndexPath}.`);
  process.exit(1);
}

const supportedLanguageSet = new Set(supportedLanguageCodes);

// Track missing and extra translations, keyed by translation key
const missingTranslations = {};
const extraLanguages = {};

// Check each translation key
Object.entries(translationJson).forEach(([key, translations]) => {
  // Get the languages available for this key. An entry that is not an object
  // has no per-language translations at all, so it is treated as empty.
  const availableLanguages =
    translations !== null && typeof translations === 'object'
      ? Object.keys(translations)
      : [];
  const availableLanguageSet = new Set(availableLanguages);

  // Find missing languages for this key
  const missing = supportedLanguageCodes.filter(
    (langCode) => !availableLanguageSet.has(langCode)
  );
  if (missing.length > 0) {
    missingTranslations[key] = missing;
  }

  // Find extra languages for this key
  const extra = availableLanguages.filter(
    (langCode) => !supportedLanguageSet.has(langCode)
  );
  if (extra.length > 0) {
    extraLanguages[key] = extra;
  }
});

const keysWithMissing = Object.keys(missingTranslations);
const keysWithExtra = Object.keys(extraLanguages);
const hasErrors = keysWithMissing.length > 0 || keysWithExtra.length > 0;

// Generate detailed error message if there are missing translations
if (keysWithMissing.length > 0) {
  console.error('\x1b[31m%s\x1b[0m', 'ERROR: Missing translations detected');
  console.error(`Found ${keysWithMissing.length} translation keys with missing languages:`);
  Object.entries(missingTranslations).forEach(([key, langs]) => {
    console.error(`- Key "${key}" is missing translations for: ${langs.join(', ')}`);
  });
  console.error('\nPlease add the missing translations before committing.');
}

// Generate detailed error message if there are extra languages
if (keysWithExtra.length > 0) {
  console.error('\x1b[31m%s\x1b[0m', 'ERROR: Extra languages detected');
  console.error(`Found ${keysWithExtra.length} translation keys with extra languages not in AvailableLanguages:`);
  Object.entries(extraLanguages).forEach(([key, langs]) => {
    console.error(`- Key "${key}" has translations for unsupported languages: ${langs.join(', ')}`);
  });
  console.error('\nPlease remove the extra languages before committing.');
}

// Exit with error code if there are issues
if (hasErrors) {
  process.exit(1);
} else {
  console.log('\x1b[32m%s\x1b[0m', 'All translation keys have complete language coverage!');
}

View File

@@ -111,12 +111,26 @@ const EXCLUDED_TECHNICAL_STRINGS = [
"GitLab API", // Git provider specific terminology
"Pull Request", // Git provider specific terminology
"GitHub API", // Git provider specific terminology
"add-secret-form", // Test ID for secret form
"edit-secret-form", // Test ID for secret form
];
/**
 * Reports whether `str` exactly matches one of the entries in
 * EXCLUDED_TECHNICAL_STRINGS.
 */
function isExcludedTechnicalString(str) {
  const isListed = EXCLUDED_TECHNICAL_STRINGS.some((entry) => entry === str);
  return isListed;
}
/**
 * Heuristic for identifier-like strings: a string that contains no space
 * character and at least one colon or underscore is treated as a code
 * (e.g. "browser_interactive" or "error:").
 */
function isLikelyCode(str) {
  const hasSpace = str.includes(" ");
  const hasCodeMarker = str.includes(":") || str.includes("_");
  return !hasSpace && hasCodeMarker;
}
function isCommonDevelopmentString(str) {
// Technical patterns that are definitely not UI strings
const technicalPatterns = [
@@ -383,6 +397,11 @@ function isLikelyUserFacingText(str) {
return false;
}
// Check if it looks like a code rather than a key
if (isLikelyCode(str)) {
return false
}
// Check if it's a raw translation key that should be wrapped in t()
if (isRawTranslationKey(str)) {
return true;

View File

@@ -1,7 +1,9 @@
import OpenHands from "#/api/open-hands";
/**
* Returns a URL compatible for the file service
* @param conversationId ID of the conversation
* @returns URL of the conversation
*/
export const getConversationUrl = (conversationId: string) =>
`/api/conversations/${conversationId}`;
OpenHands.getConversationUrl(conversationId);

Some files were not shown because too many files have changed in this diff Show More