Compare commits

...

148 Commits

Author SHA1 Message Date
rohitvinodmalhotra@gmail.com
bc1305fc0e add section for gitlab in docs 2025-04-29 14:47:07 -04:00
rohitvinodmalhotra@gmail.com
f985a46cfd link to customize 2025-04-29 14:41:12 -04:00
rohitvinodmalhotra@gmail.com
9ca96afe29 clarify thread context 2025-04-29 14:38:57 -04:00
rohitvinodmalhotra@gmail.com
7f3f44432e fix typo 2025-04-29 03:42:56 -04:00
rohitvinodmalhotra@gmail.com
35b6b8ae2f add back link 2025-04-29 03:38:06 -04:00
rohitvinodmalhotra@gmail.com
52110305b3 fix header sizes 2025-04-29 03:33:52 -04:00
rohitvinodmalhotra@gmail.com
877644be8c fix structure 2025-04-29 03:31:58 -04:00
openhands
3bc85eb7ac Update resolver documentation for GitHub and GitLab support 2025-04-29 06:35:46 +00:00
Xingyao Wang
5fa01ed278 Fix mobile layout for repo picker and suggested tasks (#8137)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-28 21:39:09 -04:00
Xingyao Wang
1f747232cf feat: add agent memory microagents for experiment (#8122) 2025-04-28 19:08:44 -04:00
Engel Nyst
4b1ed30e97 Fix truncation, ensure first user message and log (#8103)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-28 22:43:41 +02:00
Panduka Muditha
998de564cd feat: Add CLI support for agent pause and resume (#8129)
Co-authored-by: Bashwara Undupitiya <bashwarau@verdentra.com>
2025-04-28 16:26:18 -04:00
Engel Nyst
06ce12eff4 Update resolver model (#8124) 2025-04-28 21:07:40 +02:00
sp.wack
88fc26d9b0 frontend: Better diff contrast (#8128) 2025-04-28 22:53:37 +04:00
dependabot[bot]
99233ec153 chore(deps): bump the version-all group with 6 updates (#8125)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-28 20:50:39 +02:00
sp.wack
ae9573a503 chore(frontend): Breakdown settings screen (#7929)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-28 22:32:28 +04:00
dependabot[bot]
f2725eeb3d chore(deps): bump the version-all group across 1 directory with 8 updates (#8123)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>
2025-04-28 17:18:21 +00:00
Engel Nyst
1b63633030 Simplify microagents (#8114)
Co-authored-by: Robert Brennan <accounts@rbren.io>
2025-04-28 15:00:06 +00:00
Ryan H. Tran
107789b5a8 Add missing last ps1 match output into combined output (#8097) 2025-04-28 21:06:26 +08:00
Panduka Muditha
04bdea5faf feat: Add CLI support for /new, /status and /settings (#8017)
Co-authored-by: Bashwara Undupitiya <bashwarau@verdentra.com>
2025-04-28 08:52:33 -04:00
Engel Nyst
2bad4ea3d2 Litellm emergency fix (#8116) 2025-04-28 08:36:45 +02:00
Rohit Malhotra
1c4c477b3f [Refactor]: Abstract resolver into classes (#7999)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-27 18:52:52 -04:00
Rohit Malhotra
391ba1d988 [Fix]: Move suggest task prompts to BE (#8109)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-27 16:32:15 -04:00
Engel Nyst
70f469b0c1 [chore] run full pre-commit (#8104) 2025-04-27 08:43:26 +02:00
Graham Neubig
8a5c6d3bef Fix long setup scripts failing (#8101)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-26 12:27:54 -04:00
tofarr
998e04e51b Fix for issue where running unit tests resets local settings (#8100)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-04-26 07:24:49 -06:00
Boxuan Li
da7041b5e9 Allow cmd execution after running a background command (#8093) 2025-04-26 03:04:05 +00:00
Robert Brennan
e4b7b31f48 Add VS Code tab alongside workspace, terminal, and jupyter tabs (#8033)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-25 15:30:50 -07:00
chuckbutkus
587c53f115 Maybe fix for pre-commit errors (#8082)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>
Co-authored-by: Graham Neubig <neubig@gmail.com>
2025-04-25 21:10:00 +00:00
mamoodi
4d76f31610 Update Microagents documentation (#7978) 2025-04-25 16:52:55 -04:00
Rohit Malhotra
c25a96825c [Fix]: Sort github repos in memory for cloud openhands (#8083)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-04-25 15:16:43 -04:00
OpenHands
16137942eb Fix issue #7888: Update resolver doc to refer to repo.md instead of the old file (#8089)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-04-25 16:13:05 +00:00
dependabot[bot]
cf07b151dc chore(deps): bump the version-all group with 5 updates (#8091)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-25 17:30:17 +02:00
Lenshood
ea3787c2ba fix: dev.sh failed due to outdated configuration of Dockerfile (#8058)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-04-25 11:36:53 +08:00
chuckbutkus
956d278138 fix call to convo store get_instance (#8081) 2025-04-25 01:41:00 +00:00
மனோஜ்குமார் பழனிச்சாமி
fa186fc433 chore: Use recommended functions (#8056) 2025-04-25 09:39:19 +08:00
Ray Myers
4ef9c72da1 fix - Show error message for too many conversations (#8078) 2025-04-24 19:53:17 -05:00
tofarr
c5245a622d Refactor Authentication (#8040)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: rohitvinodmalhotra@gmail.com <rohitvinodmalhotra@gmail.com>
2025-04-24 18:49:41 -06:00
Engel Nyst
9b1aaa53fe Reset the reset (#8079) 2025-04-25 00:59:37 +02:00
Rohit Malhotra
4deffa3907 [Fix]: Don't wipe out secret store on reset route (#8075) 2025-04-24 17:41:46 -04:00
Rohit Malhotra
a47c6f3ed1 [Fix]: Log HTTP err type in Git service classes (#8076) 2025-04-24 17:41:36 -04:00
dependabot[bot]
90ece3f8e1 chore(deps): bump the version-all group across 1 directory with 14 updates (#8062)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>
2025-04-24 20:04:33 +00:00
Robert Brennan
a948b0fef3 Change log levels from info to debug for various agent controller messages (#8073)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-24 20:00:21 +00:00
Robert Brennan
52848cd3db Improve git handler with better branch management (#8065)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-24 12:34:25 -07:00
mamoodi
62f015370a Update Documentation with new release features (#8045) 2025-04-24 14:58:42 -04:00
sp.wack
7109b057b6 Make tmux optional for development (#8063)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
2025-04-24 18:13:33 +00:00
Rohit Malhotra
e694fc2d58 [Feat]: Add suggested tasks for Gitlab (#8049) 2025-04-24 13:26:52 -04:00
Robert Brennan
50baf3fd18 Add OpenAPI spec to Docusaurus documentation (#7998)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-24 10:19:33 -07:00
dependabot[bot]
dc37f039bf chore(deps): bump the version-all group across 1 directory with 14 updates (#8064)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-24 18:01:39 +02:00
Engel Nyst
91fff77ffe Fix lint (#8060) 2025-04-24 14:34:22 +00:00
sp.wack
db01cd39d5 hotfix: Run both BE and FE pre-commits with husky (#8059) 2025-04-24 18:21:41 +04:00
மனோஜ்குமார் பழனிச்சாமி
00865fbb8a fix: reconnect if stream closed (#8055) 2025-04-24 16:05:28 +04:00
Rohit Malhotra
356cd9ff9f (Hotfix): Issue comments on Cloud Resolver (#8053) 2025-04-23 22:50:00 -04:00
Xingyao Wang
78d82408b7 fix: use lower range of tcp port for file viewer (#8051) 2025-04-24 01:51:44 +00:00
Panduka Muditha
3c4ebc3d8a fix: Add error handling for scenarios where tmux is unavailable (#8043)
Co-authored-by: Anuradha Weeraman <anuradha@verdentra.com>
2025-04-23 21:01:06 +02:00
Rohit Malhotra
964478c22f [Feat]: Custom secrets plumbing for BE (#7891)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-23 12:44:25 -04:00
Robert Brennan
00c449d447 Add loading indicator to repository dropdown on home page (#8015)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-23 12:27:49 -04:00
Rohit Malhotra
bfd75a1355 (Chore): Rm legacy resolver code (#8001) 2025-04-23 11:34:03 -04:00
Graham Neubig
dc91cb263b Add extensive typing to controller directory (#7731)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Ray Myers <ray.myers@gmail.com>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-04-23 15:33:17 +00:00
Robert Brennan
fa559ace86 Add API keys management UI to settings page (#7710)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-23 19:08:32 +04:00
mamoodi
1fd26d196a Release 0.34.0 (#8011) 2025-04-23 11:06:28 -04:00
Robert Brennan
5de62d85fd add an option for a headless backend (#8032) 2025-04-23 04:09:22 +02:00
Chase
5d749aeba7 replace erroneous rstrip() with removesuffix() (#8024) 2025-04-22 22:50:14 +00:00
Chase
693c72d670 remove sse subsection accessor of McpConfig in action_execution_client (#8021) 2025-04-23 00:40:10 +02:00
Engel Nyst
62557d44f2 Use short tool descriptions for o4-mini (#8022) 2025-04-23 00:30:21 +02:00
Robert Brennan
89f8e162da Fix: Don't show status indicator for command timeouts (#8012)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-22 23:30:07 +04:00
Robert Brennan
b0a9938e6c Always run git init in SaaS mode regardless of workspace_base setting (#8014)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-22 17:43:20 +00:00
Rohit Malhotra
039fe295a4 Add RateLimitError and handle rate limiting in GitLab and GitHub services (#8003)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-04-22 16:30:41 +00:00
dependabot[bot]
8f3ff1210e chore(deps): bump the version-all group with 6 updates (#8009)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-22 15:16:41 +00:00
Peter Hamilton
5e1e685493 fix: Use SANDBOX_BASE_CONTAINER_IMAGE in resolver workflow (#7956)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-22 16:15:49 +02:00
sp.wack
e9f2b72ea5 chore: Better home screen (#7784)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-22 13:15:41 +00:00
Rohit Malhotra
986b90be0a [Fix]: fetch latest token when existing token doesn't exist (#8000) 2025-04-22 03:22:48 +00:00
Robert Brennan
bf9f2aa7a5 Initialize git repo in workspace when no GitHub repo is selected (#7904)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-21 18:34:42 -04:00
Engel Nyst
b3bd3924a0 Fix and simplify local runtime init (#7997) 2025-04-22 00:24:22 +02:00
Rohit Malhotra
0de50153a0 Add HTTP method option to Git service fetch_data functions (#7996)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-21 18:15:05 -04:00
Xingyao Wang
a04024a239 refactor: file viewer server so it is accessible via localhost without authentication (#7987)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-21 22:12:06 +00:00
Rohit Malhotra
1e509a70d4 [Fix]: Dedup token verification logic in resolver (#7967)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-21 20:34:29 +00:00
Engel Nyst
300a59853b Quick fix local runtime (#7991) 2025-04-21 20:28:30 +00:00
Rohit Malhotra
2514b200c5 Fix dictionary changed size during iteration error in EventStream (#7984)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-21 16:21:30 -04:00
Michael Panchenko
52d881c98d Fix issue with "default" in tool schema for gemini-preview (#7964) 2025-04-21 21:21:33 +02:00
Rohit Malhotra
6ac23aea80 [Feat]: Add branch naming convention (#7989) 2025-04-21 18:35:16 +00:00
Xingyao Wang
0412949018 Add agent_class to SystemMessageAction and display in frontend (#7935)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-22 02:08:15 +08:00
Michael Panchenko
5b5adc5c7b Minor refactoring: remove step and task-creation in AgentController (#7981) 2025-04-21 19:28:22 +02:00
dependabot[bot]
0b40f6fac8 chore(deps): bump the version-all group with 7 updates (#7980)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-21 19:14:58 +02:00
Rohit Malhotra
44d488b718 [Fix]: Building runtime image with unspecified base container img (#7977) 2025-04-21 11:30:22 -04:00
Rohit Malhotra
4f9120ffc6 (Chore): Unsupport resolve all issues in resolver (#7975) 2025-04-21 14:37:53 +00:00
Tetsuuuuuuu
50426edaa1 feat: add argument --base-container-image to resolver (#7612)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-04-21 10:01:03 -04:00
Xingyao Wang
a792f84a83 chore: improve logging for "not a git repository" (#7944) 2025-04-21 16:13:48 +04:00
Alexander Litzenberger
cd9d96766c Update documentation on local-llms (#7805)
Co-authored-by: Alex Litzenberger <alex@agot.ai>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-04-21 00:20:13 +00:00
Michael Panchenko
14564b25d6 Fix linting (#7965) 2025-04-21 06:34:40 +08:00
Rohit Malhotra
0637b5b912 [Fix]: Replace duplicate enums for providers in resolver (#7954) 2025-04-20 14:06:18 -04:00
Rohit Malhotra
20bf48b693 [Fix]: mismatch between repo object definition between FE and BE (#7953) 2025-04-20 00:28:10 +00:00
Michael Panchenko
5b3270be2d Possibility to disable default tools (#7951) 2025-04-20 02:14:46 +02:00
Tom Deckers
ae43744052 Fix Github Enterprise GraphQL URL (#7939)
Co-authored-by: Tom Deckers <tdeckers@cisco.com>
Co-authored-by: Robert Brennan <accounts@rbren.io>
Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>
2025-04-19 21:09:00 +00:00
Xingyao Wang
b5d7e428d1 Fix #7916: Update Benchmark Score link in README (#7943)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-19 00:27:54 +02:00
Rohit Malhotra
d65ea313e8 Add external auth id to /repos route (#7946) 2025-04-18 18:19:36 -04:00
Rohit Malhotra
a09ecadba6 (Chore): Rm deprecate /installations route (#7945) 2025-04-18 21:51:08 +00:00
Rohit Malhotra
358166feb2 [Logging]: Add warning logs for gitlab api (#7941) 2025-04-18 21:31:36 +00:00
Kenny Dizi
85e2b73eb4 [Feat] Support o3 and o4 mini (#7898)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-04-18 21:23:19 +00:00
Rohit Malhotra
c18475ddc2 Add public vs private info for repo list (#7937) 2025-04-18 17:07:08 -04:00
Xingyao Wang
06fcf54475 chore: track condeser metadata for llm completion (#7938) 2025-04-19 05:05:31 +08:00
Rohit Malhotra
f751f8ab37 [Feat]: Add graphql fetching for GitLab service cls (#7839) 2025-04-18 16:53:49 -04:00
Tei1988
523c6d03c1 fix(resolver): Allow using github.token in Actions and fix base_domain handling (#7934) 2025-04-18 20:50:59 +00:00
Rohit Malhotra
0e0f043e59 [Feat]: Improve resolver inline pr comment localization (#7932) 2025-04-18 15:56:34 -04:00
Xingyao Wang
91c691d526 [agent] Read-only Agent (#6947)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-19 02:46:24 +08:00
Xingyao Wang
c6092291ce chore: make sure Makefile make lint also lint tests (#7933) 2025-04-18 14:01:36 -04:00
Engel Nyst
a2c55cfdef Refactor to clean up and move utility/legacy out of the agent (#7917) 2025-04-19 01:53:33 +08:00
Michael Panchenko
76cad626ed Bugfix: make extraction of poetry_venvs_path more robust (#7920) 2025-04-18 17:33:52 +00:00
Xingyao Wang
7c23993344 fix(eval): typo in SWE_Bench evaluation (#7930) 2025-04-19 00:31:08 +08:00
dependabot[bot]
b669715416 chore(deps): bump the version-all group across 1 directory with 20 updates (#7925)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>
2025-04-18 16:08:35 +00:00
tofarr
7292122b72 Refactor agent loop initialization for better extensibility (#7926) 2025-04-18 09:44:34 -06:00
dependabot[bot]
992ae15c78 chore(deps): bump the version-all group with 7 updates (#7927)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-18 15:02:41 +00:00
Carlos Freund
f2b4772ac2 fix(runtime) Avoid windows reserved port (#7722)
Co-authored-by: Carlos Freund <carlosfreund@gmail.com>
2025-04-18 22:18:46 +08:00
Engel Nyst
9b9b1291fc [chore] Just linting on swe-bench files (#7918) 2025-04-18 22:12:01 +08:00
Xingyao Wang
6171395ef9 Refactor metrics handling to include condenser metrics (#7907)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-17 23:08:35 +00:00
sp.wack
d270476d6c hotfix(backend): Exclude open PRs when fetching suggested tasks (#7912) 2025-04-18 03:00:04 +04:00
Xingyao Wang
f1f7dca009 refactor(action_execution_client): rename function and add property (#7913) 2025-04-18 06:59:13 +08:00
mamoodi
45f572f268 Update documentation (#7905) 2025-04-17 17:43:18 -04:00
Niels Mündler
4b124d5906 Add inference for SWT-Bench (#7201)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Calvin Smith <email@cjsmith.io>
2025-04-17 14:49:42 -06:00
Robert Brennan
988d4aa679 Improved logging for agent controller, including pending action time (#7897)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-17 20:22:38 +00:00
Robert Brennan
b452fe273c Restore terminal interactivity while keeping UI changes (#7903)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-17 19:42:03 +00:00
sp.wack
9544b37c8a hotfix(forntend): Wait for runtime to start before fetching git changes for diff view (#7910) 2025-04-17 15:15:03 -04:00
Rohit Malhotra
0491357fef [Refactor]: Collapse initial user message for cloud resolver (#7871)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-17 15:09:28 -04:00
Robert Brennan
fedd517a71 Change add funds input to number type that only accepts integers (#7628)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-17 18:56:23 +00:00
Rohit Malhotra
9cbed8802f Update translation from GitHub Settings to Git Settings (#7908)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-17 18:55:28 +00:00
Rohit Malhotra
c2e1babd76 Fix failing unit test on main (#7909)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-17 18:43:09 +00:00
Carlos Freund
cc8b677f3e fix(metrics) Merge metrics of agent LLM and condenser LLM (#7890)
Co-authored-by: Carlos Freund <carlosfreund@gmail.com>
2025-04-18 01:15:14 +08:00
chuckbutkus
78e3f82de1 Change client name and add IDP hint (#7787)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Ray Myers <ray.myers@gmail.com>
Co-authored-by: Calvin Smith <email@cjsmith.io>
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Graham Neubig <neubig@gmail.com>
Co-authored-by: Panduka Muditha <pandukal@verdentra.com>
Co-authored-by: Bashwara Undupitiya <bashwarau@verdentra.com>
Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>
2025-04-17 12:23:12 -04:00
dependabot[bot]
20ca2cd8b9 chore(deps): bump the version-all group across 1 directory with 9 updates (#7902)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-17 17:45:44 +02:00
mamoodi
ea0fcd6002 Change title of button to start new conversation (#7464) 2025-04-17 11:16:33 -04:00
juanmichelini
6bcebd4b9d Jetbrains CI Benchmark (#7811)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-04-17 15:10:20 +00:00
Xingyao Wang
93e9db3206 Refactor system message handling to use event stream (#7824)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Calvin Smith <email@cjsmith.io>
2025-04-17 22:30:19 +08:00
Ray Myers
caf34d83bd chore - remove dummy agent test (#7848) 2025-04-17 08:52:02 -04:00
LoneRifle
49d3cd0863 fix(check-unlocalized-strings): make HTML tag test case-insens (#7892) 2025-04-17 14:26:49 +02:00
sp.wack
34989f8e96 feat: Diff UI (#6934) 2025-04-17 16:12:25 +04:00
Rohit Malhotra
9274664302 [Fix]: Rm unnecessary provider token serializer (#7889) 2025-04-16 21:41:07 +00:00
mamoodi
437f0a0154 Release 0.33.0 (#7882)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: tofarr <tofarr@gmail.com>
Co-authored-by: Robert Brennan <accounts@rbren.io>
Co-authored-by: Bashwara Undupitiya <65051545+bashwara@users.noreply.github.com>
2025-04-16 17:03:00 -04:00
Bashwara Undupitiya
9d79bf5fff fix: Update folder security dialog styling (#7886) 2025-04-16 14:33:00 -04:00
Robert Brennan
4c62b1d428 Fix: Ensure consistent tab height when workspace tab is selected (#7885)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 17:54:50 +00:00
Rohit Malhotra
b2a4b4ed90 [Refactor]: Modularize settings storage logic (#7868)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 13:30:13 -04:00
tofarr
9262babc3b Fix for error on close (#7884) 2025-04-16 11:13:58 -06:00
dependabot[bot]
1c80ded753 chore(deps-dev): bump the eslint group across 1 directory with 2 updates (#7790)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-16 19:41:25 +04:00
Xingyao Wang
4de8c4d6b1 Update repo microagent docs with frontend action handling information (#7856)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 14:27:31 +00:00
Xingyao Wang
91f2254039 frontend: fix terminal prompt and command styling (#7872)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 22:13:15 +08:00
Calvin Smith
66fd156c65 feat: Combining condensers (#7867)
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
2025-04-16 07:09:13 -06:00
Xingyao Wang
4ec16f3c2e microagent: Update github.md to avoid agent marking PR as ready for review (#7873) 2025-04-15 23:56:41 -04:00
Robert Brennan
628003abef Convert terminal to tab, make terminal read only (#7795)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-04-16 11:22:52 +08:00
Xingyao Wang
07e400b73d refactor(mcp): simplify MCP config & fix timeout (#7820)
Co-authored-by: ducphamle2 <ducphamle212@gmail.com>
Co-authored-by: trungbach <trunga2k29@gmail.com>
Co-authored-by: quangdz1704 <Ntq.1704@gmail.com>
Co-authored-by: Duc Pham <44611780+ducphamle2@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 11:04:21 +08:00
417 changed files with 28628 additions and 11013 deletions

View File

@@ -1,53 +0,0 @@
# Workflow that uses the DummyAgent to run a simple task
name: Run E2E test with dummy agent
# Always run on "main"
# Always run on PRs
on:
push:
branches:
- main
pull_request:
# If triggered by a PR, it will be in the same group. However, each commit on main will be in its own unique group
concurrency:
group: ${{ github.workflow }}-${{ (github.head_ref && github.ref) || github.run_id }}
cancel-in-progress: true
jobs:
test:
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
- name: Install tmux
run: sudo apt-get update && sudo apt-get install -y tmux
- name: Setup Node.js
uses: useblacksmith/setup-node@v5
with:
node-version: '22.x'
- name: Install poetry via pipx
run: pipx install poetry
- name: Set up Python
uses: useblacksmith/setup-python@v6
with:
python-version: '3.12'
cache: 'poetry'
- name: Install Python dependencies using Poetry
run: poetry install --without evaluation
- name: Build Environment
run: make build
- name: Run tests
run: |
set -e
SANDBOX_FORCE_REBUILD_RUNTIME=True poetry run python3 openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
- name: Check exit code
run: |
if [ $? -ne 0 ]; then
echo "Test failed"
exit 1
else
echo "Test passed"
fi

View File

@@ -24,7 +24,7 @@ on:
LLM_MODEL:
required: false
type: string
default: "anthropic/claude-3-5-sonnet-20241022"
default: "anthropic/claude-3-7-sonnet-20250219"
LLM_API_VERSION:
required: false
type: string
@@ -179,7 +179,7 @@ jobs:
echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV
echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.PAT_TOKEN || github.token }}" >> $GITHUB_ENV
echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
echo "SANDBOX_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
# Set branch variables
echo "TARGET_BRANCH=${{ inputs.target_branch || 'main' }}" >> $GITHUB_ENV

View File

@@ -54,3 +54,20 @@ Frontend:
## Template for Github Pull Request
If you are starting a pull request (PR), please follow the template in `.github/pull_request_template.md`.
## Implementation Details
These details may or may not be useful for your current task.
### Frontend
#### Action Handling:
- Actions are defined in `frontend/src/types/action-type.ts`
- The `HANDLED_ACTIONS` array in `frontend/src/state/chat-slice.ts` determines which actions are displayed as collapsible UI elements
- To add a new action type to the UI:
1. Add the action type to the `HANDLED_ACTIONS` array
2. Implement the action handling in `addAssistantAction` function in chat-slice.ts
3. Add a translation key in the format `ACTION_MESSAGE$ACTION_NAME` to the i18n files
- Actions with `thought` property are displayed in the UI based on their action type:
- Regular actions (like "run", "edit") display the thought as a separate message
- Special actions (like "think") are displayed as collapsible elements only

View File

@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.32-nikolaik`
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.34-nikolaik`
## Develop inside Docker container

View File

@@ -39,6 +39,7 @@ ifeq ($(INSTALL_DOCKER),)
@$(MAKE) -s check-docker
endif
@$(MAKE) -s check-poetry
@$(MAKE) -s check-tmux
@echo "$(GREEN)Dependencies checked successfully.$(RESET)"
check-system:
@@ -101,6 +102,18 @@ check-docker:
exit 1; \
fi
check-tmux:
@echo "$(YELLOW)Checking tmux installation...$(RESET)"
@if command -v tmux > /dev/null; then \
echo "$(BLUE)$(shell tmux -V) is already installed.$(RESET)"; \
else \
echo "$(YELLOW)╔════════════════════════════════════════════════════════════════════════════╗$(RESET)"; \
echo "$(YELLOW)║ OPTIONAL: tmux is not installed. ║$(RESET)"; \
echo "$(YELLOW)║ Some advanced terminal features may not work without tmux. ║$(RESET)"; \
echo "$(YELLOW)║ You can install it if needed, but it's not required for development. ║$(RESET)"; \
echo "$(YELLOW)╚════════════════════════════════════════════════════════════════════════════╝$(RESET)"; \
fi
check-poetry:
@echo "$(YELLOW)Checking Poetry installation...$(RESET)"
@if command -v poetry > /dev/null; then \
@@ -175,7 +188,7 @@ install-pre-commit-hooks:
lint-backend:
@echo "$(YELLOW)Running linters...$(RESET)"
@poetry run pre-commit run --files openhands/**/* agenthub/**/* evaluation/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
@poetry run pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
lint-frontend:
@echo "$(YELLOW)Running linters for frontend...$(RESET)"

View File

@@ -18,7 +18,7 @@
<br/>
<a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
<hr>
</div>
@@ -52,17 +52,17 @@ system requirements and more information.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.32
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!

View File

@@ -221,9 +221,22 @@ enable_browsing = true
# Whether the LLM draft editor is enabled
enable_llm_editor = false
# Whether the standard editor tool (str_replace_editor) is enabled
# Only has an effect if enable_llm_editor is False
enable_editor = true
# Whether the IPython tool is enabled
enable_jupyter = true
# Whether the command tool is enabled
enable_cmd = true
# Whether the think tool is enabled
enable_think = true
# Whether the finish tool is enabled
enable_finish = true
# LLM config group to use
#llm_config = 'your-llm-config-group'

View File

@@ -61,8 +61,8 @@ RUN add-apt-repository ppa:deadsnakes/ppa \
&& apt-get install -y python3.12 python3.12-venv python3.12-dev python3-pip \
&& ln -s /usr/bin/python3.12 /usr/bin/python
# NodeJS >= 18.17.1
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
# NodeJS >= 22.x
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
&& apt-get install -y nodejs
# Poetry >= 1.8
@@ -108,7 +108,7 @@ WORKDIR /app
# cache build dependencies
RUN \
--mount=type=bind,source=./,target=/app/ \
--mount=type=bind,source=./,target=/app/,rw \
<<EOF
#!/bin/bash
make -s clean

View File

@@ -11,7 +11,7 @@ services:
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
- SANDBOX_API_HOSTNAME=host.docker.internal
#
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.32-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.34-nikolaik}
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:

View File

@@ -7,7 +7,7 @@ services:
image: openhands:latest
container_name: openhands-app-${DATE:-}
environment:
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik}
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:

1
docs/.gitignore vendored
View File

@@ -3,6 +3,7 @@
# Production
/build
/static/swagger-ui
# Generated files
.docusaurus

View File

@@ -36,6 +36,7 @@ const config: Config = {
mermaid: true,
},
themes: ['@docusaurus/theme-mermaid'],
plugins: [],
presets: [
[
'classic',
@@ -75,6 +76,11 @@ const config: Config = {
position: 'left',
label: 'User Guides',
},
{
href: 'https://docs.all-hands.dev/swagger-ui/', // FIXME: this should be a relative path, but docusarus steals the click
label: 'API',
position: 'left',
},
{
type: 'localeDropdown',
position: 'left',

102
docs/generate-swagger-ui.js Normal file
View File

@@ -0,0 +1,102 @@
const fs = require('fs');
const path = require('path');
const swaggerUiDist = require('swagger-ui-dist');
/**
* This script manually sets up Swagger UI for the Docusaurus documentation.
*
* Why we need this approach:
* 1. Docusaurus doesn't have a built-in way to integrate Swagger UI
* 2. We need to copy the necessary files from swagger-ui-dist to our static directory
* 3. We need to create a custom index.html file that points to our OpenAPI spec
* 4. This approach allows us to customize the Swagger UI to match our documentation style
*/
// Get the absolute path to the swagger-ui-dist package
const swaggerUiDistPath = swaggerUiDist.getAbsoluteFSPath();
// Create the target directory if it doesn't exist
const targetDir = path.join(__dirname, 'static', 'swagger-ui');
if (!fs.existsSync(targetDir)) {
fs.mkdirSync(targetDir, { recursive: true });
}
// Copy all files from swagger-ui-dist to our target directory
const files = fs.readdirSync(swaggerUiDistPath);
files.forEach(file => {
const sourcePath = path.join(swaggerUiDistPath, file);
const targetPath = path.join(targetDir, file);
// Skip directories and non-essential files
if (fs.statSync(sourcePath).isDirectory() ||
file === 'package.json' ||
file === 'README.md' ||
file.endsWith('.map')) {
return;
}
fs.copyFileSync(sourcePath, targetPath);
});
// Create a custom index.html file that points to our OpenAPI spec
const indexHtml = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>OpenHands API Documentation</title>
<link rel="stylesheet" type="text/css" href="./swagger-ui.css" />
<link rel="icon" type="image/png" href="./favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="./favicon-16x16.png" sizes="16x16" />
<style>
html {
box-sizing: border-box;
overflow: -moz-scrollbars-vertical;
overflow-y: scroll;
}
*,
*:before,
*:after {
box-sizing: inherit;
}
body {
margin: 0;
background: #fafafa;
}
</style>
</head>
<body>
<div id="swagger-ui"></div>
<script src="./swagger-ui-bundle.js" charset="UTF-8"> </script>
<script src="./swagger-ui-standalone-preset.js" charset="UTF-8"> </script>
<script>
window.onload = function() {
// Begin Swagger UI call region
const ui = SwaggerUIBundle({
url: "/openapi.json",
dom_id: '#swagger-ui',
deepLinking: true,
presets: [
SwaggerUIBundle.presets.apis,
SwaggerUIStandalonePreset
],
plugins: [
SwaggerUIBundle.plugins.DownloadUrl
],
layout: "StandaloneLayout"
});
// End Swagger UI call region
window.ui = ui;
};
</script>
</body>
</html>
`;
fs.writeFileSync(path.join(targetDir, 'index.html'), indexHtml);
console.log('Swagger UI files generated successfully in static/swagger-ui/');

View File

@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -61,7 +61,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```

View File

@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -56,6 +56,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
```

View File

@@ -13,16 +13,16 @@
La façon la plus simple d'exécuter OpenHands est avec Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.32
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).

View File

@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```

View File

@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -44,7 +44,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```

View File

@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -42,7 +42,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー
```bash
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
@@ -82,5 +82,5 @@ docker network create openhands-network
# 分離されたネットワークで OpenHands を実行
docker run # ... \
--network openhands-network \
docker.all-hands.dev/all-hands-ai/openhands:0.32
docker.all-hands.dev/all-hands-ai/openhands:0.34
```

View File

@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```

View File

@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "escreva um script bash que imprima oi"
```

View File

@@ -58,17 +58,17 @@
A maneira mais fácil de executar o OpenHands é no Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.32
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
Você encontrará o OpenHands em execução em http://localhost:3000!

View File

@@ -4,7 +4,7 @@
Microagentes públicos são diretrizes especializadas acionadas por palavras-chave para todos os usuários do OpenHands.
Eles são definidos em arquivos markdown no diretório
[`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge).
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge).
Microagentes públicos:
- Monitoram comandos recebidos em busca de suas palavras-chave de acionamento.
@@ -15,7 +15,7 @@ Microagentes públicos:
## Microagentes Públicos Atuais
Para mais informações sobre microagentes específicos, consulte seus arquivos de documentação individuais no
diretório [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge/).
diretório [`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/).
### Agente GitHub
**Arquivo**: `github.md`
@@ -59,7 +59,7 @@ yes | npm install package-name
## Contribuindo com um Microagente Público
Você pode criar seus próprios microagentes públicos adicionando novos arquivos markdown ao
diretório [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge/).
diretório [`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/).
### Melhores Práticas para Microagentes Públicos
@@ -81,7 +81,7 @@ Antes de criar um microagente público, considere:
#### 2. Crie o Arquivo
Crie um novo arquivo markdown em [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge/)
Crie um novo arquivo markdown em [`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/)
com um nome descritivo (por exemplo, `docker.md` para um agente focado em Docker).
Atualize o arquivo com o frontmatter necessário [de acordo com o formato exigido](./microagents-overview#microagent-format)

View File

@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```

View File

@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -59,7 +59,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```

View File

@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -57,6 +57,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
```

View File

@@ -11,16 +11,16 @@
在 Docker 中运行 OpenHands 是最简单的方式。
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.32
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。

View File

@@ -11,7 +11,7 @@
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```

View File

@@ -8,18 +8,22 @@ OpenHands Cloud can be accessed at https://app.all-hands.dev/.
## Getting Started
After visiting OpenHands Cloud, you will be asked to connect with your GitHub account:
1. After reading and accepting the terms of service, click `Connect to GitHub`.
After visiting OpenHands Cloud, you will be asked to connect with your GitHub or GitLab account:
1. After reading and accepting the terms of service, click `Log in with GitHub` or `Log in with GitLab`.
2. Review the permissions requested by OpenHands and then click `Authorize OpenHands AI`.
- OpenHands will require some permissions from your GitHub account. To read more about these permissions,
you can click the `Learn more` link on the GitHub authorize page.
- OpenHands will require some permissions from your GitHub or GitLab account. To read more about these permissions:
- GitHub: You can click the `Learn more` link on the GitHub authorize page.
- GitLab: You can expand each permission request on the GitLab authorize page.
## Repository Access
### Adding Repository Access
### GitHub
#### Adding Repository Access
You can grant OpenHands specific repository access:
1. Click the `Select a GitHub project` dropdown, select `Add more repositories...`.
1. Click `Add GitHub repos` on the Home page.
2. Select the organization, then choose the specific repositories to grant OpenHands access to.
<details>
<summary>Permission Details for Repository Access</summary>
@@ -42,11 +46,15 @@ You can grant OpenHands specific repository access:
3. Click on `Install & Authorize`.
### Modifying Repository Access
#### Modifying Repository Access
You can modify repository access at any time by:
* Using the same `Select a GitHub project > Add more repositories` workflow, or
* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `GitHub Settings` section.
You can modify GitHub repository access at any time by:
* Using the same `Add GitHub repos` workflow, or
* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `Git Settings` section.
### GitLab
When using your GitLab account, OpenHands will automatically have access to your repositories.
## Conversation Persistence

View File

@@ -1,18 +1,17 @@
# Repository Customization
You can customize how OpenHands works with your repository by creating a
You can customize how OpenHands interacts with your repository by creating a
`.openhands` directory at the root level.
## Microagents
You can use microagents to extend the OpenHands prompts with information
about your project and how you want OpenHands to work. See
[Repository Microagents](../prompting/microagents-repo) for more information.
Microagents allow you to extend OpenHands prompts with information specific to your project and define how OpenHands
should function. See [Microagents Overview](../prompting/microagents-overview) for more information.
## Setup Script
You can add `.openhands/setup.sh`, which will be run every time OpenHands begins
working with your repository. This is a good place to install dependencies, set
environment variables, etc.
You can add a `.openhands/setup.sh` file, which will run every time OpenHands begins working with your repository.
This is an ideal location for installing dependencies, setting environment variables, and performing other setup tasks.
For example:
```bash

View File

@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```

View File

@@ -1,7 +1,8 @@
# Custom Sandbox
:::note
This guide is for users that would like to use their own custom Docker image for the runtime, e.g. with certain tools or programming languages pre-installed
This guide is for users that would like to use their own custom Docker image for the runtime. For example
with certain tools or programming languages pre-installed.
:::
The sandbox is where the agent performs its tasks. Instead of running commands directly on your computer

View File

@@ -0,0 +1 @@
# Using GitLab CI Runners

View File

@@ -24,9 +24,8 @@ OpenHands supports multiple version control providers. You can configure tokens
#### GitHub Token Setup
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if provided:
**Local Installation**: The user directly inputs their GitHub token.
<details>
<summary>Setting Up a GitHub Token</summary>
@@ -40,9 +39,8 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it
- Minimal Permissions ( Select `Meta Data = Read-only` read for search, `Pull Requests = Read and Write` and `Content = Read and Write` for branch creation)
2. **Enter Token in OpenHands**:
- Click the Settings button (gear icon).
- Navigate to the `Git Provider Settings` section.
- Paste your token in the `GitHub Token` field.
- Click `Save Changes` to apply the changes.
- Click `Save` to apply the changes.
</details>
<details>
@@ -83,26 +81,9 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it
- Check the browser console for any error messages.
</details>
**OpenHands Cloud**: The token is obtained through GitHub OAuth authentication.
<details>
<summary>OAuth Authentication</summary>
When using OpenHands Cloud, the GitHub OAuth flow requests the following permissions:
- Repository access (read/write)
- Workflow management
- Organization read access
To authenticate OpenHands:
- Click `Sign in with GitHub` when prompted.
- Review the requested permissions.
- Authorize OpenHands to access your GitHub account.
- If using an organization, authorize organization access if prompted.
</details>
#### GitLab Token Setup
OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment, for local installations only, if it is available.
OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if provided:
<details>
<summary>Setting Up a GitLab Token</summary>
@@ -117,10 +98,9 @@ OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment, for l
- Set an expiration date or leave it blank for a non-expiring token.
2. **Enter Token in OpenHands**:
- Click the Settings button (gear icon).
- Navigate to the `Git Provider Settings` section.
- Paste your token in the `GitLab Token` field.
- Enter your GitLab instance URL if using self-hosted GitLab.
- Click `Save Changes` to apply the changes.
- Click `Save` to apply the changes.
</details>
<details>

View File

@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.32 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
The easiest way to run OpenHands is in Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.32
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
You'll find OpenHands running at http://localhost:3000!

View File

@@ -6,23 +6,26 @@
- Displays the conversation between the user and OpenHands.
- OpenHands explains its actions in this panel.
### Changes
- Shows the file changes performed by OpenHands.
### Workspace
- Browse project files and directories.
- Use the `Open in VS Code` option to:
* Modify files
* Upload and download files
### Terminal
- A space for OpenHands and users to run terminal commands.
### Jupyter
- Shows all Python commands that were executed by OpenHands.
- Particularly handy when using OpenHands to perform data visualization tasks.
### App
- Shows the web server when OpenHands runs an application.
- Displays the web server when OpenHands runs an application.
- Users can interact with the running application.
### Browser
- Used by OpenHands to browse websites.
- The browser is non-interactive.
### Terminal
- A space for OpenHands and users to run terminal commands.

View File

@@ -17,6 +17,8 @@ Based on these findings and community feedback, the following models have been v
- [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
- [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
- [openai/o3-mini](https://openai.com/index/openai-o3-mini/)
- [openai/o3](https://openai.com/index/introducing-o3-and-o4-mini/)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
- [all-hands/openhands-lm-32b-v0.1](https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model) -- available through [OpenRouter](https://openrouter.ai/all-hands/openhands-lm-32b-v0.1)

View File

@@ -15,7 +15,7 @@ It is highly recommended that you use GPUs to serve local models for optimal exp
For example, to download [OpenHands LM 32B v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1):
```bash
huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir my_folder/openhands-lm-32b-v0.1
huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir all-hands/openhands-lm-32b-v0.1
```
## Create an OpenAI-Compatible Endpoint With a Model Serving Framework
@@ -27,7 +27,7 @@ huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir my_folder/o
```bash
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
--model my_folder/openhands-lm-32b-v0.1 \
--model all-hands/openhands-lm-32b-v0.1 \
--served-model-name openhands-lm-32b-v0.1 \
--port 8000 \
--tp 2 --dp 1 \
@@ -41,7 +41,7 @@ SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
- Example launch command for OpenHands LM 32B (with at least 2 GPUs):
```bash
vllm serve my_folder/openhands-lm-32b-v0.1 \
vllm serve all-hands/openhands-lm-32b-v0.1 \
--host 0.0.0.0 --port 8000 \
--api-key mykey \
--tensor-parallel-size 2 \
@@ -67,7 +67,7 @@ Ensure `config.toml` exists by running `make setup-config` which will create one
workspace_base="/path/to/your/workspace"
[llm]
embedding_model="local"
model="openhands-lm-32b-v0.1"
ollama_base_url="http://localhost:8000"
```

View File

@@ -0,0 +1,49 @@
# Keyword-Triggered Microagents
## Purpose
Keyword-triggered microagents provide OpenHands with specific instructions that are activated when certain keywords
appear in the prompt. This is useful for tailoring behavior based on particular tools, languages, or frameworks.
## Microagent File
Create a keyword-triggered microagent (example: `.openhands/microagents/trigger-keyword.md`) to include instructions
that activate only for prompts with specific keywords.
## Frontmatter Syntax
Frontmatter is required for keyword-triggered microagents. It must be placed at the top of the file,
above the guidelines.
Enclose the frontmatter in triple dashes (---) and include the following fields:
| Field | Description | Required | Default |
|------------|--------------------------------------------------|----------|------------------|
| `name` | A unique identifier for the microagent. | Yes | 'default' |
| `type` | Type of microagent. Must be set to `knowledge`. | Yes | 'repo' |
| `triggers` | A list of keywords that activate the microagent. | Yes | None |
| `agent` | The agent this microagent applies to. | No | 'CodeActAgent' |
## Example
```
---
name: magic_word
type: knowledge
triggers:
- yummyhappy
- happyyummy
agent: CodeActAgent
---
The user has said the magic word. Respond with "That was delicious!"
```
Keyword-triggered microagents:
- Monitor incoming prompts for specified trigger words.
- Activate when relevant triggers are detected.
- Apply their specialized knowledge and capabilities.
- Follow defined guidelines and restrictions.
[See examples of microagents triggered by keywords in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)

View File

@@ -1,31 +1,40 @@
# Microagents Overview
Microagents are specialized prompts that enhance OpenHands with domain-specific knowledge, repository-specific context
and task-specific workflows. They help by providing expert guidance, automating common tasks, and ensuring
consistent practices across projects.
Microagents are specialized prompts that enhance OpenHands with domain-specific knowledge.
They provide expert guidance, automate common tasks, and ensure consistent practices across projects.
## Microagent Categories
## Microagent Types
Currently OpenHands supports two categories of microagents:
Currently OpenHands supports the following types of microagents:
- [Repository-specific Microagents](./microagents-repo): Repository-specific context and guidelines for OpenHands.
- [Public Microagents](./microagents-public): General guidelines triggered by keywords for all OpenHands users.
- [General Repository Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
- [Keyword-Triggered Microagents](./microagents-keyword): Guidelines activated by specific keywords in prompts.
A microagent is classified as repository-specific or public depending on its location:
To customize OpenHands' behavior, create a .openhands/microagents/ directory in the root of your repository and
add `<microagent_name>.md` files inside.
- Repository-specific microagents are located in a repository's `.openhands/microagents/` directory
- Public microagents are located in the official OpenHands repository inside the `/microagents` folder
:::note
Loaded microagents take up space in the context window.
These microagents, alongside user messages, inform OpenHands about the task and the environment.
:::
When OpenHands works with a repository, it:
Example repository structure:
1. Loads **repository-specific** microagents from `.openhands/microagents/` if present in the repository.
2. Loads **public knowledge** microagents triggered by keywords in conversations
3. Loads **public tasks** microagents when explicitly requested by the user
```
some-repository/
└── .openhands/
└── microagents/
└── repo.md # General repository guidelines
└── trigger_this.md # Microagent triggered by specific keywords
└── trigger_that.md # Microagent triggered by specific keywords
```
You can check out the existing public microagents at the [official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/).
## Microagents Frontmatter Requirements
## Microagent Format
Each microagent file may include frontmatter that provides additional information. In some cases, this frontmatter
is required:
All microagents use markdown files with YAML frontmatter that have special instructions to help OpenHands activate them.
Check out the [syntax documentation](./microagents-syntax) for a comprehensive guide on how to configure your microagents.
| Microagent Type | Frontmatter Requirement |
|----------------------------------|-------------------------------------------------------|
| `General Repository Microagents` | Required only if more than one of this type exists. |
| `Keyword-Triggered Microagents` | Required. |

View File

@@ -1,35 +1,16 @@
# Public Microagents
# Global Microagents
## Overview
Public microagents provide specialized context and capabilities for all OpenHands users, regardless of their repository configuration. Unlike repository-specific microagents, public microagents are globally available across all repositories.
Global microagents are [keyword-triggered microagents](./microagents-keyword) that apply to all OpenHands users.
Public microagents come in two types:
## Contributing a Global Microagent
- **Knowledge microagents**: Automatically activated when keywords in conversations match their triggers
- **Task microagents**: Explicitly invoked by users to guide through specific workflows
Both types follow the same syntax and structure as repository-specific microagents, using markdown files with YAML frontmatter that define their behavior and capabilities. They are located in the official OpenHands repository under:
- [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) for knowledge microagents
- [`microagents/tasks/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks) for task microagents
Public microagents:
- Monitor incoming commands for their trigger words.
- Activate when relevant triggers are detected.
- Apply their specialized knowledge and capabilities.
- Follow their specific guidelines and restrictions.
When loading public microagents, OpenHands scans the official repository's microagents directories recursively, processing all markdown files except README.md. The system categorizes each microagent based on its `type` field in the YAML frontmatter, regardless of its exact file location within the knowledge or tasks directories.
## Contributing a Public Microagent
You can create public microagents and share with the community by opening a pull request to the official repository.
You can create global microagents and share with the community by opening a pull request to the official repository.
See the [CONTRIBUTING.md](https://github.com/All-Hands-AI/OpenHands/blob/main/CONTRIBUTING.md) for specific instructions on how to contribute to OpenHands.
### Public Microagents Best Practices
### Global Microagents Best Practices
- **Clear Scope**: Keep the microagent focused on a specific domain or task.
- **Explicit Instructions**: Provide clear, unambiguous guidelines.
@@ -37,11 +18,11 @@ See the [CONTRIBUTING.md](https://github.com/All-Hands-AI/OpenHands/blob/main/CO
- **Safety First**: Include necessary warnings and constraints.
- **Integration Awareness**: Consider how the microagent interacts with other components.
### Steps to Contribute a Public Microagent
### Steps to Contribute a Global Microagent
#### 1. Plan the Public Microagent
#### 1. Plan the Global Microagent
Before creating a public microagent, consider:
Before creating a global microagent, consider:
- What specific problem or use case will it address?
- What unique capabilities or knowledge should it have?
@@ -51,23 +32,19 @@ Before creating a public microagent, consider:
#### 2. Create File
Create a new Markdown file with a descriptive name in the appropriate directory:
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)
- [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) for knowledge microagents
- [`microagents/tasks/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks) for task microagents
#### 3. Testing the Global Microagent
Ensure it follows the correct [syntax](./microagents-syntax.md) and [best practices](./microagents-syntax.md#markdown-content-best-practices).
#### 3. Testing the Public Microagent
- Test the agent with various prompts
- Verify trigger words activate the agent correctly
- Ensure instructions are clear and comprehensive
- Check for potential conflicts and overlaps with existing agents
- Test the agent with various prompts.
- Verify trigger words activate the agent correctly.
- Ensure instructions are clear and comprehensive.
- Check for potential conflicts and overlaps with existing agents.
#### 4. Submission Process
Submit a pull request with:
- The new microagent file
- Updated documentation if needed
- Description of the agent's purpose and capabilities
- The new microagent file.
- Updated documentation if needed.
- Description of the agent's purpose and capabilities.

View File

@@ -1,117 +1,38 @@
# Repository-specific Microagents
# General Repository Microagents
## Overview
## Purpose
OpenHands can be customized to work more effectively with specific repositories by providing repository-specific context and guidelines.
General guidelines for OpenHands to work more effectively with the repository.
This section explains how to optimize OpenHands for your project.
## Microagent File
## Creating Repository Microagents
Create a general repository microagent (example: `.openhands/microagents/repo.md`) to include
project-specific instructions, team practices, coding standards, and architectural guidelines that are relevant for
**all** prompts in that repository.
You can customize OpenHands' behavior for your repository by creating a `.openhands/microagents/` directory in your repository's root.
## Frontmatter Syntax
You can enhance OpenHands' performance by adding custom microagents to your repository:
The frontmatter for this type of microagent is optional, unless you plan to include more than one general
repository microagent.
1. For overall repository-specific instructions, create a `.openhands/microagents/repo.md` file
2. For reusable domain knowledge triggered by keywords, add multiple `.md` files to `.openhands/microagents/knowledge/`
3. For common workflows and tasks, create multiple `.md` files to `.openhands/microagents/tasks/`
Frontmatter should be enclosed in triple dashes (---) and may include the following fields:
Check out the [best practices](./microagents-syntax.md#markdown-content-best-practices) for formatting the content of your custom microagent.
| Field | Description | Required | Default |
|-----------|-----------------------------------------|--------------------------------------------------------------------|----------------|
| `name` | A unique identifier for the microagent | Required only if using more than one general repository microagent | 'default' |
| `agent` | The agent this microagent applies to | No | 'CodeActAgent' |
Keep in mind that loaded microagents take up space in the context window. It's crucial to strike a balance between the additional context provided by microagents and the instructions provided in the user's inputs.
Note that you can use OpenHands to create new microagents. The public microagent [`add_agent`](https://github.com/All-Hands-AI/OpenHands/blob/main/microagents/knowledge/add_agent.md) is loaded to all OpenHands instance and can support you on this.
## Types of Microagents
OpenHands supports three primary types of microagents, each with specific purposes and features to enhance agent performance:
- [repository](#repository-microagents)
- [knowledge](#knowledge-microagents)
- [tasks](#tasks-microagents)
The standard directory structure within a repository is:
- One main `repo.md` file containing repository-specific instructions
- Additional `Knowledge` agents in `.openhands/microagents/knowledge/` directory
- Additional `Task` agents in `.openhands/microagents/tasks/` directory
When processing the `.openhands/microagents/` directory, OpenHands will recursively scan all subfolders and process any `.md` files (except `README.md`) it finds. The system determines the microagent type based on the `type` field in the YAML frontmatter, not by the file's location. However, for organizational clarity, it's recommended to follow the standard directory structure.
### Repository Microagents
The `Repository` microagent is loaded specifically from `.openhands/microagents/repo.md` and serves as the main
repository-specific instruction file. This single file is automatically loaded whenever OpenHands works with that repository
without requiring any keyword matching or explicit call from the user.
OpenHands does not support multiple `repo.md` files in different locations or multiple microagents with type `repo`.
If you need to organize different types of repository information, the recommended approach is to use a single `repo.md` file with well-structured sections rather than trying to create multiple microagents with the type `repo`.
The best practice is to include project-specific instructions, team practices, coding standards, and architectural guidelines that are relevant for **all** prompts in that repository.
Example structure:
## Example
```
your-repository/
└── .openhands/
└── microagents/
└── repo.md # Repository-specific instructions
---
name: repo
---
This project is a TODO application that allows users to track TODO items.
To set it up, you can run `npm run build`.
Always make sure the tests are passing before committing changes. You can run the tests by running `npm run test`.
```
[See the example in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/blob/main/.openhands/microagents/repo.md?plain=1)
### Knowledge Microagents
Knowledge microagents provide specialized domain expertise:
- Recommended to be located in `.openhands/microagents/knowledge/`
- Triggered by specific keywords in conversations
- Contain expertise on tools, languages, frameworks, and common practices
Use knowledge microagents to trigger additional context relevant to specific technologies, tools, or workflows. For example, mentioning "git" in your conversation will automatically trigger git-related expertise to help with Git operations.
Examples structure:
```
your-repository/
└── .openhands/
└── microagents/
└── knowledge/
└── git.md
└── docker.md
└── python.md
└── ...
└── repo.md
```
You can find several real examples of `Knowledge` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)
### Tasks Microagents
Task microagents guide users through interactive workflows:
- Recommended to be located in `.openhands/microagents/tasks/`
- Provide step-by-step processes for common development tasks
- Accept inputs and adapt to different scenarios
- Ensure consistent outcomes for complex operations
Task microagents are a convenient way to store multi-step processes you perform regularly. For instance, you can create a `update_pr_description.md` microagent to automatically generate better pull request descriptions based on code changes.
Examples structure:
```
your-repository/
└── .openhands/
└── microagents/
└── tasks/
└── update_pr_description.md
└── address_pr_comments.md
└── get_test_to_pass.md
└── ...
└── knowledge/
└── ...
└── repo.md
```
You can find several real examples of `Tasks` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks)
[See more examples of general repository microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)

View File

@@ -1,128 +0,0 @@
# Microagents Syntax
Microagents are defined using markdown files with YAML frontmatter that specify their behavior, triggers, and capabilities.
Find below a comprehensive description of the frontmatter syntax and other details about how to use each type of microagent available at OpenHands.
## Frontmatter Schema
Every microagent requires a YAML frontmatter section at the beginning of the file, enclosed by triple dashes (`---`). The fields are:
| Field | Description | Required | Used By |
| ---------- | -------------------------------------------------- | ------------------------ | ---------------- |
| `name` | Unique identifier for the microagent | Yes | All types |
| `type` | Type of microagent: `repo`, `knowledge`, or `task` | Yes | All types |
| `version` | Version number (Semantic versioning recommended) | Yes | All types |
| `agent` | The agent type (typically `CodeActAgent`) | Yes | All types |
| `author` | Creator of the microagent | No | All types |
| `triggers` | List of keywords that activate the microagent | Yes for knowledge agents | Knowledge agents |
| `inputs` | Defines required user inputs for task execution | Yes for task agents | Task agents |
## Core Fields
### `agent`
**Purpose**: Specifies which agent implementation processes the microagent (typically `CodeActAgent`).
- Defines a single agent responsible for processing the microagent
- Must be available in the OpenHands system (see the [agent hub](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/agenthub))
- If the specified agent is not active, the microagent will not be used
### `triggers`
**Purpose**: Defines keywords that activate the `knowledge` microagent.
**Example**:
```yaml
triggers:
- kubernetes
- k8s
- docker
- security
- containers cluster
```
**Key points**:
- Can include both single words and multi-word phrases
- Case-insensitive matching is typically used
- More specific triggers (like "docker compose") prevent false activations
- Multiple triggers increase the chance of activation in relevant contexts
- Unique triggers like "flarglebargle" can be used for testing or special functionality
- Triggers should be carefully chosen to avoid unwanted activations or conflicts with other microagents
- Common terms used in many conversations may cause the microagent to be activated too frequently
When using multiple triggers, the microagent will be activated if any of the trigger words or phrases appear in the
conversation.
### `inputs`
**Purpose**: Defines parameters required from the user when a `task` microagent is activated.
**Schema**:
```yaml
inputs:
- name: INPUT_NAME # Used with {{ INPUT_NAME }}
description: 'Description of what this input is for'
required: true # Optional, defaults to true
```
**Key points**:
- The `name` and `description` properties are required for each input
- The `required` property is optional and defaults to `true`
- Input values are referenced in the microagent body using double curly braces (e.g., `{{ INPUT_NAME }}`)
- All inputs defined will be collected from the user before the task microagent executes
**Variable Usage**: Reference input values using double curly braces `{{ INPUT_NAME }}`.
## Example Formats
### Repository Microagent
Repository microagents provide context and guidelines for a specific repository.
- Located at: `.openhands/microagents/repo.md`
- Automatically loaded when working with the repository
- Only one per repository
The `Repository` microagent is loaded specifically from `.openhands/microagents/repo.md` and serves as the main
repository-specific instruction file. This single file is automatically loaded whenever OpenHands works with that repository
without requiring any keyword matching or explicit call from the user.
[See the example in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/blob/main/.openhands/microagents/repo.md?plain=1)
### Knowledge Microagent
Provides specialized domain expertise triggered by keywords.
You can find several real examples of `Knowledge` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)
### Task Microagent
When explicitly asked by the user, will guide through interactive workflows with specific inputs.
You can find several real examples of `Tasks` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks)
## Markdown Content Best Practices
After the frontmatter, compose the microagent body using Markdown syntax. Examples of elements you can include are:
- Clear, concise instructions outlining the microagent's purpose and responsibilities
- Specific guidelines and constraints the microagent should adhere to
- Relevant code snippets and practical examples to illustrate key points
- Step-by-step procedures for task agents, guiding users through workflows
**Design Tips**:
- Keep microagents focused with a clear purpose
- Provide specific guidelines rather than general advice
- Use distinctive triggers for knowledge agents
- Keep content concise to minimize context window usage
- Break large microagents into smaller, focused ones
Aim for clarity, brevity, and practicality in your writing. Use formatting like bullet points, code blocks, and emphasis to enhance readability and comprehension.
Remember that balancing microagents details with user input space is important for maintaining effective interactions.

433
docs/package-lock.json generated
View File

@@ -24,6 +24,8 @@
"@docusaurus/module-type-aliases": "^3.5.1",
"@docusaurus/tsconfig": "^3.7.0",
"@docusaurus/types": "^3.5.1",
"swagger-cli": "^4.0.4",
"swagger-ui-dist": "^5.21.0",
"typescript": "~5.8.3"
},
"engines": {
@@ -273,6 +275,273 @@
"node": ">=6.0.0"
}
},
"node_modules/@apidevtools/openapi-schemas": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@apidevtools/openapi-schemas/-/openapi-schemas-2.1.0.tgz",
"integrity": "sha512-Zc1AlqrJlX3SlpupFGpiLi2EbteyP7fXmUOGup6/DnkRgjP9bgMM/ag+n91rsv0U1Gpz0H3VILA/o3bW7Ua6BQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=10"
}
},
"node_modules/@apidevtools/swagger-cli": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-cli/-/swagger-cli-4.0.4.tgz",
"integrity": "sha512-hdDT3B6GLVovCsRZYDi3+wMcB1HfetTU20l2DC8zD3iFRNMC6QNAZG5fo/6PYeHWBEv7ri4MvnlKodhNB0nt7g==",
"deprecated": "This package has been abandoned. Please switch to using the actively maintained @redocly/cli",
"dev": true,
"license": "MIT",
"dependencies": {
"@apidevtools/swagger-parser": "^10.0.1",
"chalk": "^4.1.0",
"js-yaml": "^3.14.0",
"yargs": "^15.4.1"
},
"bin": {
"swagger-cli": "bin/swagger-cli.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/argparse": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
"integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
"dev": true,
"license": "MIT",
"dependencies": {
"sprintf-js": "~1.0.2"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/camelcase": {
"version": "5.3.1",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz",
"integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/cliui": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-6.0.0.tgz",
"integrity": "sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==",
"dev": true,
"license": "ISC",
"dependencies": {
"string-width": "^4.2.0",
"strip-ansi": "^6.0.0",
"wrap-ansi": "^6.2.0"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
"dev": true,
"license": "MIT"
},
"node_modules/@apidevtools/swagger-cli/node_modules/find-up": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
"integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
"dev": true,
"license": "MIT",
"dependencies": {
"locate-path": "^5.0.0",
"path-exists": "^4.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/js-yaml": {
"version": "3.14.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz",
"integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==",
"dev": true,
"license": "MIT",
"dependencies": {
"argparse": "^1.0.7",
"esprima": "^4.0.0"
},
"bin": {
"js-yaml": "bin/js-yaml.js"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/locate-path": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
"integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
"dev": true,
"license": "MIT",
"dependencies": {
"p-locate": "^4.1.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/p-limit": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
"integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
"dev": true,
"license": "MIT",
"dependencies": {
"p-try": "^2.0.0"
},
"engines": {
"node": ">=6"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/p-locate": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
"integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
"dev": true,
"license": "MIT",
"dependencies": {
"p-limit": "^2.2.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
"integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"dev": true,
"license": "MIT",
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/wrap-ansi": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz",
"integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==",
"dev": true,
"license": "MIT",
"dependencies": {
"ansi-styles": "^4.0.0",
"string-width": "^4.1.0",
"strip-ansi": "^6.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/y18n": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.3.tgz",
"integrity": "sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ==",
"dev": true,
"license": "ISC"
},
"node_modules/@apidevtools/swagger-cli/node_modules/yargs": {
"version": "15.4.1",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-15.4.1.tgz",
"integrity": "sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==",
"dev": true,
"license": "MIT",
"dependencies": {
"cliui": "^6.0.0",
"decamelize": "^1.2.0",
"find-up": "^4.1.0",
"get-caller-file": "^2.0.1",
"require-directory": "^2.1.1",
"require-main-filename": "^2.0.0",
"set-blocking": "^2.0.0",
"string-width": "^4.2.0",
"which-module": "^2.0.0",
"y18n": "^4.0.0",
"yargs-parser": "^18.1.2"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/yargs-parser": {
"version": "18.1.3",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-18.1.3.tgz",
"integrity": "sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==",
"dev": true,
"license": "ISC",
"dependencies": {
"camelcase": "^5.0.0",
"decamelize": "^1.2.0"
},
"engines": {
"node": ">=6"
}
},
"node_modules/@apidevtools/swagger-methods": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz",
"integrity": "sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg==",
"dev": true,
"license": "MIT"
},
"node_modules/@apidevtools/swagger-parser": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-parser/-/swagger-parser-10.1.1.tgz",
"integrity": "sha512-u/kozRnsPO/x8QtKYJOqoGtC4kH6yg1lfYkB9Au0WhYB0FNLpyFusttQtvhlwjtG3rOwiRz4D8DnnXa8iEpIKA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@apidevtools/json-schema-ref-parser": "11.7.2",
"@apidevtools/openapi-schemas": "^2.1.0",
"@apidevtools/swagger-methods": "^3.0.2",
"@jsdevtools/ono": "^7.1.3",
"ajv": "^8.17.1",
"ajv-draft-04": "^1.0.0",
"call-me-maybe": "^1.0.2"
},
"peerDependencies": {
"openapi-types": ">=7"
}
},
"node_modules/@apidevtools/swagger-parser/node_modules/@apidevtools/json-schema-ref-parser": {
"version": "11.7.2",
"resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-11.7.2.tgz",
"integrity": "sha512-4gY54eEGEstClvEkGnwVkTkrx0sqwemEFG5OSRRn3tD91XH0+Q8XIkYIfo7IwEWPpJZwILb9GUXeShtplRc/eA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@jsdevtools/ono": "^7.1.3",
"@types/json-schema": "^7.0.15",
"js-yaml": "^4.1.0"
},
"engines": {
"node": ">= 16"
},
"funding": {
"url": "https://github.com/sponsors/philsturgeon"
}
},
"node_modules/@babel/code-frame": {
"version": "7.26.2",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
@@ -3835,6 +4104,13 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@jsdevtools/ono": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz",
"integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==",
"dev": true,
"license": "MIT"
},
"node_modules/@leichtgewicht/ip-codec": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz",
@@ -3970,6 +4246,14 @@
"integrity": "sha512-8LduaNlMZGwdZ6qWrKlfa+2M4gahzFkprZiAt2TF8uS0qQgBizKXpXURqvTJ4WtmupWxaLqjRb2UCTe72mu+Aw==",
"license": "MIT"
},
"node_modules/@scarf/scarf": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
"dev": true,
"hasInstallScript": true,
"license": "Apache-2.0"
},
"node_modules/@sideway/address": {
"version": "4.1.5",
"resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.5.tgz",
@@ -4967,6 +5251,21 @@
"url": "https://github.com/sponsors/epoberezkin"
}
},
"node_modules/ajv-draft-04": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/ajv-draft-04/-/ajv-draft-04-1.0.0.tgz",
"integrity": "sha512-mv00Te6nmYbRp5DCwclxtt7yV/joXJPGS7nM+97GdxvuttCOfgI3K4U25zboyeX0O+myI8ERluxQe5wljMmVIw==",
"dev": true,
"license": "MIT",
"peerDependencies": {
"ajv": "^8.5.0"
},
"peerDependenciesMeta": {
"ajv": {
"optional": true
}
}
},
"node_modules/ajv-formats": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz",
@@ -5549,6 +5848,13 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/call-me-maybe": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-me-maybe/-/call-me-maybe-1.0.2.tgz",
"integrity": "sha512-HpX65o1Hnr9HH25ojC1YGs7HCQLq0GCOibSaWER0eNpgJ/Z1MZv2mTc7+xh6WOPxbRVcmgbv4hGU+uSQ/2xFZQ==",
"dev": true,
"license": "MIT"
},
"node_modules/callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -7192,6 +7498,16 @@
}
}
},
"node_modules/decamelize": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
"integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/decode-named-character-reference": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz",
@@ -8600,6 +8916,16 @@
"node": ">=6.9.0"
}
},
"node_modules/get-caller-file": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
"dev": true,
"license": "ISC",
"engines": {
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/get-intrinsic": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.7.tgz",
@@ -13110,15 +13436,16 @@
}
},
"node_modules/nanoid": {
"version": "3.3.7",
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz",
"integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==",
"version": "3.3.11",
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
"integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/ai"
}
],
"license": "MIT",
"bin": {
"nanoid": "bin/nanoid.cjs"
},
@@ -13427,6 +13754,14 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/openapi-types": {
"version": "12.1.3",
"resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==",
"dev": true,
"license": "MIT",
"peer": true
},
"node_modules/opener": {
"version": "1.5.2",
"resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
@@ -13809,9 +14144,9 @@
}
},
"node_modules/postcss": {
"version": "8.4.38",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.38.tgz",
"integrity": "sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A==",
"version": "8.4.49",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz",
"integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==",
"funding": [
{
"type": "opencollective",
@@ -13826,10 +14161,11 @@
"url": "https://github.com/sponsors/ai"
}
],
"license": "MIT",
"dependencies": {
"nanoid": "^3.3.7",
"picocolors": "^1.0.0",
"source-map-js": "^1.2.0"
"picocolors": "^1.1.1",
"source-map-js": "^1.2.1"
},
"engines": {
"node": "^10 || ^12 || >=14"
@@ -15367,6 +15703,15 @@
"node": ">= 0.10"
}
},
"node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
"integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/pupa": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/pupa/-/pupa-3.1.0.tgz",
@@ -16205,6 +16550,16 @@
"node": ">=0.10"
}
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/require-from-string": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
@@ -16222,6 +16577,13 @@
"node": "*"
}
},
"node_modules/require-main-filename": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz",
"integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==",
"dev": true,
"license": "ISC"
},
"node_modules/requires-port": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
@@ -16702,6 +17064,13 @@
"node": ">= 0.8.0"
}
},
"node_modules/set-blocking": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz",
"integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
"dev": true,
"license": "ISC"
},
"node_modules/set-function-length": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
@@ -16980,9 +17349,10 @@
}
},
"node_modules/source-map-js": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz",
"integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==",
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
"integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
"license": "BSD-3-Clause",
"engines": {
"node": ">=0.10.0"
}
@@ -17347,6 +17717,32 @@
"resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
"integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA=="
},
"node_modules/swagger-cli": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/swagger-cli/-/swagger-cli-4.0.4.tgz",
"integrity": "sha512-Cp8YYuLny3RJFQ4CvOBTaqmOOgYsem52dPx1xM5S4EUWFblIh2Q8atppMZvXKUr1e9xH5RwipYpmdUzdPcxWcA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@apidevtools/swagger-cli": "4.0.4"
},
"bin": {
"swagger-cli": "swagger-cli.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/swagger-ui-dist": {
"version": "5.21.0",
"resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.21.0.tgz",
"integrity": "sha512-E0K3AB6HvQd8yQNSMR7eE5bk+323AUxjtCz/4ZNKiahOlPhPJxqn3UPIGs00cyY/dhrTDJ61L7C/a8u6zhGrZg==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@scarf/scarf": "=1.4.0"
}
},
"node_modules/tapable": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz",
@@ -17949,14 +18345,6 @@
"punycode": "^2.1.0"
}
},
"node_modules/uri-js/node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
"integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
"engines": {
"node": ">=6"
}
},
"node_modules/url-loader": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/url-loader/-/url-loader-4.1.1.tgz",
@@ -18610,6 +18998,13 @@
"node": ">= 8"
}
},
"node_modules/which-module": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.1.tgz",
"integrity": "sha512-iBdZ57RDvnOR9AGBhML2vFZf7h8vmBjhoaZqODJBFWHVtKkDmKuHai3cx5PgVMrX5YDNp27AofYbAwctSS+vhQ==",
"dev": true,
"license": "ISC"
},
"node_modules/widest-line": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/widest-line/-/widest-line-4.0.1.tgz",

View File

@@ -4,16 +4,18 @@
"private": true,
"scripts": {
"docusaurus": "docusaurus",
"start": "docusaurus start",
"build": "docusaurus build",
"start": "node generate-swagger-ui.js && docusaurus start",
"build": "node generate-swagger-ui.js && docusaurus build",
"swizzle": "docusaurus swizzle",
"deploy": "docusaurus deploy",
"clear": "docusaurus clear",
"serve": "docusaurus serve",
"write-translations": "docusaurus write-translations",
"write-heading-ids": "docusaurus write-heading-ids",
"typecheck": "tsc"
"typecheck": "tsc",
"generate-swagger-ui": "node generate-swagger-ui.js"
},
"// Note": "The OpenAPI spec is stored in docs/static/openapi.json so it's accessible at /openapi.json in the deployed site",
"dependencies": {
"@docusaurus/core": "^3.7.0",
"@docusaurus/plugin-content-pages": "^3.7.0",
@@ -31,6 +33,8 @@
"@docusaurus/module-type-aliases": "^3.5.1",
"@docusaurus/tsconfig": "^3.7.0",
"@docusaurus/types": "^3.5.1",
"swagger-cli": "^4.0.4",
"swagger-ui-dist": "^5.21.0",
"typescript": "~5.8.3"
},
"browserslist": {
@@ -47,5 +51,6 @@
},
"engines": {
"node": ">=18.0"
}
},
"packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
}

View File

@@ -66,18 +66,18 @@ const sidebars: SidebarsConfig = {
},
{
type: 'doc',
label: 'Repository-specific',
label: 'General Repository Microagents',
id: 'usage/prompting/microagents-repo',
},
{
type: 'doc',
label: 'Public',
id: 'usage/prompting/microagents-public',
label: 'Keyword-Triggered Microagents',
id: 'usage/prompting/microagents-keyword',
},
{
type: 'doc',
label: 'Syntax',
id: 'usage/prompting/microagents-syntax',
label: 'Global Microagents',
id: 'usage/prompting/microagents-public',
},
],
},

15
docs/static/README.md vendored Normal file
View File

@@ -0,0 +1,15 @@
# Static Files for OpenHands Documentation
This directory contains static files that are copied directly to the build output of the Docusaurus documentation.
## OpenAPI Specification
The `openapi.json` file in this directory is the OpenAPI specification for the OpenHands API. It is copied to the build output and is accessible at `/openapi.json` in the deployed site.
This file is used by the Swagger UI interface, which is accessible at `/swagger-ui/` in the deployed site.
## Why is the OpenAPI spec in the static directory?
The OpenAPI specification is placed in the static directory so that it's accessible at a predictable URL in the deployed site. This allows the Swagger UI to reference it directly.
We only need one copy of the OpenAPI spec file, which is this one in the static directory.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 148 KiB

After

Width:  |  Height:  |  Size: 120 KiB

2085
docs/static/openapi.json vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
config.yaml

View File

@@ -0,0 +1,35 @@
# CI Builds Repair Benchmark Integration
This module integrates the CI Builds Repair benchmark developed by [JetBrains-Research](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark).
For more information, refer to the [GitHub repository](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark) and the associated [research paper](https://arxiv.org/abs/2406.11612).
See notice below for details
## Setup
Before running any scripts, make sure to configure the benchmark by setting up `config.yaml`.
This benchmark pushes to JetBrains' private GitHub repository. You will to request a `token_gh` provided by their team, to run this benchmark.
## Inference
To run inference with your model:
```bash
./evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh llm.yourmodel
```
## Evaluation
To evaluate the predictions:
```bash
./evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh predictions_path_containing_output
```
## Results
The benchmark contains 68 instances, we skip instances #126 and #145, and only run 66 instances due to dockerization errors.
Due to running in live GitHub machines, the benchmark is sensitive to the date it is run. Even the golden patches in the dataset might present failures due to updates.
For example, on 2025-04-09, running the benchmark against the golden patches gave 57/67 successes, with 1 job left in the waiting list.
On 2025-04-10, running the benchmark full with OH and no oracle, 37 succeeded. That is 54% of the complete set of 68 instances and 64% of the 57 that succeed with golden patches.

View File

@@ -0,0 +1,11 @@
LCA_PATH: path #where to clone lca-ci rep
model_name: OpenHands
benchmark_owner: ICML-25-BenchName-builds-repair
token_gh: your_token
#for lca-ci-repo
repos_folder: /path/to/repos # here the cloned repos would be stored
out_folder: /out/folder # here the result files would be stored
data_cache_dir: /data/cache/dir/ # here the cached dataset would be stored
username_gh: username-gh # your GitHub username
# test_username: test_user # username that would be displayed in the benchmark. Optional. If ommitted, username_gh would be used
language: Python # dataset language (now only Python is available)

View File

@@ -0,0 +1,242 @@
"""Implements evaluation on JetBrains CI builds repair baselines
Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
TODOs:
- Add more flags
"""
import json
import os
from pathlib import Path
import ruamel.yaml
from evaluation.utils.shared import (
EvalMetadata,
get_default_sandbox_config_for_eval,
make_metadata,
)
from openhands.core.config import (
AppConfig,
LLMConfig,
get_parser,
load_app_config,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def get_config(
metadata: EvalMetadata,
) -> AppConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config.enable_prompt_extensions = False
return config
config = load_app_config()
def load_bench_config():
script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
config_path = os.path.join(script_dir, 'config.yaml')
yaml = ruamel.yaml.YAML(typ='rt')
with open(config_path, 'r') as file:
return yaml.load(file)
bench_config = load_bench_config()
def run_eval(
runtime: Runtime,
):
"""Run the evaluation and create report"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
obs: CmdOutputObservation
lca_path = bench_config['LCA_PATH']
lca_ci_path = os.path.join(
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
)
model_name = bench_config['model_name']
action = CmdRunAction(command=f'mkdir {lca_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command=f'cd {lca_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
action = CmdRunAction(command=f'git clone {lca_repo_url}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command=f'cd {lca_ci_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command='git switch open-hands-integration')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
config_path = os.path.join(script_dir, 'config.yaml')
runtime.copy_to(config_path, lca_ci_path)
token_gh = bench_config['token_gh']
commandf = f'export TOKEN_GH={token_gh}'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
action = CmdRunAction(command='poetry install')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
# Set up the task environment
commandf = f'poetry run python run_eval_jobs.py --model-name "{model_name}" --config-path "{lca_ci_path}/config.yaml" --job-ids-file "/tmp/output_lca.jsonl" --result-filename "testfile.jsonl" > /tmp/single_output.txt'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(f'run_eval_jobs.py gave {obs.content} !')
# assert obs.exit_code == 0
commandf = 'cat /tmp/single_output.txt'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(f' {commandf} gave {obs.content}!')
testfile_path = os.path.join(bench_config['out_folder'], 'testfile.jsonl')
commandf = f'cat {testfile_path}'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
report_str = obs.content
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
return report_str
def process_predictions(predictions_path: str):
output_path = Path(predictions_path)
if output_path.suffix != '.jsonl':
raise ValueError('output_path must end in .jsonl')
output_lca_path = output_path.with_name(output_path.stem + '_lca.jsonl')
with output_path.open() as infile, output_lca_path.open('w') as outfile:
for line in infile:
data = json.loads(line)
json.dump(data.get('test_result'), outfile)
outfile.write('\n')
return str(output_lca_path)
if __name__ == '__main__':
parser = get_parser()
parser.add_argument(
'-s',
'--eval-split',
type=str,
default='test',
choices=['test'],
help='data split to evaluate on, must be test',
)
parser.add_argument(
'--predictions-path',
type=str,
help='Path to the directory containing the output.jsonl with the predictions.',
)
args, _ = parser.parse_known_args()
data_split = args.eval_split
llm_config = LLMConfig(model='dummy_model')
metadata = make_metadata(
llm_config,
f'jetbrains-lca-ci--{data_split}',
args.agent_cls,
args.max_iterations,
args.eval_note,
args.predictions_path,
)
# prepare image
config = get_config(metadata)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
logger.info('Converting output.jsonl into output_lca.jsonl')
predictions_lca_path = process_predictions(
os.path.join(args.predictions_path, 'output.jsonl')
)
runtime.copy_to(predictions_lca_path, '/tmp')
# get results
results_str = run_eval(runtime)
results_path = os.path.join(args.predictions_path, 'results.jsonl')
with open(results_path, 'w') as file:
file.write(results_str)
logger.info(f'Saved results to {results_path}')
# make a summary
resolved_instances = []
unresolved_instances = []
for line in results_str.strip().splitlines():
data = json.loads(line)
conclusion = data.get('conclusion')
if conclusion == 'success':
resolved_instances.append(data)
elif conclusion == 'failure':
unresolved_instances.append(data)
completed_instances = resolved_instances + unresolved_instances
report = {
'success': len(resolved_instances),
'failure': len(unresolved_instances),
'resolved_instances': resolved_instances,
'unresolved_instances': unresolved_instances,
'completed_instances': completed_instances,
}
print(f'Results: {report}')
report_path = os.path.join(args.predictions_path, 'report.jsonl')
with open(report_path, 'w') as out_f:
out_f.write(json.dumps(report) + '\n')
logger.info(f'Saved report of results in swebench format to {report_path}')

View File

@@ -0,0 +1,406 @@
"""Implements inference on JetBrains CI builds repair baselines
Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
TODOs:
- Add EXP_NAME
"""
import asyncio
import json
import os
from typing import Any
import pandas as pd
import ruamel.yaml
from datasets import load_dataset
from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AppConfig,
get_llm_config_arg,
get_parser,
load_app_config,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def get_config(
metadata: EvalMetadata,
) -> AppConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config.enable_prompt_extensions = False
return config
config = load_app_config()
def load_bench_config():
script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
config_path = os.path.join(script_dir, 'config.yaml')
yaml = ruamel.yaml.YAML(typ='rt')
with open(config_path, 'r') as file:
return yaml.load(file)
bench_config = load_bench_config()
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
}
AGENT_CLS_TO_INST_SUFFIX = {
'CodeActAgent': 'When you think you have completed the task, please finish the interaction using the "finish" tool.\n'
}
def initialize_runtime(
runtime: Runtime,
instance: pd.Series,
):
"""Initialize the runtime for the agent.
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
obs: CmdOutputObservation
lca_path = bench_config['LCA_PATH']
lca_ci_path = os.path.join(
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
)
repo_name = instance['repo_name']
repos_path = bench_config['repos_folder']
repo_owner = instance['repo_owner']
repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
model_name = bench_config['model_name']
action = CmdRunAction(command=f'mkdir {lca_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command=f'cd {lca_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
action = CmdRunAction(command=f'git clone {lca_repo_url}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command=f'cd {lca_ci_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command='git switch open-hands-integration')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
config_path = os.path.join(script_dir, 'config.yaml')
with open(config_path, 'r') as file:
config_as_text = file.read()
commandf = f"echo '{config_as_text}' > config.yaml"
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
token_gh = bench_config['token_gh']
commandf = f'export TOKEN_GH={token_gh}'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
action = CmdRunAction(command='poetry install')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
# Set up the task environment
commandf = f'poetry run python run_get_datapoint.py --model-name {model_name} --id {instance["id"]} > branch_name.txt'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
if obs.exit_code != 0:
print(f'run_get_datapoint.py failed at {instance["id"]} with {obs.content}')
assert obs.exit_code == 0
commandf = 'cat branch_name.txt'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
bench_config['user_branch_name'] = obs.content
# Navigate to the task's code path
action = CmdRunAction(command=f'cd {repo_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
def complete_runtime(
runtime: Runtime,
instance: pd.Series,
) -> dict[str, Any]:
"""Complete the runtime for the agent.
This function is called before the runtime is used to run the agent.
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
obs: CmdOutputObservation
model_name = bench_config['model_name']
lca_path = bench_config['LCA_PATH']
lca_ci_path = os.path.join(
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
)
user_branch_name = bench_config['user_branch_name']
token_gh = bench_config['token_gh']
commandf = f'export TOKEN_GH={token_gh}'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
# Navigate to the lca-baseslines scripts path
action = CmdRunAction(command=f'cd {lca_ci_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
commandf = f'poetry run python run_push_datapoint.py --id {instance["id"]} --model-name {model_name} --user-branch-name {user_branch_name} > single_output.json'
logger.info(f'Running push script: {commandf}')
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
# assert obs.exit_code == 0
commandf = 'cat single_output.json'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
result = json.loads(obs.content)
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
return result
def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
config = get_config(metadata)
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
reset_logger_for_multiprocessing(logger, instance['instance_id'], log_dir)
else:
logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')
repo_name = instance['repo_name']
repo_workflow = instance['workflow_path']
repo_logs = instance['logs']
repos_path = bench_config['repos_folder']
repo_owner = instance['repo_owner']
repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
# Prepare the task instruction
instruction_no_oracle = f"""
<uploaded_files>
{repo_path}
</uploaded_files>
I've uploaded a python code repository in the directory {repo_path}, Consider the following issue:
<issue_description>
The repository must pass the CI workflow {repo_workflow}.
but it gave the following error
{repo_logs}
</issue_description>
Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?
I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.
Your task is to make the minimal changes to non-test files in the {repo_path} directory to ensure the <issue_description> is satisfied.
Follow these phases to resolve the issue:
Phase 1. READING: read the problem and reword it in clearer terms
1.1 If there are code or config snippets. Express in words any best practices or conventions in them.
1.2 Hightlight message errors, method names, variables, file names, stack traces, and technical details.
1.3 Explain the problem in clear terms.
1.4 Enumerate the steps to reproduce the problem.
1.5 Hightlight any best practices to take into account when testing and fixing the issue
Phase 2. RUNNING: install and run the tests on the repository
2.1 Follow the readme
2.2 Install the environment and anything needed
2.2 Iterate and figure out how to run the tests
Phase 3. EXPLORATION: find the files that are related to the problem and possible solutions
3.1 Use `grep` to search for relevant methods, classes, keywords and error messages.
3.2 Identify all files related to the problem statement.
3.3 Propose the methods and files to fix the issue and explain why.
3.4 From the possible file locations, select the most likely location to fix the issue.
Phase 4. TEST CREATION: before implementing any fix, create a script to reproduce and verify the issue.
4.1 Look at existing test files in the repository to understand the test format/structure.
4.2 Create a minimal reproduction script that reproduces the located issue.
4.3 Run the reproduction script to confirm you are reproducing the issue.
4.4 Adjust the reproduction script as necessary.
Phase 5. FIX ANALYSIS: state clearly the problem and how to fix it
5.1 State clearly what the problem is.
5.2 State clearly where the problem is located.
5.3 State clearly how the test reproduces the issue.
5.4 State clearly the best practices to take into account in the fix.
5.5 State clearly how to fix the problem.
Phase 6. FIX IMPLEMENTATION: Edit the source code to implement your chosen solution.
6.1 Make minimal, focused changes to fix the issue.
Phase 7. VERIFICATION: Test your implementation thoroughly.
7.1 Run your reproduction script to verify the fix works.
7.2 Add edge cases to your test script to ensure comprehensive coverage.
7.3 Run existing tests related to the modified code to ensure you haven't broken anything. Run any tests in the repository related to:
7.2.1 The issue you are fixing
7.2.2 The files you modified
7.2.3 The functions you changed
7.4 If any tests fail, revise your implementation until all tests pass
Phase 8. REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["sha_fail"]}.
8.1 Ensure you've fully addressed all requirements.
Once all phases are done, announce: 'Agent Task Complete'.
Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
"""
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Run the agent
state: State | None = asyncio.run(
run_controller(
config=config,
initial_user_action=MessageAction(content=instruction_no_oracle),
runtime=runtime,
fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
metadata.agent_class
),
)
)
assert state is not None
metrics = state.metrics.get() if state.metrics else {}
test_result = complete_runtime(runtime, instance)
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary
histories = compatibility_for_eval_history_pairs(state.history)
# Save the output
output = EvalOutput(
instance_id=instance['instance_id'],
# instance=instance.to_dict(orient='recorods'),
instruction=instruction_no_oracle,
metadata=metadata,
history=histories,
test_result=test_result,
metrics=metrics,
)
return output
if __name__ == '__main__':
parser = get_parser()
parser.add_argument(
'-s',
'--eval-split',
type=str,
default='test',
choices=['test'],
help='data split to evaluate on, must be test',
)
args, _ = parser.parse_known_args()
data_split = args.eval_split
bench = load_dataset(
'JetBrains-Research/lca-ci-builds-repair', split=data_split
).to_pandas()
# todo: see why 126 is giving problems on inference
# todo: see why 145 is giving problems on eval
bench = bench[bench['id'] != 126]
bench = bench[bench['id'] != 145]
# bench = bench.iloc[0:56]
# add column instnace_id for compatibility with oh repo, old id column must be kept for lca repo
bench['instance_id'] = bench['id'].astype(str)
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
metadata = make_metadata(
llm_config,
f'jetbrains-lca-ci--{data_split}',
args.agent_cls,
args.max_iterations,
args.eval_note,
args.eval_output_dir,
)
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
instances = prepare_dataset(bench, output_file, args.eval_n_limit)
run_evaluation(
instances, metadata, output_file, args.eval_num_workers, process_instance
)

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
set -eo pipefail
source "evaluation/utils/version_control.sh"
PROCESS_FILEPATH=$1
if [ -z "$PROCESS_FILEPATH" ]; then
echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
exit 1
fi
get_openhands_version
PROCESS_FILEPATH=$(realpath $PROCESS_FILEPATH)
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "PROCESS_FILEPATH: $PROCESS_FILEPATH"
EVAL_NOTE="$OPENHANDS_VERSION"
if [ -n "$EXP_NAME" ]; then
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
function run_eval() {
COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
--predictions-path $PROCESS_FILEPATH "
echo "RUNNING: $COMMAND"
# Run the command
eval $COMMAND
}
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
run_eval

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -eo pipefail
source "evaluation/utils/version_control.sh"
MODEL_CONFIG=$1
get_openhands_version
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
EVAL_NOTE="$OPENHANDS_VERSION"
if [ -n "$EXP_NAME" ]; then
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
function run_eval() {
COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
--llm-config $MODEL_CONFIG "
# Run the command
eval $COMMAND
}
#unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
run_eval

View File

@@ -0,0 +1,60 @@
"""Installs LCA CI Build Repair benchmark with scripts for OH integration."""
import os
import shutil
import subprocess
import yaml
def setup():
# Read config.yaml
print('Reading config.yaml')
script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
config_path = os.path.join(script_dir, 'config.yaml')
with open(config_path, 'r') as f:
config = yaml.safe_load(f)
lca_path = config['LCA_PATH']
lca_ci_path = os.path.join(
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
)
repo_url = 'https://github.com/juanmichelini/lca-baselines'
# Clone the repository to LCA_CI_PATH
print(f'Cloning lca-baselines repository from {repo_url} into {lca_path}')
result = subprocess.run(
['git', 'clone', repo_url], cwd=lca_path, capture_output=True, text=True
)
if result.returncode != 0:
print(f'Warning cloning repository: {result.stderr}')
# Clone the repository to LCA_CI_PATH
print('Switching branches')
result = subprocess.run(
['git', 'switch', 'open-hands-integration'],
cwd=lca_ci_path,
capture_output=True,
text=True,
)
if result.returncode != 0:
print(f'Warning switching repository: {result.stderr}')
# Move and rename config_lca.yaml (overwrite if exists)
lca_ci_config_path = os.path.join(lca_ci_path, 'config.yaml')
print(f'Copying config.yaml to {lca_ci_config_path}')
shutil.copy(config_path, lca_ci_config_path)
# Run poetry install in LCA_CI_PATH
print(f"Running 'poetry install' in {lca_ci_path}")
result = subprocess.run(
['poetry', 'install'], cwd=lca_ci_path, capture_output=True, text=True
)
if result.returncode != 0:
print(f'Warning during poetry install: {result.stderr}')
if __name__ == '__main__':
setup()

View File

@@ -2,6 +2,8 @@
This folder contains the evaluation harness that we built on top of the original [SWE-Bench benchmark](https://www.swebench.com/) ([paper](https://arxiv.org/abs/2310.06770)).
**UPDATE (4/8/2025): We now support running SWT-Bench evaluation! For more details, checkout [the corresponding section](#SWT-Bench-Evaluation).**
**UPDATE (03/27/2025): We now support SWE-Bench multimodal evaluation! Simply use "princeton-nlp/SWE-bench_Multimodal" as the dataset name in the `run_infer.sh` script to evaluate on multimodal instances.**
**UPDATE (2/18/2025): We now support running SWE-Gym using the same evaluation harness here. For more details, checkout [this README](./SWE-Gym.md).**
@@ -141,7 +143,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id] [dataset_name] [split]
# Example
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Lite/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
```
The script now accepts optional arguments:
@@ -182,3 +184,58 @@ To clean-up all existing runtimes that you've already started, run:
```bash
ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
```
## SWT-Bench Evaluation
[SWT-Bench](https://swtbench.com/) ([paper](https://arxiv.org/abs/2406.12952)) is a benchmark for evaluating the capability of LLMs at creating unit tests. It is performed on the same instances as SWE-Bench, but requires a separate evaluation harness to capture coverage and issue reproduction. We therefore detail below how to leverage the inference script in this folder to run inference on SWT-Bench and how to use the SWT-Bench evaluation harness to evaluate them.
### Run inference on SWT-Bench
To run inference on SWT-Bench, you can use the same `run_infer.sh` script as described for evaluation on plain SWE-Bench. The only differences is that you need to specify the `mode` parameter to `swt` or `swt-ci` when running the script. For example, to run inference on SWT-Bench Verified, run the following command:
```bash
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [swe-dataset] test 1 swt
# Example - This runs evaluation on CodeActAgent for 500 instances on "SWT-bench_Verified"'s test set (corresponding to SWE-bench_Verified), with max 100 iteration per instances, with 1 number of workers running in parallel
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4o-2024-11-20 HEAD CodeActAgent 500 100 1 princeton-nlp/SWE-bench_Verified test 1 swt
```
The two modes `swt` and `swt-ci` have the following effect:
- `swt`: This mode will change the prompt to instruct the agent to generate reproducing test cases instead of resolving the issue.
- `swt-ci`: In addition to the changes by `swt`, this mode sets up the CI environment by i) pre-installing the environment in the docker image, such that the test framework can be executed without errors and ii) telling the model the exact command to run the test framework.
### Run evaluation for SWT-bench
The evaluation of these results is done leveraging [the SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master).
#### Extracting results into SWT-Bench harness format
In order to run evaluation of the obtained inference results in the SWT-Bench harness, we transform the results to a format that the SWT-Bench evaluation harness expects.
```bash
python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file [output.jsonl] > [output_swt.jsonl]
# Example
python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file "evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Verified-test/CodeActAgent/gpt-4o-2024-11-20_maxiter_100_N_v0.31.0-no-hint-swt-run_1/output.jsonl" > OpenHands-gpt-4o-2024-11-20.jsonl
```
#### Running the results in SWT-Bench
Next, we run the [SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master) with these results.
First set-up and validate the setup as described in the harness [here](https://github.com/logic-star-ai/swt-bench/tree/master?tab=readme-ov-file#-set-up).
Then, run the evaluation with the following command:
```bash
# Example
python3 -m src.main \
--dataset_name princeton-nlp/SWE-bench_Verified \
--predictions_path <pathTo>/OpenHands-gpt-4o-2024-11-20.jsonl \
--max_workers 12 \
--run_id OpenHands-CodeAct-gpt-4o-2024-11-20 --patch_types vanilla --build_mode api
```
The results of the evaluation can be obtained by running the reporting script of the harness.
```bash
# Example
python -m src.report run_instance_swt_logs/OpenHands-CodeAct-gpt-4o-2024-11-20/OpenHands__CodeActAgent__gpt-4o-2024-11-20 --dataset verified
```

View File

@@ -0,0 +1,842 @@
# Based on https://github.com/logic-star-ai/swt-bench/blob/master/src/constants.py
# Constants - Installation Specifications
MAP_VERSION_TO_INSTALL_SKLEARN = {
k: {
'python': '3.6',
'packages': 'numpy scipy cython pytest pandas matplotlib',
'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
'pip_packages': [
'cython',
'numpy==1.19.2',
'setuptools',
'scipy==1.5.2',
],
}
for k in ['0.20', '0.21', '0.22']
}
MAP_VERSION_TO_INSTALL_SKLEARN.update(
{
k: {
'python': '3.9',
'packages': "'numpy==1.19.2' 'scipy==1.5.2' 'cython==3.0.10' pytest 'pandas<2.0.0' 'matplotlib<3.9.0' setuptools pytest joblib threadpoolctl",
'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
'pip_packages': ['cython', 'setuptools', 'numpy', 'scipy'],
}
for k in ['1.3', '1.4']
}
)
MAP_VERSION_TO_INSTALL_FLASK = {
'2.0': {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': [
'setuptools==70.0.0',
'Werkzeug==2.3.7',
'Jinja2==3.0.1',
'itsdangerous==2.1.2',
'click==8.0.1',
'MarkupSafe==2.1.3',
],
},
'2.1': {
'python': '3.10',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': [
'click==8.1.3',
'itsdangerous==2.1.2',
'Jinja2==3.1.2',
'MarkupSafe==2.1.1',
'Werkzeug==2.3.7',
],
},
}
MAP_VERSION_TO_INSTALL_FLASK.update(
{
k: {
'python': '3.11',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': [
'click==8.1.3',
'itsdangerous==2.1.2',
'Jinja2==3.1.2',
'MarkupSafe==2.1.1',
'Werkzeug==2.3.7',
],
}
for k in ['2.2', '2.3']
}
)
MAP_VERSION_TO_INSTALL_DJANGO = {
k: {
'python': '3.5',
'packages': 'requirements.txt',
'pre_install': [
'apt-get update && apt-get install -y locales',
"echo 'en_US UTF-8' > /etc/locale.gen",
'locale-gen en_US.UTF-8',
],
'install': 'python setup.py install',
'pip_packages': ['setuptools'],
'eval_commands': [
'export LANG=en_US.UTF-8',
'export LC_ALL=en_US.UTF-8',
'export PYTHONIOENCODING=utf8',
'export LANGUAGE=en_US:en',
],
}
for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']
}
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {'python': '3.5', 'install': 'python setup.py install'}
for k in ['1.4', '1.5', '1.6']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.6',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'eval_commands': [
"sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen",
'export LANG=en_US.UTF-8',
'export LANGUAGE=en_US:en',
'export LC_ALL=en_US.UTF-8',
],
}
for k in ['3.0', '3.1', '3.2']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.8',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['4.0']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['4.1', '4.2']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.11',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['5.0']
}
)
MAP_VERSION_TO_INSTALL_REQUESTS = {
k: {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .'}
for k in ['0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2']
+ ['2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17']
+ ['2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '3.0']
}
MAP_VERSION_TO_INSTALL_SEABORN = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .',
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'importlib-resources==6.0.1',
'kiwisolver==1.4.5',
'matplotlib==3.7.2',
'numpy==1.25.2',
'packaging==23.1',
'pandas==1.3.5', # 2.0.3
'pillow==10.0.0',
'pyparsing==3.0.9',
'pytest',
'python-dateutil==2.8.2',
'pytz==2023.3.post1',
'scipy==1.11.2',
'six==1.16.0',
'tzdata==2023.1',
'zipp==3.16.2',
],
}
for k in ['0.11']
}
MAP_VERSION_TO_INSTALL_SEABORN.update(
{
k: {
'python': '3.9',
'install': 'python -m pip install -e .[dev]',
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'importlib-resources==6.0.1',
'kiwisolver==1.4.5',
'matplotlib==3.7.2',
'numpy==1.25.2',
'packaging==23.1',
'pandas==2.0.0',
'pillow==10.0.0',
'pyparsing==3.0.9',
'pytest',
'python-dateutil==2.8.2',
'pytz==2023.3.post1',
'scipy==1.11.2',
'six==1.16.0',
'tzdata==2023.1',
'zipp==3.16.2',
],
}
for k in ['0.12', '0.13']
}
)
MAP_VERSION_TO_INSTALL_PYTEST = {
k: {'python': '3.9', 'install': 'python -m pip install -e .'}
for k in [
'4.4',
'4.5',
'4.6',
'5.0',
'5.1',
'5.2',
'5.3',
'5.4',
'6.0',
'6.2',
'6.3',
'7.0',
'7.1',
'7.2',
'7.4',
'8.0',
]
}
MAP_VERSION_TO_INSTALL_PYTEST['4.4']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.13.1',
'py==1.11.0',
'setuptools==68.0.0',
'six==1.16.0',
]
MAP_VERSION_TO_INSTALL_PYTEST['4.5']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.11.0',
'py==1.11.0',
'setuptools==68.0.0',
'six==1.16.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['4.6']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'six==1.16.0',
'wcwidth==0.2.6',
]
for k in ['5.0', '5.1', '5.2']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['5.3']['pip_packages'] = [
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['5.4']['pip_packages'] = [
'py==1.11.0',
'packaging==23.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.13.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['6.0']['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'toml==0.10.2',
]
for k in ['6.2', '6.3']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'toml==0.10.2',
]
MAP_VERSION_TO_INSTALL_PYTEST['7.0']['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
]
for k in ['7.1', '7.2']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['7.4']['pip_packages'] = [
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==1.3.0',
'exceptiongroup==1.1.3',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['8.0']['pip_packages'] = [
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==1.3.0',
'exceptiongroup==1.1.3',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_MATPLOTLIB = {
k: {
'python': '3.11',
'packages': 'environment.yml',
'install': 'python -m pip install -e .',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng'
],
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'ghostscript',
'kiwisolver==1.4.5',
'numpy==1.25.2',
'packaging==23.1',
'pillow==10.0.0',
'pikepdf',
'pyparsing==3.0.9',
'python-dateutil==2.8.2',
'six==1.16.0',
'setuptools==68.1.2',
'setuptools-scm==7.1.0',
'typing-extensions==4.7.1',
],
}
for k in ['3.5', '3.6', '3.7']
}
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
{
k: {
'python': '3.8',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super'
],
'pip_packages': ['pytest', 'ipython'],
}
for k in ['3.1', '3.2', '3.3', '3.4']
}
)
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
{
k: {
'python': '3.7',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config'
],
'pip_packages': ['pytest'],
}
for k in ['3.0']
}
)
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
{
k: {
'python': '3.5',
'install': 'python setup.py build; python setup.py install',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && && apt-get install -y imagemagick ffmpeg'
],
'pip_packages': ['pytest'],
'execute_test_as_nonroot': True,
}
for k in ['2.0', '2.1', '2.2', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5']
}
)
MAP_VERSION_TO_INSTALL_SPHINX = {
k: {
'python': '3.9',
'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11'],
'install': 'python -m pip install -e .[test]',
'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"],
}
for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']
+ ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']
+ ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2']
}
for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/Jinja2>=2.3/Jinja2<3.0/' setup.py",
"sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp<=1.0.7/' setup.py",
"sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp<=1.0.5/' setup.py",
"sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp<=1.0.6/' setup.py",
"sed -i 's/alabaster>=0.7,<0.8/alabaster>=0.7,<0.7.12/' setup.py",
"sed -i \"s/'packaging',/'packaging', 'markupsafe<=2.0.1',/\" setup.py",
]
)
if k in ['4.2', '4.3', '4.4']:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py",
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py",
]
)
elif k == '4.1':
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
(
"grep -q 'sphinxcontrib-htmlhelp>=2.0.0' setup.py && "
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py || "
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py"
),
(
"grep -q 'sphinxcontrib-serializinghtml>=1.1.5' setup.py && "
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py || "
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py"
),
]
)
else:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py",
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py",
]
)
MAP_VERSION_TO_INSTALL_SPHINX['7.2']['pre_install'] += [
'apt-get update && apt-get install -y graphviz'
]
MAP_VERSION_TO_INSTALL_ASTROPY = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .[test] --verbose',
'pip_packages': [
'attrs==23.1.0',
'exceptiongroup==1.1.3',
'execnet==2.0.2',
'hypothesis==6.82.6',
'iniconfig==2.0.0',
'numpy==1.25.2',
'packaging==23.1',
'pluggy==1.3.0',
'psutil==5.9.5',
'pyerfa==2.0.0.3',
'pytest-arraydiff==0.5.0',
'pytest-astropy-header==0.2.2',
'pytest-astropy==0.10.0',
'pytest-cov==4.1.0',
'pytest-doctestplus==1.0.0',
'pytest-filter-subpackage==0.1.2',
'pytest-mock==3.11.1',
'pytest-openfiles==0.5.0',
'pytest-remotedata==0.4.0',
'pytest-xdist==3.3.1',
'pytest==7.4.0',
'PyYAML==6.0.1',
'setuptools==68.0.0',
'sortedcontainers==2.4.0',
'tomli==2.0.1',
],
}
for k in ['0.1', '0.2', '0.3', '0.4', '1.1', '1.2', '1.3', '3.0', '3.1', '3.2']
+ ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']
}
for k in ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']:
MAP_VERSION_TO_INSTALL_ASTROPY[k]['pre_install'] = [
'sed -i \'s/requires = \\["setuptools",/requires = \\["setuptools==68.0.0",/\' pyproject.toml'
]
MAP_VERSION_TO_INSTALL_SYMPY = {
k: {
'python': '3.9',
'packages': 'mpmath flake8',
'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'],
'install': 'python -m pip install -e .',
}
for k in ['0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6']
+ ['1.7', '1.8', '1.9']
}
MAP_VERSION_TO_INSTALL_SYMPY.update(
{
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': ['mpmath==1.3.0'],
}
for k in ['1.13']
}
)
MAP_VERSION_TO_INSTALL_PYLINT = {
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in [
'2.10',
'2.11',
'2.13',
'2.14',
'2.15',
'2.16',
'2.17',
'2.8',
'2.9',
'3.0',
]
}
MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pip_packages'] = ['pyenchant==3.2']
MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pre_install'] = [
'apt-get update && apt-get install -y libenchant-2-dev hunspell-en-us'
]
MAP_VERSION_TO_INSTALL_PYLINT.update(
{
k: {
**MAP_VERSION_TO_INSTALL_PYLINT[k],
'pip_packages': ['astroid==3.0.0a6', 'setuptools'],
}
for k in ['3.0']
}
)
MAP_VERSION_TO_INSTALL_XARRAY = {
k: {
'python': '3.10',
'packages': 'environment.yml',
'install': 'python -m pip install -e .',
'pip_packages': [
'numpy==1.23.0',
'packaging==23.1',
'pandas==1.5.3',
'pytest==7.4.0',
'python-dateutil==2.8.2',
'pytz==2023.3',
'six==1.16.0',
'scipy==1.11.1',
'setuptools==68.0.0',
],
'no_use_env': True,
}
for k in ['0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09']
}
MAP_VERSION_TO_INSTALL_SQLFLUFF = {
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in [
'0.10',
'0.11',
'0.12',
'0.13',
'0.4',
'0.5',
'0.6',
'0.8',
'0.9',
'1.0',
'1.1',
'1.2',
'1.3',
'1.4',
'2.0',
'2.1',
'2.2',
]
}
MAP_VERSION_TO_INSTALL_DBT_CORE = {
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in [
'0.13',
'0.14',
'0.15',
'0.16',
'0.17',
'0.18',
'0.19',
'0.20',
'0.21',
'1.0',
'1.1',
'1.2',
'1.3',
'1.4',
'1.5',
'1.6',
'1.7',
]
}
MAP_VERSION_TO_INSTALL_PYVISTA = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .',
'pip_packages': ['pytest'],
}
for k in ['0.20', '0.21', '0.22', '0.23']
}
MAP_VERSION_TO_INSTALL_PYVISTA.update(
{
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': ['pytest'],
}
for k in [
'0.24',
'0.25',
'0.26',
'0.27',
'0.28',
'0.29',
'0.30',
'0.31',
'0.32',
'0.33',
'0.34',
'0.35',
'0.36',
'0.37',
'0.38',
'0.39',
'0.40',
'0.41',
'0.42',
'0.43',
]
}
)
MAP_VERSION_TO_INSTALL_ASTROID = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .',
'pip_packages': ['pytest'],
}
for k in [
'2.10',
'2.12',
'2.13',
'2.14',
'2.15',
'2.16',
'2.5',
'2.6',
'2.7',
'2.8',
'2.9',
'3.0',
]
}
MAP_VERSION_TO_INSTALL_MARSHMALLOW = {
k: {
'python': '3.9',
'install': "python -m pip install -e '.[dev]'",
}
for k in [
'2.18',
'2.19',
'2.20',
'3.0',
'3.1',
'3.10',
'3.11',
'3.12',
'3.13',
'3.15',
'3.16',
'3.19',
'3.2',
'3.4',
'3.8',
'3.9',
]
}
MAP_VERSION_TO_INSTALL_PVLIB = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .[all]',
'packages': 'pandas scipy',
'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'],
}
for k in ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']
}
MAP_VERSION_TO_INSTALL_PYDICOM = {
k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy'}
for k in [
'1.0',
'1.1',
'1.2',
'1.3',
'1.4',
'2.0',
'2.1',
'2.2',
'2.3',
'2.4',
'3.0',
]
}
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.8'} for k in ['1.4', '2.0']}
)
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.9'} for k in ['2.1', '2.2']}
)
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.10'} for k in ['2.3']}
)
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.11'} for k in ['2.4', '3.0']}
)
MAP_VERSION_TO_INSTALL_HUMANEVAL = {k: {'python': '3.9'} for k in ['1.0']}
MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX = {
k: {'python': '3.10', 'packages': 'pytest'} for k in ['0.0.1']
}
# Constants - Task Instance Instllation Environment
MAP_VERSION_TO_INSTALL = {
'astropy/astropy': MAP_VERSION_TO_INSTALL_ASTROPY,
'dbt-labs/dbt-core': MAP_VERSION_TO_INSTALL_DBT_CORE,
'django/django': MAP_VERSION_TO_INSTALL_DJANGO,
'matplotlib/matplotlib': MAP_VERSION_TO_INSTALL_MATPLOTLIB,
'marshmallow-code/marshmallow': MAP_VERSION_TO_INSTALL_MARSHMALLOW,
'mwaskom/seaborn': MAP_VERSION_TO_INSTALL_SEABORN,
'pallets/flask': MAP_VERSION_TO_INSTALL_FLASK,
'psf/requests': MAP_VERSION_TO_INSTALL_REQUESTS,
'pvlib/pvlib-python': MAP_VERSION_TO_INSTALL_PVLIB,
'pydata/xarray': MAP_VERSION_TO_INSTALL_XARRAY,
'pydicom/pydicom': MAP_VERSION_TO_INSTALL_PYDICOM,
'pylint-dev/astroid': MAP_VERSION_TO_INSTALL_ASTROID,
'pylint-dev/pylint': MAP_VERSION_TO_INSTALL_PYLINT,
'pytest-dev/pytest': MAP_VERSION_TO_INSTALL_PYTEST,
'pyvista/pyvista': MAP_VERSION_TO_INSTALL_PYVISTA,
'scikit-learn/scikit-learn': MAP_VERSION_TO_INSTALL_SKLEARN,
'sphinx-doc/sphinx': MAP_VERSION_TO_INSTALL_SPHINX,
'sqlfluff/sqlfluff': MAP_VERSION_TO_INSTALL_SQLFLUFF,
'swe-bench/humaneval': MAP_VERSION_TO_INSTALL_HUMANEVAL,
'nielstron/humaneval_fix': MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX,
'sympy/sympy': MAP_VERSION_TO_INSTALL_SYMPY,
}
# Constants - Repository Specific Installation Instructions
MAP_REPO_TO_INSTALL = {}
# Constants - Task Instance Test Frameworks
TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long -p no:cacheprovider'
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE = {
'astropy/astropy': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROPY.keys()
},
'django/django': {
k: './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
for k in MAP_VERSION_TO_INSTALL_DJANGO.keys()
},
'marshmallow-code/marshmallow': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MARSHMALLOW.keys()
},
'matplotlib/matplotlib': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MATPLOTLIB.keys()
},
'mwaskom/seaborn': {
k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_SEABORN.keys()
},
'pallets/flask': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_FLASK.keys()
},
'psf/requests': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_REQUESTS.keys()
},
'pvlib/pvlib-python': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PVLIB.keys()
},
'pydata/xarray': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_XARRAY.keys()
},
'pydicom/pydicom': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYDICOM.keys()
},
'pylint-dev/astroid': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROID.keys()
},
'pylint-dev/pylint': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYLINT.keys()
},
'pytest-dev/pytest': {
k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_PYTEST.keys()
},
'pyvista/pyvista': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYVISTA.keys()
},
'scikit-learn/scikit-learn': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SKLEARN.keys()
},
'sphinx-doc/sphinx': {
k: 'tox -epy39 -v --' for k in MAP_VERSION_TO_INSTALL_SPHINX.keys()
},
'sqlfluff/sqlfluff': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SQLFLUFF.keys()
},
'swe-bench/humaneval': {
k: 'python' for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
},
'nielstron/humaneval_fix': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
},
'sympy/sympy': {
k: 'bin/test -C --verbose' for k in MAP_VERSION_TO_INSTALL_SYMPY.keys()
},
}
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = (
'./tests/runtests.py --verbosity 2'
)

View File

@@ -3,7 +3,7 @@ import copy
import json
import os
import tempfile
from typing import Any
from typing import Any, Literal
import pandas as pd
import toml
@@ -17,6 +17,11 @@ from evaluation.benchmarks.swe_bench.binary_patch_utils import (
from evaluation.benchmarks.swe_bench.resource.mapping import (
get_instance_resource_factor,
)
from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
MAP_REPO_TO_INSTALL,
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE,
MAP_VERSION_TO_INSTALL,
)
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
@@ -55,6 +60,7 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
BenchMode = Literal['swe', 'swt', 'swt-ci']
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@@ -68,7 +74,36 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
instruction = f"""
mode = metadata.details['mode']
if mode.startswith('swt'):
test_instructions = (
f'The following command can be used to run the tests: `{list(MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE[instance.repo].values())[0]}`. Make sure they fail in the expected way.\n'
if mode.endswith('ci')
else ''
)
instruction = f"""\
<uploaded_files>
/workspace/{workspace_dir_name}
</uploaded_files>
I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:
<issue_description>
{instance.problem_statement}
</issue_description>
Can you help me implement the necessary changes to the repository to test whether the issue in <issue_description> was resolved?
I will take care of all changes to any of the non-test files. This means you DON'T have to modify the actual logic and ONLY have to update test logic and tests!
Your task is to make the minimal changes to tests files in the /workspace directory to reproduce the issue in the <issue_description>, i.e., such that the generated tests fail in the current state (where the issue is unresolved) and pass when the issue will be resolved.
Follow these steps to reproduce the issue:
1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
2. Create a script `reproduction.py` to reproduce the error and execute it with `python reproduction.py` using the BashTool, to confirm the error
3. Edit the sourcecode of the repo to integrate your reproduction script into the test framework
4. Run the test framework and make sure your tests fail! Only submit FAILING tests! Never submit passing tests.
{test_instructions}Your thinking should be thorough and so it's fine if it's very long.
"""
else:
instruction = f"""
<uploaded_files>
/workspace/{workspace_dir_name}
</uploaded_files>
@@ -356,6 +391,30 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
if metadata.details['mode'] == 'swt-ci':
# set up repo
setup_commands = []
if instance['repo'] in MAP_REPO_TO_INSTALL:
setup_commands.append(MAP_REPO_TO_INSTALL[instance['repo']])
# Run pre-install set up if provided
install = MAP_VERSION_TO_INSTALL.get(instance['repo'], {}).get(
instance['version'], []
)
if 'pre_install' in install:
for pre_install in install['pre_install']:
setup_commands.append(pre_install)
if 'install' in install:
setup_commands.append(install['install'])
for command in setup_commands:
action = CmdRunAction(command=command)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if 'multimodal' not in metadata.dataset.lower():
# Only for non-multimodal datasets, we need to activate the testbed environment for Python
# SWE-Bench multimodal datasets are not using the testbed environment
@@ -678,6 +737,13 @@ if __name__ == '__main__':
default='test',
help='split to evaluate on',
)
parser.add_argument(
'--mode',
type=str,
default='swe',
choices=['swe', 'swt', 'swt-ci'],
help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
)
args, _ = parser.parse_known_args()
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
@@ -714,7 +780,7 @@ if __name__ == '__main__':
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
details = {}
details = {'mode': args.mode}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
dataset_descrption = (

View File

@@ -12,6 +12,7 @@ NUM_WORKERS=$6
DATASET=$7
SPLIT=$8
N_RUNS=$9
MODE=${10}
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
@@ -45,6 +46,11 @@ if [ -z "$SPLIT" ]; then
SPLIT="test"
fi
if [ -z "$MODE" ]; then
MODE="swe"
echo "MODE not specified, use default $MODE"
fi
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
@@ -55,6 +61,10 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"
echo "MAX_ITER: $MAX_ITER"
echo "NUM_WORKERS: $NUM_WORKERS"
echo "COMMIT_HASH: $COMMIT_HASH"
echo "MODE: $MODE"
# Default to NOT use Hint
if [ -z "$USE_HINT_TEXT" ]; then
@@ -74,9 +84,13 @@ fi
if [ -n "$EXP_NAME" ]; then
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
# if mode != swe, add mode to the eval note
if [ "$MODE" != "swe" ]; then
EVAL_NOTE="${EVAL_NOTE}-${MODE}"
fi
function run_eval() {
local eval_note=$1
local eval_note="${1}"
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
@@ -84,7 +98,8 @@ function run_eval() {
--eval-num-workers $NUM_WORKERS \
--eval-note $eval_note \
--dataset $DATASET \
--split $SPLIT"
--split $SPLIT \
--mode $MODE"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"

View File

@@ -0,0 +1,95 @@
import argparse
import json
import logging
import unidiff
from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
MAP_VERSION_TO_INSTALL,
)
_LOGGER = logging.getLogger(__name__)
def remove_setup_files(model_patch: str, instance: dict, delete_setup_changes: bool):
"""Discard all changes that a patch applies to files changes by the pre_install script and that are reproduction scripts (top-level script)"""
setup_files = ['setup.py', 'tox.ini', 'pyproject.toml']
pre_install = (
MAP_VERSION_TO_INSTALL.get(instance['repo'], {})
.get(instance['version'], {})
.get('pre_install', [])
)
relevant_files = (
[
file
for file in setup_files
if any(file in install and 'sed' in install for install in pre_install)
]
if delete_setup_changes
else []
)
for i in range(10):
try:
# Appearently outputs.jsonl has .strip() applied, so we try to reconstruct the original patch by adding auxiliary whitespace
patch = unidiff.PatchSet(model_patch + i * '\n')
break
except unidiff.UnidiffParseError:
pass
to_delete = []
for i, file in enumerate(patch):
if (
any(f in file.source_file for f in relevant_files)
or file.target_file.count('/') == 1
):
to_delete.append(i)
for i in reversed(to_delete):
del patch[i]
return str(patch)
def main(
prediction_file: str,
):
"""Main function to extract the model patches from the OpenHands prediction file and turn them into the expected SWT-Bench format."""
with open(prediction_file) as f:
for line in f:
pred = json.loads(line)
try:
git_diff = pred['test_result']['git_patch']
except KeyError:
_LOGGER.warning(
'Warning: No git diff found for instance %s', pred['instance_id']
)
continue
ci_mode = pred['metadata']['details'].get('mode', '') == 'swt-ci'
try:
git_diff = remove_setup_files(git_diff, pred['instance'], ci_mode)
except: # noqa: E722
_LOGGER.warning(
'Warning: Invalid git diff found for instance %s',
pred['instance_id'],
)
print(
json.dumps(
{
'instance_id': pred['instance_id'],
'model_name_or_path': f'{pred["metadata"]["llm_config"]["openrouter_app_name"]}__{pred["metadata"]["agent_class"]}__{pred["metadata"]["llm_config"]["model"]}',
'model_patch': git_diff,
'full_output': json.dumps(pred),
}
)
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--prediction_file',
type=str,
required=True,
help='Path to the prediction file (.../outputs.jsonl)',
)
args = parser.parse_args()
main(args.prediction_file)

View File

@@ -1,3 +1,10 @@
# Run frontend checks
echo "Running frontend checks..."
cd frontend
npm run check-unlocalized-strings
npx lint-staged
# Run backend pre-commit
echo "Running backend pre-commit..."
cd ..
pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml

View File

@@ -223,7 +223,7 @@ describe("ChatInput", () => {
render(<ChatInput onSubmit={onSubmitMock} />);
const textarea = screen.getByRole("textbox");
expect(textarea).toBeInTheDocument();
// The actual verification of maxRows=16 is handled internally by the TextareaAutosize component
// and affects how many rows the textarea can expand to
});

View File

@@ -1,8 +1,8 @@
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import type { Message } from "#/message";
import { act, screen, waitFor, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import type { Message } from "#/message";
import { addUserMessage } from "#/state/chat-slice";
import { SUGGESTIONS } from "#/utils/suggestions";
import * as ChatSlice from "#/state/chat-slice";
@@ -45,7 +45,15 @@ describe("Empty state", () => {
it("should render suggestions if empty", () => {
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: { messages: [] },
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
@@ -68,7 +76,15 @@ describe("Empty state", () => {
it("should render the default suggestions", () => {
renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: { messages: [] },
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
@@ -98,7 +114,15 @@ describe("Empty state", () => {
const user = userEvent.setup();
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: { messages: [] },
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
@@ -127,7 +151,15 @@ describe("Empty state", () => {
const user = userEvent.setup();
const { rerender } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: { messages: [] },
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});

View File

@@ -95,6 +95,23 @@ describe("ExpandableMessage", () => {
expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
});
it("should render with neutral border and no icon for action messages with undefined success (timeout case)", () => {
renderWithProviders(
<ExpandableMessage
id="OBSERVATION_MESSAGE$RUN"
message="Command timed out"
type="action"
success={undefined}
/>,
);
const element = screen.getByText("OBSERVATION_MESSAGE$RUN");
const container = element.closest(
"div.flex.gap-2.items-center.justify-start",
);
expect(container).toHaveClass("border-neutral-300");
expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
});
it("should render the out of credits message when the user is out of credits", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - We only care about the APP_MODE and FEATURE_FLAGS fields

View File

@@ -3,34 +3,46 @@ import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
import userEvent from "@testing-library/user-event";
import { AuthModal } from "#/components/features/waitlist/auth-modal";
import * as CaptureConsent from "#/utils/handle-capture-consent";
import * as AuthHook from "#/context/auth-context";
describe("AuthModal", () => {
beforeAll(() => {
vi.stubGlobal("location", { href: "" });
vi.spyOn(AuthHook, "useAuth").mockReturnValue({
providersAreSet: false,
setProvidersAreSet: vi.fn(),
providerTokensSet: [],
setProviderTokensSet: vi.fn()
});
});
afterAll(() => {
vi.unstubAllGlobals();
vi.restoreAllMocks();
});
it("should render a tos checkbox that is unchecked by default", () => {
render(<AuthModal githubAuthUrl={null} />);
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
const checkbox = screen.getByRole("checkbox");
expect(checkbox).not.toBeChecked();
});
it("should only enable the GitHub button if the tos checkbox is checked", async () => {
it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
const user = userEvent.setup();
render(<AuthModal githubAuthUrl={null} />);
const checkbox = screen.getByRole("checkbox");
const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
expect(button).toBeDisabled();
const checkbox = screen.getByRole("checkbox");
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
expect(githubButton).toBeDisabled();
expect(gitlabButton).toBeDisabled();
await user.click(checkbox);
expect(button).not.toBeDisabled();
expect(githubButton).not.toBeDisabled();
expect(gitlabButton).not.toBeDisabled();
});
it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
@@ -40,7 +52,7 @@ describe("AuthModal", () => {
);
const user = userEvent.setup();
render(<AuthModal githubAuthUrl="mock-url" />);
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
const checkbox = screen.getByRole("checkbox");
await user.click(checkbox);

View File

@@ -56,12 +56,16 @@ describe("GitRepositorySelector", () => {
full_name: "test/repo1",
git_provider: "github" as Provider,
stargazers_count: 100,
is_public: true,
pushed_at: "2023-01-01T00:00:00Z",
},
{
id: 2,
full_name: "test/repo2",
git_provider: "github" as Provider,
stargazers_count: 200,
is_public: true,
pushed_at: "2023-01-02T00:00:00Z",
},
];

View File

@@ -0,0 +1,72 @@
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import { render, screen } from "@testing-library/react";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import { setupStore } from "test-utils";
import { describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { AuthProvider } from "#/context/auth-context";
import { HomeHeader } from "#/components/features/home/home-header";
import OpenHands from "#/api/open-hands";
const renderHomeHeader = () => {
const RouterStub = createRoutesStub([
{
Component: HomeHeader,
path: "/",
},
{
Component: () => <div data-testid="conversation-screen" />,
path: "/conversations/:conversationId",
},
]);
return render(<RouterStub />, {
wrapper: ({ children }) => (
<Provider store={setupStore()}>
<AuthProvider initialProvidersAreSet>
<QueryClientProvider client={new QueryClient()}>
{children}
</QueryClientProvider>
</AuthProvider>
</Provider>
),
});
};
describe("HomeHeader", () => {
it("should create an empty conversation and redirect when pressing the launch from scratch button", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
renderHomeHeader();
const launchButton = screen.getByRole("button", {
name: /launch from scratch/i,
});
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
"gui",
undefined,
undefined,
[],
undefined,
undefined,
);
// expect to be redirected to /conversations/:conversationId
await screen.findByTestId("conversation-screen");
});
it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
renderHomeHeader();
const launchButton = screen.getByRole("button", {
name: /launch from scratch/i,
});
await userEvent.click(launchButton);
expect(launchButton).toHaveTextContent(/Loading/i);
expect(launchButton).toBeDisabled();
});
});

View File

@@ -0,0 +1,231 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import { describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import { setupStore } from "test-utils";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import OpenHands from "#/api/open-hands";
import { AuthProvider } from "#/context/auth-context";
import { GitRepository } from "#/types/git";
import * as GitService from "#/api/git";
import { RepoConnector } from "#/components/features/home/repo-connector";
const renderRepoConnector = (initialProvidersAreSet = true) => {
const mockRepoSelection = vi.fn();
const RouterStub = createRoutesStub([
{
Component: () => <RepoConnector onRepoSelection={mockRepoSelection} />,
path: "/",
},
{
Component: () => <div data-testid="conversation-screen" />,
path: "/conversations/:conversationId",
},
{
Component: () => <div data-testid="settings-screen" />,
path: "/settings",
},
]);
return render(<RouterStub />, {
wrapper: ({ children }) => (
<Provider store={setupStore()}>
<AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
<QueryClientProvider client={new QueryClient()}>
{children}
</QueryClientProvider>
</AuthProvider>
</Provider>
),
});
};
const MOCK_RESPOSITORIES: GitRepository[] = [
{
id: 1,
full_name: "rbren/polaris",
git_provider: "github",
is_public: true,
},
{
id: 2,
full_name: "All-Hands-AI/OpenHands",
git_provider: "github",
is_public: true,
},
];
describe("RepoConnector", () => {
it("should render the repository connector section", () => {
renderRepoConnector();
screen.getByTestId("repo-connector");
});
it("should render the available repositories in the dropdown", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
// Wait for the loading state to be replaced with the dropdown
const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
await userEvent.click(dropdown);
await waitFor(() => {
screen.getByText("rbren/polaris");
screen.getByText("All-Hands-AI/OpenHands");
});
});
it("should only enable the launch button if a repo is selected", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
const launchButton = screen.getByTestId("repo-launch-button");
expect(launchButton).toBeDisabled();
// Wait for the loading state to be replaced with the dropdown
const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
await userEvent.click(dropdown);
await userEvent.click(screen.getByText("rbren/polaris"));
expect(launchButton).toBeEnabled();
});
it("should render the 'add git(hub|lab) repos' links if saas mode", async () => {
const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return the APP_MODE
getConfiSpy.mockResolvedValue({
APP_MODE: "saas",
});
renderRepoConnector();
await screen.findByText("Add GitHub repos");
});
it("should not render the 'add git(hub|lab) repos' links if oss mode", async () => {
const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return the APP_MODE
getConfiSpy.mockResolvedValue({
APP_MODE: "oss",
});
renderRepoConnector();
expect(screen.queryByText("Add GitHub repos")).not.toBeInTheDocument();
expect(screen.queryByText("Add GitLab repos")).not.toBeInTheDocument();
});
it("should create a conversation and redirect with the selected repo when pressing the launch button", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
const repoConnector = screen.getByTestId("repo-connector");
const launchButton =
within(repoConnector).getByTestId("repo-launch-button");
await userEvent.click(launchButton);
// repo not selected yet
expect(createConversationSpy).not.toHaveBeenCalled();
// select a repository from the dropdown
const dropdown = await waitFor(() =>
within(repoConnector).getByTestId("repo-dropdown"),
);
await userEvent.click(dropdown);
const repoOption = screen.getByText("rbren/polaris");
await userEvent.click(repoOption);
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
"gui",
{
full_name: "rbren/polaris",
git_provider: "github",
id: 1,
is_public: true,
},
undefined,
[],
undefined,
undefined,
);
});
it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
const launchButton = screen.getByTestId("repo-launch-button");
// Wait for the loading state to be replaced with the dropdown
const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
await userEvent.click(dropdown);
await userEvent.click(screen.getByText("rbren/polaris"));
await userEvent.click(launchButton);
expect(launchButton).toBeDisabled();
expect(launchButton).toHaveTextContent(/Loading/i);
});
it("should not display a button to settings if the user is signed in with their git provider", async () => {
renderRepoConnector(true);
expect(
screen.queryByTestId("navigate-to-settings-button"),
).not.toBeInTheDocument();
});
it("should display a button to settings if the user needs to sign in with their git provider", async () => {
renderRepoConnector(false);
const goToSettingsButton = await screen.findByTestId(
"navigate-to-settings-button",
);
const dropdown = screen.queryByTestId("repo-dropdown");
const launchButton = screen.queryByTestId("repo-launch-button");
const providerLinks = screen.queryAllByText(/add git(hub|lab) repos/i);
expect(dropdown).not.toBeInTheDocument();
expect(launchButton).not.toBeInTheDocument();
expect(providerLinks.length).toBe(0);
expect(goToSettingsButton).toBeInTheDocument();
await userEvent.click(goToSettingsButton);
await screen.findByTestId("settings-screen");
});
});

View File

@@ -0,0 +1,138 @@
import { render, screen } from "@testing-library/react";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import { setupStore } from "test-utils";
import { SuggestedTask } from "#/components/features/home/tasks/task.types";
import OpenHands from "#/api/open-hands";
import { AuthProvider } from "#/context/auth-context";
import { TaskCard } from "#/components/features/home/tasks/task-card";
import * as GitService from "#/api/git";
import { GitRepository } from "#/types/git";
const MOCK_TASK_1: SuggestedTask = {
issue_number: 123,
repo: "repo1",
title: "Task 1",
task_type: "MERGE_CONFLICTS",
git_provider: "github",
};
const MOCK_TASK_2: SuggestedTask = {
issue_number: 456,
repo: "repo2",
title: "Task 2",
task_type: "FAILING_CHECKS",
git_provider: "github",
};
const MOCK_TASK_3: SuggestedTask = {
issue_number: 789,
repo: "repo3",
title: "Task 3",
task_type: "UNRESOLVED_COMMENTS",
git_provider: "gitlab",
};
const MOCK_TASK_4: SuggestedTask = {
issue_number: 101112,
repo: "repo4",
title: "Task 4",
task_type: "OPEN_ISSUE",
git_provider: "gitlab",
};
const MOCK_RESPOSITORIES: GitRepository[] = [
{ id: 1, full_name: "repo1", git_provider: "github", is_public: true },
{ id: 2, full_name: "repo2", git_provider: "github", is_public: true },
{ id: 3, full_name: "repo3", git_provider: "gitlab", is_public: true },
{ id: 4, full_name: "repo4", git_provider: "gitlab", is_public: true },
];
const renderTaskCard = (task = MOCK_TASK_1) => {
const RouterStub = createRoutesStub([
{
Component: () => <TaskCard task={task} />,
path: "/",
},
{
Component: () => <div data-testid="conversation-screen" />,
path: "/conversations/:conversationId",
},
]);
return render(<RouterStub />, {
wrapper: ({ children }) => (
<Provider store={setupStore()}>
<AuthProvider initialProvidersAreSet>
<QueryClientProvider client={new QueryClient()}>
{children}
</QueryClientProvider>
</AuthProvider>
</Provider>
),
});
};
describe("TaskCard", () => {
it("format the issue id", async () => {
renderTaskCard();
const taskId = screen.getByTestId("task-id");
expect(taskId).toHaveTextContent(/#123/i);
});
it("should call createConversation when clicking the launch button", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
renderTaskCard();
const launchButton = screen.getByTestId("task-launch-button");
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalled();
});
describe("creating suggested task conversation", () => {
beforeEach(() => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
});
it("should call create conversation with suggest task trigger and selected suggested task", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
renderTaskCard(MOCK_TASK_1);
const launchButton = screen.getByTestId("task-launch-button");
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledWith(
"suggested_task",
MOCK_RESPOSITORIES[0],
undefined,
[],
undefined,
MOCK_TASK_1,
);
});
});
it("should disable the launch button and update text content when creating a conversation", async () => {
renderTaskCard();
const launchButton = screen.getByTestId("task-launch-button");
await userEvent.click(launchButton);
expect(launchButton).toHaveTextContent(/Loading/i);
expect(launchButton).toBeDisabled();
});
});

View File

@@ -0,0 +1,113 @@
import { render, screen, waitFor } from "@testing-library/react";
import { afterEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import { setupStore } from "test-utils";
import userEvent from "@testing-library/user-event";
import { TaskSuggestions } from "#/components/features/home/tasks/task-suggestions";
import { SuggestionsService } from "#/api/suggestions-service/suggestions-service.api";
import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
import { AuthProvider } from "#/context/auth-context";
const renderTaskSuggestions = (initialProvidersAreSet = true) => {
const RouterStub = createRoutesStub([
{
Component: TaskSuggestions,
path: "/",
},
{
Component: () => <div data-testid="conversation-screen" />,
path: "/conversations/:conversationId",
},
{
Component: () => <div data-testid="settings-screen" />,
path: "/settings",
},
]);
return render(<RouterStub />, {
wrapper: ({ children }) => (
<Provider store={setupStore()}>
<AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
<QueryClientProvider client={new QueryClient()}>
{children}
</QueryClientProvider>
</AuthProvider>
</Provider>
),
});
};
describe("TaskSuggestions", () => {
const getSuggestedTasksSpy = vi.spyOn(
SuggestionsService,
"getSuggestedTasks",
);
afterEach(() => {
vi.clearAllMocks();
});
it("should render the task suggestions section", () => {
renderTaskSuggestions();
screen.getByTestId("task-suggestions");
});
it("should render an empty message if there are no tasks", async () => {
getSuggestedTasksSpy.mockResolvedValue([]);
renderTaskSuggestions();
await screen.findByText(/No tasks available/i);
});
it("should render the task groups with the correct titles", async () => {
getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
renderTaskSuggestions();
await waitFor(() => {
MOCK_TASKS.forEach((taskGroup) => {
screen.getByText(taskGroup.title);
});
});
});
it("should render the task cards with the correct task details", async () => {
getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
renderTaskSuggestions();
await waitFor(() => {
MOCK_TASKS.forEach((task) => {
screen.getByText(task.title);
});
});
});
it("should render skeletons when loading", async () => {
getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
renderTaskSuggestions();
const skeletons = screen.getAllByTestId("task-group-skeleton");
expect(skeletons.length).toBeGreaterThan(0);
await waitFor(() => {
MOCK_TASKS.forEach((taskGroup) => {
screen.getByText(taskGroup.title);
});
});
expect(screen.queryByTestId("task-group-skeleton")).not.toBeInTheDocument();
});
it("should display a button to settings if the user needs to sign in with their git provider", async () => {
renderTaskSuggestions(false);
expect(getSuggestedTasksSpy).not.toHaveBeenCalled();
const goToSettingsButton = await screen.findByTestId(
"navigate-to-settings-button",
);
expect(goToSettingsButton).toBeInTheDocument();
await userEvent.click(goToSettingsButton);
await screen.findByTestId("settings-screen");
});
});

View File

@@ -61,25 +61,25 @@ describe("PaymentForm", () => {
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50.12");
await user.type(topUpInput, "50");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.12);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
});
it("should round the top-up amount to two decimal places", async () => {
it("should only accept integer values", async () => {
const user = userEvent.setup();
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50.125456");
await user.type(topUpInput, "50");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.13);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
});
it("should disable the top-up button if the user enters an invalid amount", async () => {
@@ -100,7 +100,7 @@ describe("PaymentForm", () => {
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50.12");
await user.type(topUpInput, "50");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
@@ -114,7 +114,7 @@ describe("PaymentForm", () => {
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "-50.12");
await user.type(topUpInput, "-50");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
@@ -139,6 +139,8 @@ describe("PaymentForm", () => {
const user = userEvent.setup();
renderPaymentForm();
// With type="number", the browser would prevent non-numeric input,
// but we'll test the validation logic anyway
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "abc");
@@ -160,5 +162,19 @@ describe("PaymentForm", () => {
expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
});
test("user enters a decimal value", async () => {
const user = userEvent.setup();
renderPaymentForm();
// With step="1", the browser would validate this, but we'll test our validation logic
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50.5");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
});
});
});

View File

@@ -1,28 +0,0 @@
import { screen } from "@testing-library/react";
import { renderWithProviders } from "test-utils";
import { describe, afterEach, vi, it, expect } from "vitest";
import { ExplorerTree } from "#/components/features/file-explorer/explorer-tree";
const FILES = ["file-1-1.ts", "folder-1-2"];
describe.skip("ExplorerTree", () => {
afterEach(() => {
vi.resetAllMocks();
});
it("should render the explorer", () => {
renderWithProviders(<ExplorerTree files={FILES} defaultOpen />);
expect(screen.getByText("file-1-1.ts")).toBeInTheDocument();
expect(screen.getByText("folder-1-2")).toBeInTheDocument();
// TODO: make sure children render
});
it("should render the explorer given the defaultExpanded prop", () => {
renderWithProviders(<ExplorerTree files={FILES} />);
expect(screen.queryByText("file-1-1.ts")).toBeInTheDocument();
expect(screen.queryByText("folder-1-2")).toBeInTheDocument();
// TODO: make sure children don't render
});
});

View File

@@ -1,64 +0,0 @@
import { screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import { describe, it, expect, vi, afterEach } from "vitest";
import { AgentState } from "#/types/agent-state";
import { FileExplorer } from "#/components/features/file-explorer/file-explorer";
import { FileService } from "#/api/file-service/file-service.api";
const getFilesSpy = vi.spyOn(FileService, "getFiles");
vi.mock("../../services/fileService", async () => ({
uploadFiles: vi.fn(),
}));
const renderFileExplorerWithRunningAgentState = () =>
renderWithProviders(<FileExplorer isOpen onToggle={() => {}} />, {
preloadedState: {
agent: {
curAgentState: AgentState.RUNNING,
},
},
});
describe.skip("FileExplorer", () => {
afterEach(() => {
vi.clearAllMocks();
});
it("should get the workspace directory", async () => {
renderFileExplorerWithRunningAgentState();
expect(await screen.findByText("folder1")).toBeInTheDocument();
expect(await screen.findByText("file1.ts")).toBeInTheDocument();
expect(getFilesSpy).toHaveBeenCalledTimes(1); // once for root
});
it("should refetch the workspace when clicking the refresh button", async () => {
const user = userEvent.setup();
renderFileExplorerWithRunningAgentState();
expect(await screen.findByText("folder1")).toBeInTheDocument();
expect(await screen.findByText("file1.ts")).toBeInTheDocument();
expect(getFilesSpy).toHaveBeenCalledTimes(1); // once for root
const refreshButton = screen.getByTestId("refresh");
await user.click(refreshButton);
expect(getFilesSpy).toHaveBeenCalledTimes(2); // once for root, once for refresh button
});
it("should toggle the explorer visibility when clicking the toggle button", async () => {
const user = userEvent.setup();
renderFileExplorerWithRunningAgentState();
const folder1 = await screen.findByText("folder1");
expect(folder1).toBeInTheDocument();
const toggleButton = screen.getByTestId("toggle");
await user.click(toggleButton);
expect(folder1).toBeInTheDocument();
expect(folder1).not.toBeVisible();
});
});

View File

@@ -1,110 +0,0 @@
import { screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import { vi, describe, afterEach, it, expect } from "vitest";
import TreeNode from "#/components/features/file-explorer/tree-node";
import { FileService } from "#/api/file-service/file-service.api";
const getFileSpy = vi.spyOn(FileService, "getFile");
const getFilesSpy = vi.spyOn(FileService, "getFiles");
vi.mock("../../services/fileService", async () => ({
uploadFile: vi.fn(),
}));
describe.skip("TreeNode", () => {
afterEach(() => {
vi.clearAllMocks();
});
it("should render a file if property has no children", () => {
renderWithProviders(<TreeNode path="/file.ts" defaultOpen />);
expect(screen.getByText("file.ts")).toBeInTheDocument();
});
it("should render a folder if it's in a subdir", async () => {
renderWithProviders(<TreeNode path="/folder1/" defaultOpen />);
expect(getFilesSpy).toHaveBeenCalledWith("/folder1/");
expect(await screen.findByText("folder1")).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
it("should close a folder when clicking on it", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/folder1/" defaultOpen />);
const folder1 = await screen.findByText("folder1");
const file2 = await screen.findByText("file2.ts");
expect(folder1).toBeInTheDocument();
expect(file2).toBeInTheDocument();
await user.click(folder1);
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
});
it("should open a folder when clicking on it", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/folder1/" />);
const folder1 = await screen.findByText("folder1");
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
await user.click(folder1);
expect(getFilesSpy).toHaveBeenCalledWith("/folder1/");
expect(folder1).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
it("should call `OpenHands.getFile` and return the full path of a file when clicking on a file", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/folder1/file2.ts" defaultOpen />);
const file2 = screen.getByText("file2.ts");
await user.click(file2);
expect(getFileSpy).toHaveBeenCalledWith("/folder1/file2.ts");
});
it("should render the full explorer given the defaultOpen prop", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/" defaultOpen />);
expect(getFilesSpy).toHaveBeenCalledWith("/");
const file1 = await screen.findByText("file1.ts");
const folder1 = await screen.findByText("folder1");
expect(file1).toBeInTheDocument();
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
await user.click(folder1);
expect(getFilesSpy).toHaveBeenCalledWith("folder1/");
expect(file1).toBeInTheDocument();
expect(folder1).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
it("should render all children as collapsed when defaultOpen is false", async () => {
renderWithProviders(<TreeNode path="/folder1/" defaultOpen={false} />);
const folder1 = await screen.findByText("folder1");
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
await userEvent.click(folder1);
expect(getFilesSpy).toHaveBeenCalledWith("/folder1/");
expect(folder1).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
});

View File

@@ -1,9 +1,18 @@
import { render, screen } from "@testing-library/react";
import { describe, it, expect } from "vitest";
import { describe, it, expect, vi } from "vitest";
import { Messages } from "#/components/features/chat/messages";
import type { Message } from "#/message";
import { renderWithProviders } from "test-utils";
// Mock the useParams hook to provide a conversationId
vi.mock("react-router", async () => {
const actual = await vi.importActual<typeof import("react-router")>("react-router");
return {
...actual,
useParams: () => ({ conversationId: "test-conversation-id" }),
};
});
describe("File Operations Messages", () => {
it("should show success indicator for successful file read operation", () => {
const messages: Message[] = [

View File

@@ -5,7 +5,7 @@ import { Command, appendInput, appendOutput } from "#/state/command-slice";
import Terminal from "#/components/features/terminal/terminal";
const renderTerminal = (commands: Command[] = []) =>
renderWithProviders(<Terminal secrets={[]} />, {
renderWithProviders(<Terminal />, {
preloadedState: {
cmd: {
commands,
@@ -121,7 +121,7 @@ describe.skip("Terminal", () => {
// This test fails because it expects `disposeMock` to have been called before the component is unmounted.
it.skip("should dispose the terminal on unmount", () => {
const { unmount } = renderWithProviders(<Terminal secrets={[]} />);
const { unmount } = renderWithProviders(<Terminal />);
expect(mockTerminal.dispose).not.toHaveBeenCalled();

View File

@@ -1,31 +1,31 @@
import { beforeAll, describe, expect, it, vi } from "vitest";
import { render } from "@testing-library/react";
import { afterEach } from "node:test";
import { ReactNode } from "react";
import { useTerminal } from "#/hooks/use-terminal";
import { Command } from "#/state/command-slice";
import { AgentState } from "#/types/agent-state";
import { renderWithProviders } from "../../test-utils";
// Mock the WsClient context
vi.mock("#/context/ws-client-provider", () => ({
useWsClient: () => ({
send: vi.fn(),
status: "CONNECTED",
isLoadingMessages: false,
events: [],
}),
}));
interface TestTerminalComponentProps {
commands: Command[];
secrets: string[];
}
function TestTerminalComponent({
commands,
secrets,
}: TestTerminalComponentProps) {
const ref = useTerminal({ commands, secrets, disabled: false });
const ref = useTerminal({ commands });
return <div ref={ref} />;
}
interface WrapperProps {
children: ReactNode;
}
function Wrapper({ children }: WrapperProps) {
return <div>{children}</div>;
}
describe("useTerminal", () => {
const mockTerminal = vi.hoisted(() => ({
loadAddon: vi.fn(),
@@ -57,8 +57,11 @@ describe("useTerminal", () => {
});
it("should render", () => {
render(<TestTerminalComponent commands={[]} secrets={[]} />, {
wrapper: Wrapper,
renderWithProviders(<TestTerminalComponent commands={[]} />, {
preloadedState: {
agent: { curAgentState: AgentState.RUNNING },
cmd: { commands: [] },
},
});
});
@@ -68,15 +71,19 @@ describe("useTerminal", () => {
{ content: "hello", type: "output" },
];
render(<TestTerminalComponent commands={commands} secrets={[]} />, {
wrapper: Wrapper,
renderWithProviders(<TestTerminalComponent commands={commands} />, {
preloadedState: {
agent: { curAgentState: AgentState.RUNNING },
cmd: { commands },
},
});
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(1, "echo hello");
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(2, "hello");
});
it("should hide secrets in the terminal", () => {
// This test is no longer relevant as secrets filtering has been removed
it.skip("should hide secrets in the terminal", () => {
const secret = "super_secret_github_token";
const anotherSecret = "super_secret_another_token";
const commands: Command[] = [
@@ -87,23 +94,18 @@ describe("useTerminal", () => {
{ content: secret, type: "output" },
];
render(
renderWithProviders(
<TestTerminalComponent
commands={commands}
secrets={[secret, anotherSecret]}
/>,
{
wrapper: Wrapper,
preloadedState: {
agent: { curAgentState: AgentState.RUNNING },
cmd: { commands },
},
},
);
// BUG: `vi.clearAllMocks()` does not clear the number of calls
// therefore, we need to assume the order of the calls based
// on the test order
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(
3,
`export GITHUB_TOKEN=${"*".repeat(10)},${"*".repeat(10)},${"*".repeat(10)}`,
);
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(4, "*".repeat(10));
// This test is no longer relevant as secrets filtering has been removed
});
});

View File

@@ -0,0 +1,291 @@
import { render, screen, waitFor } from "@testing-library/react";
import { afterEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import AppSettingsScreen from "#/routes/app-settings";
import OpenHands from "#/api/open-hands";
import { MOCK_DEFAULT_USER_SETTINGS } from "#/mocks/handlers";
import { AuthProvider } from "#/context/auth-context";
import { AvailableLanguages } from "#/i18n";
import * as CaptureConsent from "#/utils/handle-capture-consent";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
const renderAppSettingsScreen = () =>
render(<AppSettingsScreen />, {
wrapper: ({ children }) => (
<QueryClientProvider client={new QueryClient()}>
<AuthProvider>{children}</AuthProvider>
</QueryClientProvider>
),
});
describe("Content", () => {
it("should render the screen", () => {
renderAppSettingsScreen();
screen.getByTestId("app-settings-screen");
});
it("should render the correct default values", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
language: "no",
user_consents_to_analytics: true,
enable_sound_notifications: true,
});
renderAppSettingsScreen();
await waitFor(() => {
const language = screen.getByTestId("language-input");
const analytics = screen.getByTestId("enable-analytics-switch");
const sound = screen.getByTestId("enable-sound-notifications-switch");
expect(language).toHaveValue("Norsk");
expect(analytics).toBeChecked();
expect(sound).toBeChecked();
});
});
it("should render the language options", async () => {
renderAppSettingsScreen();
const language = await screen.findByTestId("language-input");
await userEvent.click(language);
AvailableLanguages.forEach((lang) => {
const option = screen.getByText(lang.label);
expect(option).toBeInTheDocument();
});
});
});
describe("Form submission", () => {
afterEach(() => {
vi.clearAllMocks();
});
it("should submit the form with the correct values", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
renderAppSettingsScreen();
const language = await screen.findByTestId("language-input");
const analytics = await screen.findByTestId("enable-analytics-switch");
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
expect(language).toHaveValue("English");
expect(analytics).not.toBeChecked();
expect(sound).not.toBeChecked();
// change language
await userEvent.click(language);
const norsk = screen.getByText("Norsk");
await userEvent.click(norsk);
expect(language).toHaveValue("Norsk");
// toggle options
await userEvent.click(analytics);
expect(analytics).toBeChecked();
await userEvent.click(sound);
expect(sound).toBeChecked();
// submit the form
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
language: "no",
user_consents_to_analytics: true,
enable_sound_notifications: true,
}),
);
});
it("should only enable the submit button when there are changes", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
renderAppSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
// Language check
const language = await screen.findByTestId("language-input");
await userEvent.click(language);
const norsk = screen.getByText("Norsk");
await userEvent.click(norsk);
expect(submit).not.toBeDisabled();
await userEvent.click(language);
const english = screen.getByText("English");
await userEvent.click(english);
expect(submit).toBeDisabled();
// Analytics check
const analytics = await screen.findByTestId("enable-analytics-switch");
await userEvent.click(analytics);
expect(submit).not.toBeDisabled();
await userEvent.click(analytics);
expect(submit).toBeDisabled();
// Sound check
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
expect(submit).not.toBeDisabled();
await userEvent.click(sound);
expect(submit).toBeDisabled();
});
it("should call handleCaptureConsents with true when the analytics switch is toggled", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const handleCaptureConsentsSpy = vi.spyOn(
CaptureConsent,
"handleCaptureConsent",
);
renderAppSettingsScreen();
const analytics = await screen.findByTestId("enable-analytics-switch");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(analytics);
await userEvent.click(submit);
await waitFor(() =>
expect(handleCaptureConsentsSpy).toHaveBeenCalledWith(true),
);
});
it("should call handleCaptureConsents with false when the analytics switch is toggled", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
user_consents_to_analytics: true,
});
const handleCaptureConsentsSpy = vi.spyOn(
CaptureConsent,
"handleCaptureConsent",
);
renderAppSettingsScreen();
const analytics = await screen.findByTestId("enable-analytics-switch");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(analytics);
await userEvent.click(submit);
await waitFor(() =>
expect(handleCaptureConsentsSpy).toHaveBeenCalledWith(false),
);
});
// flaky test
it.skip("should disable the button when submitting changes", async () => {
renderAppSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(submit).toHaveTextContent("Saving...");
expect(submit).toBeDisabled();
await waitFor(() => expect(submit).toHaveTextContent("Save"));
});
it("should disable the button after submitting changes", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
renderAppSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(submit).toBeDisabled());
});
});
describe("Status toasts", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderAppSettingsScreen();
// Toggle setting to change
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
renderAppSettingsScreen();
// Toggle setting to change
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});

View File

@@ -0,0 +1,461 @@
import { render, screen, waitFor } from "@testing-library/react";
import { createRoutesStub } from "react-router";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import GitSettingsScreen from "#/routes/git-settings";
import OpenHands from "#/api/open-hands";
import { MOCK_DEFAULT_USER_SETTINGS } from "#/mocks/handlers";
import { AuthProvider } from "#/context/auth-context";
import { GetConfigResponse } from "#/api/open-hands.types";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
const VALID_OSS_CONFIG: GetConfigResponse = {
APP_MODE: "oss",
GITHUB_CLIENT_ID: "123",
POSTHOG_CLIENT_KEY: "456",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
};
const VALID_SAAS_CONFIG: GetConfigResponse = {
APP_MODE: "saas",
GITHUB_CLIENT_ID: "123",
POSTHOG_CLIENT_KEY: "456",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
};
const queryClient = new QueryClient();
const GitSettingsRouterStub = createRoutesStub([
{
Component: GitSettingsScreen,
path: "/settings/github",
},
]);
const renderGitSettingsScreen = () => {
const { rerender, ...rest } = render(
<GitSettingsRouterStub initialEntries={["/settings/github"]} />,
{
wrapper: ({ children }) => (
<QueryClientProvider client={queryClient}>
<AuthProvider>{children}</AuthProvider>
</QueryClientProvider>
),
},
);
const rerenderGitSettingsScreen = () =>
rerender(
<QueryClientProvider client={queryClient}>
<AuthProvider>
<GitSettingsRouterStub initialEntries={["/settings/github"]} />
</AuthProvider>
</QueryClientProvider>,
);
return {
...rest,
rerender: rerenderGitSettingsScreen,
};
};
beforeEach(() => {
// Since we don't recreate the query client on every test, we need to
// reset the query client before each test to avoid state leaks
// between tests.
queryClient.invalidateQueries();
});
describe("Content", () => {
it("should render", async () => {
renderGitSettingsScreen();
await screen.findByTestId("git-settings-screen");
});
it("should render the inputs if OSS mode", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
const { rerender } = renderGitSettingsScreen();
await screen.findByTestId("github-token-input");
await screen.findByTestId("github-token-help-anchor");
await screen.findByTestId("gitlab-token-input");
await screen.findByTestId("gitlab-token-help-anchor");
getConfigSpy.mockResolvedValue(VALID_SAAS_CONFIG);
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
expect(
screen.queryByTestId("github-token-input"),
).not.toBeInTheDocument();
expect(
screen.queryByTestId("github-token-help-anchor"),
).not.toBeInTheDocument();
expect(
screen.queryByTestId("gitlab-token-input"),
).not.toBeInTheDocument();
expect(
screen.queryByTestId("gitlab-token-help-anchor"),
).not.toBeInTheDocument();
});
});
it("should set '<hidden>' placeholder and indicator if the GitHub token is set", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: false,
gitlab: false,
},
});
const { rerender } = renderGitSettingsScreen();
await waitFor(() => {
const githubInput = screen.getByTestId("github-token-input");
expect(githubInput).toHaveProperty("placeholder", "");
expect(
screen.queryByTestId("gh-set-token-indicator"),
).not.toBeInTheDocument();
const gitlabInput = screen.getByTestId("gitlab-token-input");
expect(gitlabInput).toHaveProperty("placeholder", "");
expect(
screen.queryByTestId("gl-set-token-indicator"),
).not.toBeInTheDocument();
});
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: true,
gitlab: true,
},
});
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
const githubInput = screen.getByTestId("github-token-input");
expect(githubInput).toHaveProperty("placeholder", "<hidden>");
expect(
screen.queryByTestId("gh-set-token-indicator"),
).toBeInTheDocument();
const gitlabInput = screen.getByTestId("gitlab-token-input");
expect(gitlabInput).toHaveProperty("placeholder", "<hidden>");
expect(
screen.queryByTestId("gl-set-token-indicator"),
).toBeInTheDocument();
});
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: false,
gitlab: true,
},
});
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
const githubInput = screen.getByTestId("github-token-input");
expect(githubInput).toHaveProperty("placeholder", "");
expect(
screen.queryByTestId("gh-set-token-indicator"),
).not.toBeInTheDocument();
const gitlabInput = screen.getByTestId("gitlab-token-input");
expect(gitlabInput).toHaveProperty("placeholder", "<hidden>");
expect(
screen.queryByTestId("gl-set-token-indicator"),
).toBeInTheDocument();
});
});
it("should render the 'Configure GitHub Repositories' button if SaaS mode and app slug exists", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
const { rerender } = renderGitSettingsScreen();
let button = screen.queryByTestId("configure-github-repositories-button");
expect(button).not.toBeInTheDocument();
expect(screen.getByTestId("submit-button")).toBeInTheDocument();
expect(screen.getByTestId("disconnect-tokens-button")).toBeInTheDocument();
getConfigSpy.mockResolvedValue(VALID_SAAS_CONFIG);
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
// wait until queries are resolved
expect(queryClient.isFetching()).toBe(0);
button = screen.queryByTestId("configure-github-repositories-button");
expect(button).not.toBeInTheDocument();
});
getConfigSpy.mockResolvedValue({
...VALID_SAAS_CONFIG,
APP_SLUG: "test-slug",
});
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
button = screen.getByTestId("configure-github-repositories-button");
expect(button).toBeInTheDocument();
expect(screen.queryByTestId("submit-button")).not.toBeInTheDocument();
expect(
screen.queryByTestId("disconnect-tokens-button"),
).not.toBeInTheDocument();
});
});
});
describe("Form submission", () => {
it("should save the GitHub token", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
const githubInput = await screen.findByTestId("github-token-input");
const submit = await screen.findByTestId("submit-button");
await userEvent.type(githubInput, "test-token");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
provider_tokens: {
github: "test-token",
gitlab: "",
},
}),
);
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
provider_tokens: {
github: "",
gitlab: "test-token",
},
}),
);
});
it("should disable the button if there is no input", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
expect(submit).not.toBeDisabled();
await userEvent.clear(githubInput);
expect(submit).toBeDisabled();
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
expect(submit).not.toBeDisabled();
await userEvent.clear(gitlabInput);
expect(submit).toBeDisabled();
});
it("should enable a disconnect tokens button if there is at least one token set", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: true,
gitlab: false,
},
});
renderGitSettingsScreen();
await screen.findByTestId("git-settings-screen");
let disconnectButton = await screen.findByTestId(
"disconnect-tokens-button",
);
await waitFor(() => expect(disconnectButton).not.toBeDisabled());
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: false,
gitlab: false,
},
});
queryClient.invalidateQueries();
disconnectButton = await screen.findByTestId("disconnect-tokens-button");
await waitFor(() => expect(disconnectButton).toBeDisabled());
});
it("should call logout when pressing the disconnect tokens button", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const logoutSpy = vi.spyOn(OpenHands, "logout");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: true,
gitlab: false,
},
});
renderGitSettingsScreen();
const disconnectButton = await screen.findByTestId(
"disconnect-tokens-button",
);
await waitFor(() => expect(disconnectButton).not.toBeDisabled());
await userEvent.click(disconnectButton);
expect(logoutSpy).toHaveBeenCalled();
});
// flaky test
it.skip("should disable the button when submitting changes", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(submit).toHaveTextContent("Saving...");
expect(submit).toBeDisabled();
await waitFor(() => expect(submit).toHaveTextContent("Save"));
});
it("should disable the button after submitting changes", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
await screen.findByTestId("git-settings-screen");
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(submit).toBeDisabled();
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
expect(gitlabInput).toHaveValue("test-token");
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(submit).toBeDisabled());
});
});
describe("Status toasts", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderGitSettingsScreen();
// Toggle setting to change
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
renderGitSettingsScreen();
// Toggle setting to change
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});

View File

@@ -0,0 +1,377 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { createRoutesStub } from "react-router";
import { Provider } from "react-redux";
import { setupStore } from "test-utils";
import { AxiosError } from "axios";
import HomeScreen from "#/routes/home";
import { AuthProvider } from "#/context/auth-context";
import * as GitService from "#/api/git";
import { GitRepository } from "#/types/git";
import OpenHands from "#/api/open-hands";
import MainApp from "#/routes/root-layout";
const createAxiosNotFoundErrorObject = () =>
new AxiosError(
"Request failed with status code 404",
"ERR_BAD_REQUEST",
undefined,
undefined,
{
status: 404,
statusText: "Not Found",
data: { message: "Settings not found" },
headers: {},
// @ts-expect-error - we only need the response object for this test
config: {},
},
);
const RouterStub = createRoutesStub([
{
Component: MainApp,
path: "/",
children: [
{
Component: HomeScreen,
path: "/",
},
{
Component: () => <div data-testid="conversation-screen" />,
path: "/conversations/:conversationId",
},
{
Component: () => <div data-testid="settings-screen" />,
path: "/settings",
},
],
},
]);
const renderHomeScreen = (initialProvidersAreSet = true) =>
render(<RouterStub />, {
wrapper: ({ children }) => (
<Provider store={setupStore()}>
<AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
<QueryClientProvider client={new QueryClient()}>
{children}
</QueryClientProvider>
</AuthProvider>
</Provider>
),
});
const MOCK_RESPOSITORIES: GitRepository[] = [
{
id: 1,
full_name: "octocat/hello-world",
git_provider: "github",
is_public: true,
},
{
id: 2,
full_name: "octocat/earth",
git_provider: "github",
is_public: true,
},
];
describe("HomeScreen", () => {
it("should render", () => {
renderHomeScreen();
screen.getByTestId("home-screen");
});
it("should render the repository connector and suggested tasks sections", async () => {
renderHomeScreen();
screen.getByTestId("repo-connector");
screen.getByTestId("task-suggestions");
});
it("should have responsive layout for mobile and desktop screens", async () => {
renderHomeScreen();
const mainContainer = screen.getByTestId("home-screen").querySelector("main");
expect(mainContainer).toHaveClass("flex", "flex-col", "md:flex-row");
});
it("should filter the suggested tasks based on the selected repository", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderHomeScreen();
const taskSuggestions = screen.getByTestId("task-suggestions");
// Initially, all tasks should be visible
await waitFor(() => {
within(taskSuggestions).getByText("octocat/hello-world");
within(taskSuggestions).getByText("octocat/earth");
});
// Select a repository from the dropdown
const repoConnector = screen.getByTestId("repo-connector");
const dropdown = within(repoConnector).getByTestId("repo-dropdown");
await userEvent.click(dropdown);
const repoOption = screen.getAllByText("octocat/hello-world")[1];
await userEvent.click(repoOption);
// After selecting a repository, only tasks related to that repository should be visible
await waitFor(() => {
within(taskSuggestions).getByText("octocat/hello-world");
expect(
within(taskSuggestions).queryByText("octocat/earth"),
).not.toBeInTheDocument();
});
});
it("should reset the filtered tasks when the selected repository is cleared", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderHomeScreen();
const taskSuggestions = screen.getByTestId("task-suggestions");
// Initially, all tasks should be visible
await waitFor(() => {
within(taskSuggestions).getByText("octocat/hello-world");
within(taskSuggestions).getByText("octocat/earth");
});
// Select a repository from the dropdown
const repoConnector = screen.getByTestId("repo-connector");
const dropdown = within(repoConnector).getByTestId("repo-dropdown");
await userEvent.click(dropdown);
const repoOption = screen.getAllByText("octocat/hello-world")[1];
await userEvent.click(repoOption);
// After selecting a repository, only tasks related to that repository should be visible
await waitFor(() => {
within(taskSuggestions).getByText("octocat/hello-world");
expect(
within(taskSuggestions).queryByText("octocat/earth"),
).not.toBeInTheDocument();
});
// Clear the selected repository
await userEvent.clear(dropdown);
// All tasks should be visible again
await waitFor(() => {
within(taskSuggestions).getByText("octocat/hello-world");
within(taskSuggestions).getByText("octocat/earth");
});
});
describe("launch buttons", () => {
const setupLaunchButtons = async () => {
let headerLaunchButton = screen.getByTestId("header-launch-button");
let repoLaunchButton = screen.getByTestId("repo-launch-button");
let tasksLaunchButtons =
await screen.findAllByTestId("task-launch-button");
// Select a repository from the dropdown to enable the repo launch button
const repoConnector = screen.getByTestId("repo-connector");
const dropdown = within(repoConnector).getByTestId("repo-dropdown");
await userEvent.click(dropdown);
const repoOption = screen.getAllByText("octocat/hello-world")[1];
await userEvent.click(repoOption);
expect(headerLaunchButton).not.toBeDisabled();
expect(repoLaunchButton).not.toBeDisabled();
tasksLaunchButtons.forEach((button) => {
expect(button).not.toBeDisabled();
});
headerLaunchButton = screen.getByTestId("header-launch-button");
repoLaunchButton = screen.getByTestId("repo-launch-button");
tasksLaunchButtons = await screen.findAllByTestId("task-launch-button");
return {
headerLaunchButton,
repoLaunchButton,
tasksLaunchButtons,
};
};
beforeEach(() => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
});
it("should disable the other launch buttons when the header launch button is clicked", async () => {
renderHomeScreen();
const { headerLaunchButton, repoLaunchButton } =
await setupLaunchButtons();
const tasksLaunchButtonsAfter =
await screen.findAllByTestId("task-launch-button");
// All other buttons should be disabled when the header button is clicked
await userEvent.click(headerLaunchButton);
expect(headerLaunchButton).toBeDisabled();
expect(repoLaunchButton).toBeDisabled();
tasksLaunchButtonsAfter.forEach((button) => {
expect(button).toBeDisabled();
});
});
it("should disable the other launch buttons when the repo launch button is clicked", async () => {
renderHomeScreen();
const { headerLaunchButton, repoLaunchButton } =
await setupLaunchButtons();
const tasksLaunchButtonsAfter =
await screen.findAllByTestId("task-launch-button");
// All other buttons should be disabled when the repo button is clicked
await userEvent.click(repoLaunchButton);
expect(headerLaunchButton).toBeDisabled();
expect(repoLaunchButton).toBeDisabled();
tasksLaunchButtonsAfter.forEach((button) => {
expect(button).toBeDisabled();
});
});
it("should disable the other launch buttons when any task launch button is clicked", async () => {
renderHomeScreen();
const { headerLaunchButton, repoLaunchButton, tasksLaunchButtons } =
await setupLaunchButtons();
const tasksLaunchButtonsAfter =
await screen.findAllByTestId("task-launch-button");
// All other buttons should be disabled when the task button is clicked
await userEvent.click(tasksLaunchButtons[0]);
expect(headerLaunchButton).toBeDisabled();
expect(repoLaunchButton).toBeDisabled();
tasksLaunchButtonsAfter.forEach((button) => {
expect(button).toBeDisabled();
});
});
});
it("should hide the suggested tasks section if not authed with git(hub|lab)", async () => {
renderHomeScreen(false);
const taskSuggestions = screen.queryByTestId("task-suggestions");
const repoConnector = screen.getByTestId("repo-connector");
expect(taskSuggestions).not.toBeInTheDocument();
expect(repoConnector).toBeInTheDocument();
});
});
describe("Settings 404", () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
it("should open the settings modal if GET /settings fails with a 404", async () => {
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
renderHomeScreen();
const settingsModal = await screen.findByTestId("ai-config-modal");
expect(settingsModal).toBeInTheDocument();
});
it("should navigate to the settings screen when clicking the advanced settings button", async () => {
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
const user = userEvent.setup();
renderHomeScreen();
const settingsScreen = screen.queryByTestId("settings-screen");
expect(settingsScreen).not.toBeInTheDocument();
const settingsModal = await screen.findByTestId("ai-config-modal");
expect(settingsModal).toBeInTheDocument();
const advancedSettingsButton = await screen.findByTestId(
"advanced-settings-link",
);
await user.click(advancedSettingsButton);
const settingsScreenAfter = await screen.findByTestId("settings-screen");
expect(settingsScreenAfter).toBeInTheDocument();
const settingsModalAfter = screen.queryByTestId("ai-config-modal");
expect(settingsModalAfter).not.toBeInTheDocument();
});
it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
// @ts-expect-error - we only need APP_MODE for this test
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
});
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
renderHomeScreen();
// small hack to wait for the modal to not appear
await expect(
screen.findByTestId("ai-config-modal", {}, { timeout: 1000 }),
).rejects.toThrow();
});
});
describe("Setup Payment modal", () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
it("should only render if SaaS mode and is new user", async () => {
// @ts-expect-error - we only need the APP_MODE for this test
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
FEATURE_FLAGS: {
ENABLE_BILLING: true,
HIDE_LLM_SETTINGS: false,
},
});
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
renderHomeScreen();
const setupPaymentModal = await screen.findByTestId(
"proceed-to-stripe-button",
);
expect(setupPaymentModal).toBeInTheDocument();
});
});

View File

@@ -1,177 +0,0 @@
import { createRoutesStub } from "react-router";
import { afterEach, describe, expect, it, vi } from "vitest";
import { renderWithProviders } from "test-utils";
import userEvent from "@testing-library/user-event";
import { screen } from "@testing-library/react";
import { AxiosError } from "axios";
import MainApp from "#/routes/root-layout";
import SettingsScreen from "#/routes/settings";
import Home from "#/routes/home";
import OpenHands from "#/api/open-hands";
const createAxiosNotFoundErrorObject = () =>
new AxiosError(
"Request failed with status code 404",
"ERR_BAD_REQUEST",
undefined,
undefined,
{
status: 404,
statusText: "Not Found",
data: { message: "Settings not found" },
headers: {},
// @ts-expect-error - we only need the response object for this test
config: {},
},
);
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
const RouterStub = createRoutesStub([
{
// layout route
Component: MainApp,
path: "/",
children: [
{
// home route
Component: Home,
path: "/",
},
{
Component: SettingsScreen,
path: "/settings",
},
],
},
]);
afterEach(() => {
vi.clearAllMocks();
});
describe("Home Screen", () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
it("should render the home screen", () => {
renderWithProviders(<RouterStub initialEntries={["/"]} />);
});
it("should navigate to the settings screen when the settings button is clicked", async () => {
const user = userEvent.setup();
renderWithProviders(<RouterStub initialEntries={["/"]} />);
const settingsButton = await screen.findByTestId("settings-button");
await user.click(settingsButton);
const settingsScreen = await screen.findByTestId("settings-screen");
expect(settingsScreen).toBeInTheDocument();
});
it("should navigate to the settings when pressing 'Connect to GitHub' if the user isn't authenticated", async () => {
// @ts-expect-error - we only need APP_MODE for this test
getConfigSpy.mockResolvedValue({
APP_MODE: "oss",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
});
const user = userEvent.setup();
renderWithProviders(<RouterStub initialEntries={["/"]} />);
const connectToGitHubButton =
await screen.findByTestId("connect-to-github");
await user.click(connectToGitHubButton);
const settingsScreen = await screen.findByTestId("settings-screen");
expect(settingsScreen).toBeInTheDocument();
});
});
describe("Settings 404", () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
it("should open the settings modal if GET /settings fails with a 404", async () => {
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
renderWithProviders(<RouterStub initialEntries={["/"]} />);
const settingsModal = await screen.findByTestId("ai-config-modal");
expect(settingsModal).toBeInTheDocument();
});
it("should navigate to the settings screen when clicking the advanced settings button", async () => {
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
const user = userEvent.setup();
renderWithProviders(<RouterStub initialEntries={["/"]} />);
const settingsScreen = screen.queryByTestId("settings-screen");
expect(settingsScreen).not.toBeInTheDocument();
const settingsModal = await screen.findByTestId("ai-config-modal");
expect(settingsModal).toBeInTheDocument();
const advancedSettingsButton = await screen.findByTestId(
"advanced-settings-link",
);
await user.click(advancedSettingsButton);
const settingsScreenAfter = await screen.findByTestId("settings-screen");
expect(settingsScreenAfter).toBeInTheDocument();
const settingsModalAfter = screen.queryByTestId("ai-config-modal");
expect(settingsModalAfter).not.toBeInTheDocument();
});
it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
// @ts-expect-error - we only need APP_MODE for this test
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
});
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
renderWithProviders(<RouterStub initialEntries={["/"]} />);
// small hack to wait for the modal to not appear
await expect(
screen.findByTestId("ai-config-modal", {}, { timeout: 1000 }),
).rejects.toThrow();
});
});
describe("Setup Payment modal", () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
afterEach(() => {
vi.resetAllMocks();
});
it("should only render if SaaS mode and is new user", async () => {
// @ts-expect-error - we only need the APP_MODE for this test
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
FEATURE_FLAGS: {
ENABLE_BILLING: true,
HIDE_LLM_SETTINGS: false,
},
});
const error = createAxiosNotFoundErrorObject();
getSettingsSpy.mockRejectedValue(error);
renderWithProviders(<RouterStub initialEntries={["/"]} />);
const setupPaymentModal = await screen.findByTestId(
"proceed-to-stripe-button",
);
expect(setupPaymentModal).toBeInTheDocument();
});
});

View File

@@ -0,0 +1,674 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import LlmSettingsScreen from "#/routes/llm-settings";
import OpenHands from "#/api/open-hands";
import {
MOCK_DEFAULT_USER_SETTINGS,
resetTestHandlersMockSettings,
} from "#/mocks/handlers";
import { AuthProvider } from "#/context/auth-context";
import * as AdvancedSettingsUtlls from "#/utils/has-advanced-settings-set";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
const renderLlmSettingsScreen = () =>
render(<LlmSettingsScreen />, {
wrapper: ({ children }) => (
<QueryClientProvider client={new QueryClient()}>
<AuthProvider>{children}</AuthProvider>
</QueryClientProvider>
),
});
beforeEach(() => {
vi.resetAllMocks();
resetTestHandlersMockSettings();
});
describe("Content", () => {
describe("Basic form", () => {
it("should render the basic form by default", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const basicFom = screen.getByTestId("llm-settings-form-basic");
within(basicFom).getByTestId("llm-provider-input");
within(basicFom).getByTestId("llm-model-input");
within(basicFom).getByTestId("llm-api-key-input");
within(basicFom).getByTestId("llm-api-key-help-anchor");
});
it("should render the default values if non exist", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const provider = screen.getByTestId("llm-provider-input");
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
await waitFor(() => {
expect(provider).toHaveValue("Anthropic");
expect(model).toHaveValue("claude-3-5-sonnet-20241022");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "");
});
});
it("should render the existing settings values", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_api_key_set: true,
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const provider = screen.getByTestId("llm-provider-input");
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
await waitFor(() => {
expect(provider).toHaveValue("OpenAI");
expect(model).toHaveValue("gpt-4o");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "<hidden>");
expect(screen.getByTestId("set-indicator")).toBeInTheDocument();
});
});
});
describe("Advanced form", () => {
it("should render the advanced form if the switch is toggled", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
const basicForm = screen.getByTestId("llm-settings-form-basic");
expect(
screen.queryByTestId("llm-settings-form-advanced"),
).not.toBeInTheDocument();
expect(basicForm).toBeInTheDocument();
await userEvent.click(advancedSwitch);
expect(
screen.queryByTestId("llm-settings-form-advanced"),
).toBeInTheDocument();
expect(basicForm).not.toBeInTheDocument();
const advancedForm = screen.getByTestId("llm-settings-form-advanced");
within(advancedForm).getByTestId("llm-custom-model-input");
within(advancedForm).getByTestId("base-url-input");
within(advancedForm).getByTestId("llm-api-key-input");
within(advancedForm).getByTestId("llm-api-key-help-anchor");
within(advancedForm).getByTestId("agent-input");
within(advancedForm).getByTestId("enable-confirmation-mode-switch");
within(advancedForm).getByTestId("enable-memory-condenser-switch");
await userEvent.click(advancedSwitch);
expect(
screen.queryByTestId("llm-settings-form-advanced"),
).not.toBeInTheDocument();
expect(screen.getByTestId("llm-settings-form-basic")).toBeInTheDocument();
});
it("should render the default advanced settings", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
expect(advancedSwitch).not.toBeChecked();
await userEvent.click(advancedSwitch);
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId(
"enable-confirmation-mode-switch",
);
const condensor = screen.getByTestId("enable-memory-condenser-switch");
expect(model).toHaveValue("anthropic/claude-3-5-sonnet-20241022");
expect(baseUrl).toHaveValue("");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "");
expect(agent).toHaveValue("CodeActAgent");
expect(confirmation).not.toBeChecked();
expect(condensor).toBeChecked();
// check that security analyzer is present
expect(
screen.queryByTestId("security-analyzer-input"),
).not.toBeInTheDocument();
await userEvent.click(confirmation);
screen.getByTestId("security-analyzer-input");
});
it("should render the advanced form if existings settings are advanced", async () => {
const hasAdvancedSettingsSetSpy = vi.spyOn(
AdvancedSettingsUtlls,
"hasAdvancedSettingsSet",
);
hasAdvancedSettingsSetSpy.mockReturnValue(true);
renderLlmSettingsScreen();
await waitFor(() => {
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
expect(advancedSwitch).toBeChecked();
screen.getByTestId("llm-settings-form-advanced");
});
});
it("should render existing advanced settings correctly", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_base_url: "https://api.openai.com/v1/chat/completions",
llm_api_key_set: true,
agent: "CoActAgent",
confirmation_mode: true,
enable_default_condenser: false,
security_analyzer: "mock-invariant",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId(
"enable-confirmation-mode-switch",
);
const condensor = screen.getByTestId("enable-memory-condenser-switch");
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
await waitFor(() => {
expect(model).toHaveValue("openai/gpt-4o");
expect(baseUrl).toHaveValue(
"https://api.openai.com/v1/chat/completions",
);
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "<hidden>");
expect(agent).toHaveValue("CoActAgent");
expect(confirmation).toBeChecked();
expect(condensor).not.toBeChecked();
expect(securityAnalyzer).toHaveValue("mock-invariant");
});
});
});
it.todo("should render an indicator if the llm api key is set");
});
describe("Form submission", () => {
it("should submit the basic form with the correct values", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const provider = screen.getByTestId("llm-provider-input");
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
// select provider
await userEvent.click(provider);
const providerOption = screen.getByText("OpenAI");
await userEvent.click(providerOption);
expect(provider).toHaveValue("OpenAI");
// enter api key
await userEvent.type(apiKey, "test-api-key");
// select model
await userEvent.click(model);
const modelOption = screen.getByText("gpt-4o");
await userEvent.click(modelOption);
expect(model).toHaveValue("gpt-4o");
const submitButton = screen.getByTestId("submit-button");
await userEvent.click(submitButton);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_model: "openai/gpt-4o",
llm_api_key: "test-api-key",
}),
);
});
it("should submit the advanced form with the correct values", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId("enable-confirmation-mode-switch");
const condensor = screen.getByTestId("enable-memory-condenser-switch");
// enter custom model
await userEvent.clear(model);
await userEvent.type(model, "openai/gpt-4o");
expect(model).toHaveValue("openai/gpt-4o");
// enter base url
await userEvent.type(baseUrl, "https://api.openai.com/v1/chat/completions");
expect(baseUrl).toHaveValue("https://api.openai.com/v1/chat/completions");
// enter api key
await userEvent.type(apiKey, "test-api-key");
// toggle confirmation mode
await userEvent.click(confirmation);
expect(confirmation).toBeChecked();
// toggle memory condensor
await userEvent.click(condensor);
expect(condensor).not.toBeChecked();
// select agent
await userEvent.click(agent);
const agentOption = screen.getByText("CoActAgent");
await userEvent.click(agentOption);
expect(agent).toHaveValue("CoActAgent");
// select security analyzer
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
await userEvent.click(securityAnalyzer);
const securityAnalyzerOption = screen.getByText("mock-invariant");
await userEvent.click(securityAnalyzerOption);
const submitButton = screen.getByTestId("submit-button");
await userEvent.click(submitButton);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_model: "openai/gpt-4o",
llm_base_url: "https://api.openai.com/v1/chat/completions",
agent: "CoActAgent",
confirmation_mode: true,
enable_default_condenser: false,
security_analyzer: "mock-invariant",
}),
);
});
it("should disable the button if there are no changes in the basic form", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_api_key_set: true,
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
screen.getByTestId("llm-settings-form-basic");
const submitButton = screen.getByTestId("submit-button");
expect(submitButton).toBeDisabled();
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
// select model
await userEvent.click(model);
const modelOption = screen.getByText("gpt-4o-mini");
await userEvent.click(modelOption);
expect(model).toHaveValue("gpt-4o-mini");
expect(submitButton).not.toBeDisabled();
// reset model
await userEvent.click(model);
const modelOption2 = screen.getByText("gpt-4o");
await userEvent.click(modelOption2);
expect(model).toHaveValue("gpt-4o");
expect(submitButton).toBeDisabled();
// set api key
await userEvent.type(apiKey, "test-api-key");
expect(apiKey).toHaveValue("test-api-key");
expect(submitButton).not.toBeDisabled();
// reset api key
await userEvent.clear(apiKey);
expect(apiKey).toHaveValue("");
expect(submitButton).toBeDisabled();
});
it("should disable the button if there are no changes in the advanced form", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_base_url: "https://api.openai.com/v1/chat/completions",
llm_api_key_set: true,
confirmation_mode: true,
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
screen.getByTestId("llm-settings-form-advanced");
const submitButton = screen.getByTestId("submit-button");
expect(submitButton).toBeDisabled();
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId("enable-confirmation-mode-switch");
const condensor = screen.getByTestId("enable-memory-condenser-switch");
// enter custom model
await userEvent.type(model, "-mini");
expect(model).toHaveValue("openai/gpt-4o-mini");
expect(submitButton).not.toBeDisabled();
// reset model
await userEvent.clear(model);
expect(model).toHaveValue("");
expect(submitButton).toBeDisabled();
await userEvent.type(model, "openai/gpt-4o");
expect(model).toHaveValue("openai/gpt-4o");
expect(submitButton).toBeDisabled();
// enter base url
await userEvent.type(baseUrl, "/extra");
expect(baseUrl).toHaveValue(
"https://api.openai.com/v1/chat/completions/extra",
);
expect(submitButton).not.toBeDisabled();
await userEvent.clear(baseUrl);
expect(baseUrl).toHaveValue("");
expect(submitButton).not.toBeDisabled();
await userEvent.type(baseUrl, "https://api.openai.com/v1/chat/completions");
expect(baseUrl).toHaveValue("https://api.openai.com/v1/chat/completions");
expect(submitButton).toBeDisabled();
// set api key
await userEvent.type(apiKey, "test-api-key");
expect(apiKey).toHaveValue("test-api-key");
expect(submitButton).not.toBeDisabled();
// reset api key
await userEvent.clear(apiKey);
expect(apiKey).toHaveValue("");
expect(submitButton).toBeDisabled();
// set agent
await userEvent.clear(agent);
await userEvent.type(agent, "test-agent");
expect(agent).toHaveValue("test-agent");
expect(submitButton).not.toBeDisabled();
// reset agent
await userEvent.clear(agent);
expect(agent).toHaveValue("");
expect(submitButton).toBeDisabled();
await userEvent.type(agent, "CodeActAgent");
expect(agent).toHaveValue("CodeActAgent");
expect(submitButton).toBeDisabled();
// toggle confirmation mode
await userEvent.click(confirmation);
expect(confirmation).not.toBeChecked();
expect(submitButton).not.toBeDisabled();
await userEvent.click(confirmation);
expect(confirmation).toBeChecked();
expect(submitButton).toBeDisabled();
// toggle memory condensor
await userEvent.click(condensor);
expect(condensor).not.toBeChecked();
expect(submitButton).not.toBeDisabled();
await userEvent.click(condensor);
expect(condensor).toBeChecked();
expect(submitButton).toBeDisabled();
// select security analyzer
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
await userEvent.click(securityAnalyzer);
const securityAnalyzerOption = screen.getByText("mock-invariant");
await userEvent.click(securityAnalyzerOption);
expect(securityAnalyzer).toHaveValue("mock-invariant");
expect(submitButton).not.toBeDisabled();
await userEvent.clear(securityAnalyzer);
expect(securityAnalyzer).toHaveValue("");
expect(submitButton).toBeDisabled();
});
it("should reset button state when switching between forms", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
const submitButton = screen.getByTestId("submit-button");
expect(submitButton).toBeDisabled();
// dirty the basic form
const apiKey = screen.getByTestId("llm-api-key-input");
await userEvent.type(apiKey, "test-api-key");
expect(submitButton).not.toBeDisabled();
await userEvent.click(advancedSwitch);
expect(submitButton).toBeDisabled();
// dirty the advanced form
const model = screen.getByTestId("llm-custom-model-input");
await userEvent.type(model, "openai/gpt-4o");
expect(submitButton).not.toBeDisabled();
await userEvent.click(advancedSwitch);
expect(submitButton).toBeDisabled();
});
// flaky test
it.skip("should disable the button when submitting changes", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const apiKey = screen.getByTestId("llm-api-key-input");
await userEvent.type(apiKey, "test-api-key");
const submitButton = screen.getByTestId("submit-button");
await userEvent.click(submitButton);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_api_key: "test-api-key",
}),
);
expect(submitButton).toHaveTextContent("Saving...");
expect(submitButton).toBeDisabled();
await waitFor(() => {
expect(submitButton).toHaveTextContent("Save");
expect(submitButton).toBeDisabled();
});
});
});
describe("Status toasts", () => {
describe("Basic form", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderLlmSettingsScreen();
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
renderLlmSettingsScreen();
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});
describe("Advanced form", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});
});
describe("SaaS mode", () => {
it("should not render the runtime settings input in oss mode", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return mode
getConfigSpy.mockResolvedValue({
APP_MODE: "oss",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
const runtimeSettingsInput = screen.queryByTestId("runtime-settings-input");
expect(runtimeSettingsInput).not.toBeInTheDocument();
});
it("should render the runtime settings input in saas mode", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return mode
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
const runtimeSettingsInput = screen.queryByTestId("runtime-settings-input");
expect(runtimeSettingsInput).toBeInTheDocument();
});
it("should always render the runtime settings input as disabled", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return mode
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
const runtimeSettingsInput = screen.queryByTestId("runtime-settings-input");
expect(runtimeSettingsInput).toBeInTheDocument();
expect(runtimeSettingsInput).toBeDisabled();
});
});

View File

@@ -1,4 +1,4 @@
import { screen, waitFor, within } from "@testing-library/react";
import { screen, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { createRoutesStub } from "react-router";
@@ -7,6 +7,30 @@ import OpenHands from "#/api/open-hands";
import SettingsScreen from "#/routes/settings";
import { PaymentForm } from "#/components/features/payment/payment-form";
// Mock the i18next hook
vi.mock("react-i18next", async () => {
const actual = await vi.importActual<typeof import("react-i18next")>("react-i18next");
return {
...actual,
useTranslation: () => ({
t: (key: string) => {
const translations: Record<string, string> = {
"SETTINGS$NAV_GIT": "Git",
"SETTINGS$NAV_APPLICATION": "Application",
"SETTINGS$NAV_CREDITS": "Credits",
"SETTINGS$NAV_API_KEYS": "API Keys",
"SETTINGS$NAV_LLM": "LLM",
"SETTINGS$TITLE": "Settings"
};
return translations[key] || key;
},
i18n: {
changeLanguage: vi.fn(),
},
}),
};
});
describe("Settings Billing", () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
@@ -19,18 +43,22 @@ describe("Settings Billing", () => {
Component: () => <PaymentForm />,
path: "/settings/billing",
},
{
Component: () => <div data-testid="git-settings-screen" />,
path: "/settings/git",
},
],
},
]);
const renderSettingsScreen = () =>
renderWithProviders(<RoutesStub initialEntries={["/settings"]} />);
renderWithProviders(<RoutesStub initialEntries={["/settings/billing"]} />);
afterEach(() => {
vi.clearAllMocks();
});
it("should not render the navbar if OSS mode", async () => {
it("should not render the credits tab if OSS mode", async () => {
getConfigSpy.mockResolvedValue({
APP_MODE: "oss",
GITHUB_CLIENT_ID: "123",
@@ -43,13 +71,12 @@ describe("Settings Billing", () => {
renderSettingsScreen();
await waitFor(() => {
const navbar = screen.queryByTestId("settings-navbar");
expect(navbar).not.toBeInTheDocument();
});
const navbar = await screen.findByTestId("settings-navbar");
const credits = within(navbar).queryByText("Credits");
expect(credits).not.toBeInTheDocument();
});
it("should render the navbar if SaaS mode", async () => {
it("should render the credits tab if SaaS mode and billing is enabled", async () => {
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
GITHUB_CLIENT_ID: "123",
@@ -62,11 +89,8 @@ describe("Settings Billing", () => {
renderSettingsScreen();
await waitFor(() => {
const navbar = screen.getByTestId("settings-navbar");
within(navbar).getByText("Account");
within(navbar).getByText("Credits");
});
const navbar = await screen.findByTestId("settings-navbar");
within(navbar).getByText("Credits");
});
it("should render the billing settings if clicking the credits item", async () => {
@@ -88,6 +112,6 @@ describe("Settings Billing", () => {
await user.click(credits);
const billingSection = await screen.findByTestId("billing-settings");
within(billingSection).getByText("PAYMENT$MANAGE_CREDITS");
expect(billingSection).toBeInTheDocument();
});
});

File diff suppressed because it is too large Load Diff

View File

@@ -48,4 +48,4 @@ describe("Observations Service", () => {
});
});
});
});
});

View File

@@ -0,0 +1,101 @@
import { expect, test } from "vitest";
import {
SuggestedTask,
SuggestedTaskGroup,
} from "#/components/features/home/tasks/task.types";
import { groupSuggestedTasks } from "#/utils/group-suggested-tasks";
const rawTasks: SuggestedTask[] = [
{
issue_number: 1,
repo: "repo1",
title: "Task 1",
task_type: "MERGE_CONFLICTS",
git_provider: "github",
},
{
issue_number: 2,
repo: "repo1",
title: "Task 2",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
{
issue_number: 3,
repo: "repo2",
title: "Task 3",
task_type: "UNRESOLVED_COMMENTS",
git_provider: "github",
},
{
issue_number: 4,
repo: "repo2",
title: "Task 4",
task_type: "OPEN_ISSUE",
git_provider: "github",
},
{
issue_number: 5,
repo: "repo3",
title: "Task 5",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
];
const groupedTasks: SuggestedTaskGroup[] = [
{
title: "repo1",
tasks: [
{
issue_number: 1,
repo: "repo1",
title: "Task 1",
task_type: "MERGE_CONFLICTS",
git_provider: "github",
},
{
issue_number: 2,
repo: "repo1",
title: "Task 2",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
],
},
{
title: "repo2",
tasks: [
{
issue_number: 3,
repo: "repo2",
title: "Task 3",
task_type: "UNRESOLVED_COMMENTS",
git_provider: "github",
},
{
issue_number: 4,
repo: "repo2",
title: "Task 4",
task_type: "OPEN_ISSUE",
git_provider: "github",
},
],
},
{
title: "repo3",
tasks: [
{
issue_number: 5,
repo: "repo3",
title: "Task 5",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
],
},
];
test("groupSuggestedTasks", () => {
expect(groupSuggestedTasks(rawTasks)).toEqual(groupedTasks);
});

View File

@@ -7,6 +7,10 @@ describe("hasAdvancedSettingsSet", () => {
expect(hasAdvancedSettingsSet(DEFAULT_SETTINGS)).toBe(false);
});
it("should return false if an empty object", () => {
expect(hasAdvancedSettingsSet({})).toBe(false);
});
describe("should be true if", () => {
test("LLM_BASE_URL is set", () => {
expect(
@@ -26,15 +30,6 @@ describe("hasAdvancedSettingsSet", () => {
).toBe(true);
});
test("REMOTE_RUNTIME_RESOURCE_FACTOR is not default value", () => {
expect(
hasAdvancedSettingsSet({
...DEFAULT_SETTINGS,
REMOTE_RUNTIME_RESOURCE_FACTOR: 999,
}),
).toBe(true);
});
test("CONFIRMATION_MODE is true", () => {
expect(
hasAdvancedSettingsSet({

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.32.0",
"version": "0.34.0",
"private": true,
"type": "module",
"engines": {
@@ -8,44 +8,46 @@
},
"dependencies": {
"@heroui/react": "2.7.6",
"@microlink/react-json-view": "^1.26.1",
"@monaco-editor/react": "^4.7.0-rc.0",
"@react-router/node": "^7.5.0",
"@react-router/serve": "^7.5.0",
"@react-types/shared": "^3.28.0",
"@reduxjs/toolkit": "^2.6.1",
"@react-router/node": "^7.5.2",
"@react-router/serve": "^7.5.2",
"@react-types/shared": "^3.29.0",
"@reduxjs/toolkit": "^2.7.0",
"@stripe/react-stripe-js": "^3.6.0",
"@stripe/stripe-js": "^7.0.0",
"@tanstack/react-query": "^5.72.1",
"@vitejs/plugin-react": "^4.3.2",
"@stripe/stripe-js": "^7.2.0",
"@tanstack/react-query": "^5.74.7",
"@vitejs/plugin-react": "^4.4.0",
"@xterm/addon-fit": "^0.10.0",
"@xterm/xterm": "^5.4.0",
"axios": "^1.8.4",
"axios": "^1.9.0",
"clsx": "^2.1.1",
"eslint-config-airbnb-typescript": "^18.0.0",
"framer-motion": "^12.6.3",
"i18next": "^24.2.3",
"i18next-browser-languagedetector": "^8.0.4",
"framer-motion": "^12.9.2",
"i18next": "^25.0.1",
"i18next-browser-languagedetector": "^8.0.5",
"i18next-http-backend": "^3.0.2",
"isbot": "^5.1.25",
"isbot": "^5.1.27",
"jose": "^6.0.10",
"lucide-react": "^0.503.0",
"monaco-editor": "^0.52.2",
"posthog-js": "^1.235.0",
"posthog-js": "^1.236.7",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-highlight": "^0.15.0",
"react-hot-toast": "^2.5.1",
"react-i18next": "^15.4.1",
"react-i18next": "^15.5.1",
"react-icons": "^5.5.0",
"react-markdown": "^10.1.0",
"react-redux": "^9.2.0",
"react-router": "^7.5.0",
"react-router": "^7.5.2",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.9",
"remark-gfm": "^4.0.1",
"sirv-cli": "^3.0.1",
"socket.io-client": "^4.8.1",
"tailwind-merge": "^3.2.0",
"vite": "^6.2.5",
"vite": "^6.3.3",
"web-vitals": "^3.5.2",
"ws": "^8.18.1"
},
@@ -79,29 +81,29 @@
"@babel/traverse": "^7.27.0",
"@babel/types": "^7.27.0",
"@mswjs/socket.io-binding": "^0.1.1",
"@playwright/test": "^1.51.1",
"@react-router/dev": "^7.5.0",
"@playwright/test": "^1.52.0",
"@react-router/dev": "^7.5.2",
"@tailwindcss/typography": "^0.5.16",
"@tanstack/eslint-plugin-query": "^5.72.1",
"@tanstack/eslint-plugin-query": "^5.74.7",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/node": "^22.14.0",
"@types/react": "^19.1.0",
"@types/node": "^22.15.3",
"@types/react": "^19.1.2",
"@types/react-dom": "^19.1.1",
"@types/react-highlight": "^0.12.8",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/ws": "^8.18.1",
"@typescript-eslint/eslint-plugin": "^7.18.0",
"@typescript-eslint/parser": "^7.18.0",
"@vitest/coverage-v8": "^3.1.1",
"@vitest/coverage-v8": "^3.1.2",
"autoprefixer": "^10.4.21",
"cross-env": "^7.0.3",
"eslint": "^8.57.0",
"eslint-config-airbnb": "^19.0.4",
"eslint-config-airbnb-typescript": "^18.0.0",
"eslint-config-prettier": "^10.1.1",
"eslint-config-prettier": "^10.1.2",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-jsx-a11y": "^6.10.2",
"eslint-plugin-prettier": "^5.2.6",
@@ -109,8 +111,8 @@
"eslint-plugin-react-hooks": "^4.6.2",
"eslint-plugin-unused-imports": "^4.1.4",
"husky": "^9.1.7",
"jsdom": "^26.0.0",
"lint-staged": "^15.5.0",
"jsdom": "^26.1.0",
"lint-staged": "^15.5.1",
"msw": "^2.6.6",
"postcss": "^8.5.2",
"prettier": "^3.5.3",

View File

@@ -8,7 +8,7 @@
* - Please do NOT serve this file on production.
*/
const PACKAGE_VERSION = '2.7.3'
const PACKAGE_VERSION = '2.7.5'
const INTEGRITY_CHECKSUM = '00729d72e3b82faf54ca8b9621dbb96f'
const IS_MOCKED_RESPONSE = Symbol('isMockedResponse')
const activeClientIds = new Set()

View File

@@ -39,6 +39,7 @@ const IGNORE_PATHS = [
"entry.client.tsx", // Client entry point
"utils/scan-unlocalized-strings.ts", // Original scanner
"utils/scan-unlocalized-strings-ast.ts", // This file itself
"frontend/src/components/features/home/tasks/get-prompt-for-query.ts", // Only contains agent prompts
];
// Extensions to scan
@@ -105,6 +106,11 @@ function isRawTranslationKey(str) {
// Specific technical strings that should be excluded from localization
const EXCLUDED_TECHNICAL_STRINGS = [
"openid email profile", // OAuth scope string - not user-facing
"OPEN_ISSUE", // Task type identifier, not a UI string
"Merge Request", // Git provider specific terminology
"GitLab API", // Git provider specific terminology
"Pull Request", // Git provider specific terminology
"GitHub API", // Git provider specific terminology
];
function isExcludedTechnicalString(str) {
@@ -275,8 +281,8 @@ function isCommonDevelopmentString(str) {
// HTML tags and attributes
if (
/^<[a-z0-9]+>.*<\/[a-z0-9]+>$/.test(str) ||
/^<[a-z0-9]+ [^>]+\/>$/.test(str)
/^<[a-z0-9]+(?:\s[^>]*)?>.*<\/[a-z0-9]+>$/i.test(str) ||
/^<[a-z0-9]+ [^>]+\/>$/i.test(str)
) {
return true;
}

View File

@@ -0,0 +1,49 @@
import { openHands } from "./open-hands-axios";
export interface ApiKey {
id: string;
name: string;
prefix: string;
created_at: string;
last_used_at: string | null;
}
export interface CreateApiKeyResponse {
id: string;
name: string;
key: string; // Full key, only returned once upon creation
prefix: string;
created_at: string;
}
class ApiKeysClient {
/**
* Get all API keys for the current user
*/
static async getApiKeys(): Promise<ApiKey[]> {
const { data } = await openHands.get<unknown>("/api/keys");
// Ensure we always return an array, even if the API returns something else
return Array.isArray(data) ? (data as ApiKey[]) : [];
}
/**
* Create a new API key
* @param name - A descriptive name for the API key
*/
static async createApiKey(name: string): Promise<CreateApiKeyResponse> {
const { data } = await openHands.post<CreateApiKeyResponse>("/api/keys", {
name,
});
return data;
}
/**
* Delete an API key
* @param id - The ID of the API key to delete
*/
static async deleteApiKey(id: string): Promise<void> {
await openHands.delete(`/api/keys/${id}`);
}
}
export default ApiKeysClient;

View File

@@ -8,10 +8,14 @@ import {
Conversation,
ResultSet,
GetTrajectoryResponse,
GitChangeDiff,
GitChange,
ConversationTrigger,
} from "./open-hands.types";
import { openHands } from "./open-hands-axios";
import { ApiSettings, PostApiSettings } from "#/types/settings";
import { GitUser, GitRepository } from "#/types/git";
import { SuggestedTask } from "#/components/features/home/tasks/task.types";
class OpenHands {
/**
@@ -147,17 +151,21 @@ class OpenHands {
}
static async createConversation(
conversation_trigger: ConversationTrigger = "gui",
selectedRepository?: GitRepository,
initialUserMsg?: string,
imageUrls?: string[],
replayJson?: string,
suggested_task?: SuggestedTask,
): Promise<Conversation> {
const body = {
conversation_trigger,
selected_repository: selectedRepository,
selected_branch: undefined,
initial_user_msg: initialUserMsg,
image_urls: imageUrls,
replay_json: replayJson,
suggested_task,
};
const { data } = await openHands.post<Conversation>(
@@ -197,14 +205,6 @@ class OpenHands {
return data.status === 200;
}
/**
* Reset user settings in server
*/
static async resetSettings(): Promise<boolean> {
const response = await openHands.post("/api/reset-settings");
return response.status === 200;
}
static async createCheckoutSession(amount: number): Promise<string> {
const { data } = await openHands.post(
"/api/billing/create-checkout-session",
@@ -277,6 +277,26 @@ class OpenHands {
appMode === "saas" ? "/api/logout" : "/api/unset-settings-tokens";
await openHands.post(endpoint);
}
static async getGitChanges(conversationId: string): Promise<GitChange[]> {
const { data } = await openHands.get<GitChange[]>(
`/api/conversations/${conversationId}/git/changes`,
);
return data;
}
static async getGitChangeDiff(
conversationId: string,
path: string,
): Promise<GitChangeDiff> {
const { data } = await openHands.get<GitChangeDiff>(
`/api/conversations/${conversationId}/git/diff`,
{
params: { path },
},
);
return data;
}
}
export default OpenHands;

View File

@@ -70,6 +70,8 @@ export interface AuthenticateResponse {
error?: string;
}
export type ConversationTrigger = "resolver" | "gui" | "suggested_task";
export interface Conversation {
conversation_id: string;
title: string;
@@ -77,9 +79,22 @@ export interface Conversation {
last_updated_at: string;
created_at: string;
status: ProjectStatus;
trigger?: ConversationTrigger;
}
export interface ResultSet<T> {
results: T[];
next_page_id: string | null;
}
export type GitChangeStatus = "M" | "A" | "D" | "R" | "U";
export interface GitChange {
status: GitChangeStatus;
path: string;
}
export interface GitChangeDiff {
modified: string;
original: string;
}

View File

@@ -0,0 +1,9 @@
import { SuggestedTask } from "#/components/features/home/tasks/task.types";
import { openHands } from "../open-hands-axios";
export class SuggestionsService {
static async getSuggestedTasks(): Promise<SuggestedTask[]> {
const { data } = await openHands.get("/api/user/suggested-tasks");
return data;
}
}

Some files were not shown because too many files have changed in this diff Show More