mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-09 14:57:59 -05:00
[eval] SWE-Gym Integration (#6651)
Co-authored-by: Robert Brennan <accounts@rbren.io> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Graham Neubig <neubig@gmail.com>
This commit is contained in:
45
poetry.lock
generated
45
poetry.lock
generated
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
@@ -8938,7 +8938,7 @@ files = [
|
||||
|
||||
[package.dependencies]
|
||||
greenlet = [
|
||||
{version = "!=0.4.17", optional = true, markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") or extra == \"asyncio\""},
|
||||
{version = "!=0.4.17", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"},
|
||||
{version = "!=0.4.17", optional = true, markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") or extra == \"asyncio\""},
|
||||
]
|
||||
typing-extensions = ">=4.6.0"
|
||||
@@ -9109,14 +9109,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "swebench"
|
||||
version = "3.0.13"
|
||||
version = "3.0.15"
|
||||
description = "The official SWE-bench package - a benchmark for evaluating LMs on software engineering"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["evaluation"]
|
||||
files = [
|
||||
{file = "swebench-3.0.13-py3-none-any.whl", hash = "sha256:0949e0a7269fcebb287dd951d14c049bd8189c7740fc4878354dbec756531c0f"},
|
||||
{file = "swebench-3.0.13.tar.gz", hash = "sha256:d1cce406d0674cb1f3ca7da90089644d1ded3649c98f239a5a7ef4829d2f7c58"},
|
||||
{file = "swebench-3.0.15-py3-none-any.whl", hash = "sha256:dd694356f9c155a55d3d2e113fe58446f7385eea0574230af5e2504426f8b85b"},
|
||||
{file = "swebench-3.0.15.tar.gz", hash = "sha256:24e734fbcce34082665a25719075e6899382b7135103dd8c6cc09a6e23789101"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -9139,6 +9139,39 @@ unidiff = "*"
|
||||
inference = ["anthropic", "flash_attn", "jedi", "openai", "peft", "protobuf", "sentencepiece", "tiktoken", "torch", "transformers", "triton"]
|
||||
test = ["pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "swegym"
|
||||
version = "2.0.13"
|
||||
description = "Fork of SWE-bench package - a benchmark for evaluating LMs on software engineering"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["evaluation"]
|
||||
files = []
|
||||
develop = false
|
||||
|
||||
[package.dependencies]
|
||||
beautifulsoup4 = "*"
|
||||
chardet = "*"
|
||||
datasets = "*"
|
||||
docker = "*"
|
||||
ghapi = "*"
|
||||
GitPython = "*"
|
||||
pre-commit = "*"
|
||||
python-dotenv = "*"
|
||||
requests = "*"
|
||||
rich = "*"
|
||||
tqdm = "*"
|
||||
unidiff = "*"
|
||||
|
||||
[package.extras]
|
||||
inference = ["anthropic", "flash_attn", "jedi", "openai", "peft", "protobuf", "sentencepiece", "tenacity", "tiktoken", "torch", "transformers", "triton"]
|
||||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/SWE-Gym/SWE-Bench-Package.git"
|
||||
reference = "HEAD"
|
||||
resolved_reference = "16dd480cce9b27bf111a362d280881c6def5d2a7"
|
||||
|
||||
[[package]]
|
||||
name = "sympy"
|
||||
version = "1.13.1"
|
||||
@@ -10855,4 +10888,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "83da0b681253a79417c9842862cdd102c1ab6e8770d9dd9e0c42bc7994be2cd0"
|
||||
content-hash = "c3f32c54606e5f313d9a909625f77cc3d575bf951e986633bcecd94520f36450"
|
||||
|
||||
Reference in New Issue
Block a user