From 797f02ff6f51670c6588c425cce933b853dee3db Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Thu, 12 Sep 2024 13:50:26 -0500
Subject: [PATCH] rename huggingface evaluation benchmark (#3845)

---
 README.md                                             | 2 +-
 docs/src/components/HomepageHeader/HomepageHeader.tsx | 2 +-
 evaluation/README.md                                  | 6 +++---
 evaluation/miniwob/README.md                          | 2 +-
 evaluation/swe_bench/README.md                        | 8 ++++----
 evaluation/webarena/README.md                         | 2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 8bddb5e7e8..d50c1ad164 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@
   Check out the documentation
   Paper on Arxiv
-  <a href="https://huggingface.co/spaces/OpenDevin/evaluation">Evaluation Benchmark Score</a>
+  <a href="https://huggingface.co/spaces/OpenHands/evaluation">Evaluation Benchmark Score</a>
diff --git a/docs/src/components/HomepageHeader/HomepageHeader.tsx b/docs/src/components/HomepageHeader/HomepageHeader.tsx
index 59d01d7962..f421b2897a 100644
--- a/docs/src/components/HomepageHeader/HomepageHeader.tsx
+++ b/docs/src/components/HomepageHeader/HomepageHeader.tsx
@@ -29,7 +29,7 @@ export function HomepageHeader() {
   Check out the documentation
   Paper on Arxiv
-  <a href="https://huggingface.co/spaces/OpenDevin/evaluation">Evaluation Benchmark Score</a>
+  <a href="https://huggingface.co/spaces/OpenHands/evaluation">Evaluation Benchmark Score</a>

diff --git a/evaluation/README.md b/evaluation/README.md
index f0516511b9..7a555e3ee6 100644
--- a/evaluation/README.md
+++ b/evaluation/README.md
@@ -9,7 +9,7 @@ To better organize the evaluation folder, we should follow the rules below:
 - Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain all the preprocessing/evaluation/analysis scripts.
 - Raw data and experimental records should not be stored within this repo.
-- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenDevin/evaluation) for visualization.
+- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
 - Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.
 
 ## Supported Benchmarks
@@ -69,8 +69,8 @@ temperature = 0.0
 
 ### Result Visualization
 
-Check [this huggingface space](https://huggingface.co/spaces/OpenDevin/evaluation) for visualization of existing experimental results.
+Check [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization of existing experimental results.
 
 ### Upload your results
 
-You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results to our hosted huggingface repo via PR following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
+You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results to our hosted huggingface repo via PR following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

diff --git a/evaluation/miniwob/README.md b/evaluation/miniwob/README.md
index fe833eec97..b0b0545406 100644
--- a/evaluation/miniwob/README.md
+++ b/evaluation/miniwob/README.md
@@ -26,7 +26,7 @@ poetry run python evaluation/miniwob/get_success_rate.py evaluation/evaluation_o
 
 ## Submit your evaluation results
 
-You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
+You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
 
 ## BrowsingAgent V1.0 result

diff --git a/evaluation/swe_bench/README.md b/evaluation/swe_bench/README.md
index e371654670..7bb82995de 100644
--- a/evaluation/swe_bench/README.md
+++ b/evaluation/swe_bench/README.md
@@ -125,7 +125,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc
 
 > If you want to evaluate existing results, you should first run this to clone existing outputs
 >```bash
->git clone https://huggingface.co/spaces/OpenDevin/evaluation evaluation/evaluation_outputs
+>git clone https://huggingface.co/spaces/OpenHands/evaluation evaluation/evaluation_outputs
 >```
 
 NOTE, you should have already pulled the instance-level OR env-level docker images following [this section](#openhands-swe-bench-instance-level-docker-support).
@@ -159,10 +159,10 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be
 
 ## Visualize Results
 
-First you need to clone `https://huggingface.co/spaces/OpenDevin/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
+First you need to clone `https://huggingface.co/spaces/OpenHands/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
 
 ```bash
-git clone https://huggingface.co/spaces/OpenDevin/evaluation
+git clone https://huggingface.co/spaces/OpenHands/evaluation
 ```
 
 **(optional) setup streamlit environment with conda**:
@@ -186,4 +186,4 @@ Then you can access the SWE-Bench trajectory visualizer at `localhost:8501`.
 
 ## Submit your evaluation results
 
-You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
+You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

diff --git a/evaluation/webarena/README.md b/evaluation/webarena/README.md
index fa2bda53d9..e81f92c592 100644
--- a/evaluation/webarena/README.md
+++ b/evaluation/webarena/README.md
@@ -37,7 +37,7 @@ poetry run python evaluation/webarena/get_success_rate.py evaluation/evaluation_
 
 ## Submit your evaluation results
 
-You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
+You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
 
 ## BrowsingAgent V1.0 result
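
Since the patch only rewrites the huggingface space URL in documentation and one React component, a quick sanity check after applying it is to confirm that no stale `OpenDevin/evaluation` references remain. A minimal sketch, assuming the mail-formatted patch above is saved at the repository root as `0001-rename-huggingface-evaluation-benchmark.patch` (a hypothetical filename):

```bash
# Apply the mail-formatted patch as a commit, preserving author and message.
# The filename below is an assumption based on git format-patch naming.
git am 0001-rename-huggingface-evaluation-benchmark.patch

# Check that every reference now points at the OpenHands space;
# this should print nothing if the rename is complete.
git grep -n "OpenDevin/evaluation"
```

Note that `git grep` exits with a non-zero status when there are no matches, which is the expected outcome here.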