Fix issue #3325 : '[Documentation]: config.toml options should be documented on the doc web site'

Update instruction for new version of eval runtime-api (#4128 )
Update PR Template for better release notes (#4126 )
2026-04-29 03:00:45 -04:00 · 2024-10-01 14:24:18 +00:00 · 2024-09-30 23:48:38 +00:00 · 2024-09-30 17:06:56 -04:00
15 changed files with 63 additions and 40 deletions
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,4 +1,6 @@
-**Short description of the problem this fixes or functionality that this introduces. This may be used for the CHANGELOG**
+- [ ] Include this change in the Release Notes. If checked, you must provide an **end-user friendly** description for your change below.
+
+**End-user friendly description of the problem this fixes or functionality that this introduces**



--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -8,6 +8,11 @@ const sidebars: SidebarsConfig = {
      label: 'Getting Started',
      id: 'usage/getting-started',
    },
+    {
+      type: 'doc',
+      label: 'Configuration',
+      id: 'src/configuration',
+    },
    {
      type: 'category',
      label: 'LLMs',
--- a/docs/src/configuration.md
+++ b/docs/src/configuration.md
@@ -0,0 +1,41 @@
+# OpenHands Configuration Options
+
+OpenHands provides various configuration options to customize its behavior. This page documents all available options.
+
+## General Configuration
+
+- `project_name`: The name of your project.
+- `output_dir`: The directory where output files will be saved.
+- `max_iterations`: The maximum number of iterations for the AI to attempt solving a task.
+- `max_time`: The maximum time (in seconds) for the AI to work on a task.
+
+## AI Model Configuration
+
+- `model`: The AI model to use (e.g., "gpt-4", "gpt-3.5-turbo").
+- `temperature`: Controls the randomness of the AI's output (0.0 to 1.0).
+- `max_tokens`: The maximum number of tokens to generate in the AI's response.
+
+## Execution Environment
+
+- `python_path`: The path to the Python interpreter to use.
+- `allowed_modules`: A list of Python modules that are allowed to be imported.
+- `timeout`: The maximum execution time for a single command (in seconds).
+
+## Logging and Debugging
+
+- `log_level`: The level of logging detail (e.g., "DEBUG", "INFO", "WARNING", "ERROR").
+- `log_file`: The file path for saving logs.
+- `debug_mode`: Enable or disable debug mode (true/false).
+
+## Security
+
+- `allow_internet_access`: Allow the AI to access the internet (true/false).
+- `allowed_domains`: A list of allowed domains if internet access is enabled.
+- `max_file_size`: The maximum size (in bytes) of files that can be created or modified.
+
+## Custom Behavior
+
+- `custom_prompts`: A dictionary of custom prompts to use for specific tasks.
+- `task_specific_settings`: A dictionary of settings that apply to specific tasks or modules.
+
+Please refer to the rest of the OpenHands documentation for more detailed information on how to use these configuration options in your project.
--- a/evaluation/swe_bench/README.md
+++ b/evaluation/swe_bench/README.md
@@ -69,7 +69,7 @@ This is in limited beta. Contact Xingyao over slack if you want to try this out!

 ```bash
 # ./evaluation/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split]
-ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote EVAL_DOCKER_IMAGE_PREFIX="us-docker.pkg.dev/evaluation-428620/swe-bench-images" \
+ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
 ./evaluation/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300 30 16 "princeton-nlp/SWE-bench_Lite" test
 # This example runs evaluation on CodeActAgent for 300 instances on "princeton-nlp/SWE-bench_Lite"'s test set, with a maximum of 30 iterations per instance and 16 workers running in parallel
 ```
@@ -163,7 +163,8 @@ This is in limited beta. Contact Xingyao over slack if you want to try this out!

 ```bash
 # ./evaluation/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers]
-ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote EVAL_DOCKER_IMAGE_PREFIX="us-docker.pkg.dev/evaluation-428620/swe-bench-images" evaluation/swe_bench/scripts/eval_infer_remote.sh evaluation/outputs/swe_bench_lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_30_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
+ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
+evaluation/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe_bench_lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_30_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
 # This example evaluates patches generated by CodeActAgent on Llama-3.1-70B-Instruct-Turbo on "princeton-nlp/SWE-bench_Lite"'s test set, with 16 workers running in parallel
 ```

--- a/evaluation/swe_bench/eval_infer.py
+++ b/evaluation/swe_bench/eval_infer.py
@@ -81,6 +81,7 @@ def get_config(instance: pd.Series) -> AppConfig:
            # large enough timeout, since some testcases take very long to run
            timeout=1800,
            api_key=os.environ.get('ALLHANDS_API_KEY', None),
+            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
        ),
        # do not mount workspace
        workspace_base=None,
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -131,6 +131,7 @@ def get_config(
            # large enough timeout, since some testcases take very long to run
            timeout=300,
            api_key=os.environ.get('ALLHANDS_API_KEY', None),
+            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
        ),
        # do not mount workspace
        workspace_base=None,
--- a/evaluation/swe_bench/scripts/cleanup_remote_runtime.sh
+++ b/evaluation/swe_bench/scripts/cleanup_remote_runtime.sh
@@ -2,10 +2,10 @@


 # API base URL
-BASE_URL="https://api.all-hands.dev/v0"
+BASE_URL="https://runtime.eval.all-hands.dev"

 # Get the list of runtimes
-response=$(curl --silent --location --request GET "${BASE_URL}/runtime/list" \
+response=$(curl --silent --location --request GET "${BASE_URL}/list" \
  --header "X-API-Key: ${ALLHANDS_API_KEY}")

 n_runtimes=$(echo $response | jq -r '.total')
@@ -16,7 +16,7 @@ runtime_ids=$(echo $response | jq -r '.runtimes | .[].runtime_id')
 counter=1
 for runtime_id in $runtime_ids; do
  echo "Stopping runtime ${counter}/${n_runtimes}: ${runtime_id}"
-  curl --silent --location --request POST "${BASE_URL}/runtime/stop" \
+  curl --silent --location --request POST "${BASE_URL}/stop" \
    --header "X-API-Key: ${ALLHANDS_API_KEY}" \
    --header "Content-Type: application/json" \
    --data-raw "{\"runtime_id\": \"${runtime_id}\"}"
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -20,13 +20,9 @@ function AgentStatusBar() {
  const { curAgentState } = useSelector((state: RootState) => state.agent);
  const { curStatusMessage } = useSelector((state: RootState) => state.status);

-const AgentStatusMap: {
+  const AgentStatusMap: {
    [k: string]: { message: string; indicator: IndicatorColor };
  } = {
-[AgentState.RATE_LIMITED]: {
-      message: t(I18nKey.AGENT_STATUS$RATE_LIMITED_MESSAGE),
-      indicator: IndicatorColor.YELLOW,
-    },
    [AgentState.INIT]: {
      message: t(I18nKey.CHAT_INTERFACE$AGENT_INIT_MESSAGE),
      indicator: IndicatorColor.BLUE,
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -1,18 +1,4 @@
 {
-"AGENT_STATUS$RATE_LIMITED_MESSAGE": {
-    "en": "Agent is rate limited. Please wait.",
-    "zh-CN": "代理已达到速率限制。请稍候。",
-    "de": "Agent ist ratenbegrenzt. Bitte warten.",
-    "ko-KR": "에이전트가 속도 제한되었습니다. 잠시만 기다려주세요.",
-    "no": "Agenten er hastighetsbegrenset. Vennligst vent.",
-    "zh-TW": "代理已達到速率限制。請稍候。",
-    "it": "L'agente è limitato dalla velocità. Attendere prego.",
-    "pt": "O agente está com limite de taxa. Por favor, aguarde.",
-    "es": "El agente está limitado por tasa. Por favor, espere.",
-    "ar": "الوكيل محدود بمعدل. يرجى الانتظار.",
-    "fr": "L'agent est limité en débit. Veuillez patienter.",
-    "tr": "Ajan hız sınırlamasına tabi. Lütfen bekleyin."
-  },
  "WORKSPACE$TITLE": {
    "en": "OpenHands Workspace",
    "zh-CN": "OpenHands 工作区",
--- a/frontend/src/types/AgentState.tsx
+++ b/frontend/src/types/AgentState.tsx
@@ -1,5 +1,4 @@
 enum AgentState {
-  RATE_LIMITED = "rate_limited",
  LOADING = "loading",
  INIT = "init",
  RUNNING = "running",
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -500,7 +500,7 @@ class AgentController:
            self.event_stream.add_event(obs, EventSource.AGENT)
        return

-async def _handle_traffic_control(
+    async def _handle_traffic_control(
        self, limit_type: str, current_value: float, max_value: float
    ):
        """Handles agent state after hitting the traffic control limit.
@@ -511,8 +511,6 @@ async def _handle_traffic_control(
            max_value (float): The maximum value of the limit.
        """
        stop_step = False
-        self.state.agent_state = AgentState.RATE_LIMITED
-        await self._publish_agent_state_changed()
        if self.state.traffic_control_state == TrafficControlState.PAUSED:
            logger.info('Hitting traffic control, temporarily resume upon user request')
            self.state.traffic_control_state = TrafficControlState.NORMAL
--- a/openhands/core/schema/agent.py
+++ b/openhands/core/schema/agent.py
@@ -6,10 +6,6 @@ class AgentState(str, Enum):
    """The agent is loading.
    """

-    RATE_LIMITED = 'rate_limited'
-    """The agent is rate limited.
-    """
-
    INIT = 'init'
    """The agent is initialized.
    """
--- a/poetry.lock
+++ b/poetry.lock
@@ -9685,4 +9685,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "1e4da073f36492c1db18dc0134a2f13746519bd529abc6a54dea579c4f89b260"
+content-hash = "78e09d0b5c33f39ec951659658b5b4b46ba206d8f95e9a154be4e0ef869b7c79"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,7 +54,6 @@ python-pptx = "*"
 pylatexenc = "*"
 tornado = "*"
 python-dotenv = "*"
-pytest = "^8.3.3"

 [tool.poetry.group.llama-index.dependencies]
 llama-index = "*"
@@ -86,7 +85,6 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]

-
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -117,7 +115,6 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"

-
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"
--- a/tests/unit/test_response_parsing.py
+++ b/tests/unit/test_response_parsing.py
@@ -1,5 +1,7 @@
 import pytest

+from agenthub.micro.agent import parse_response as parse_response_micro
+from agenthub.planner_agent.prompt import parse_response as parse_response_planner
 from openhands.core.exceptions import LLMResponseError
 from openhands.core.utils.json import loads as custom_loads
 from openhands.events.action import (
@@ -7,12 +9,10 @@ from openhands.events.action import (
    MessageAction,
 )

-# TODO: Replace this with the correct import for parse_response
-from openhands.utils.microagent import parse_response

@pytest.mark.parametrize(
    'parse_response_module',
-    [parse_response],
+    [parse_response_micro, parse_response_planner],
 )
 def test_parse_single_complete_json(parse_response_module):
    input_response = """
Author	SHA1	Message	Date
openhands	e1a848e7cd	Fix issue #3325 : '[Documentation]: config.toml options should be documented on the doc web site'	2024-10-01 14:24:18 +00:00
Xingyao Wang	1109637efb	Update instruction for new version of eval runtime-api (#4128 )	2024-09-30 23:48:38 +00:00
mamoodi	71adfeebab	Update PR Template for better release notes (#4126 )	2024-09-30 17:06:56 -04:00