fix: ensure panels occupy full window size when window is large

- Add proper size constraints based on window dimensions
- Add window resize handling to maintain constraints
- Improve panel styles with proper flex behavior
- Set appropriate min/max dimensions for both panels
Add a stress test for eventstream runtime (#6038)
2026-04-29 03:00:45 -04:00 · 2025-01-07 03:39:57 +00:00 · 2025-01-06 22:36:59 +00:00 · 2025-01-07 06:22:58 +08:00 · 2025-01-06 21:59:42 +00:00 · 2025-01-06 14:26:48 -07:00
157 changed files with 3969 additions and 1349 deletions
@@ -36,6 +36,8 @@ jobs:
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3
+      - name: Install tmux
+        run: sudo apt-get update && sudo apt-get install -y tmux
      - name: Install poetry via pipx
        run: pipx install poetry
      - name: Set up Python
@@ -29,6 +29,8 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v4

+      - name: Install tmux
+        run: sudo apt-get update && sudo apt-get install -y tmux
      - name: Install poetry via pipx
        run: pipx install poetry

@@ -56,7 +56,7 @@ jobs:
          docker-images: false
          swap-storage: true
      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3.0.0
+        uses: docker/setup-qemu-action@v3.2.0
        with:
          image: tonistiigi/binfmt:latest
      - name: Login to GHCR
@@ -119,7 +119,7 @@ jobs:
          docker-images: false
          swap-storage: true
      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3.0.0
+        uses: docker/setup-qemu-action@v3.2.0
        with:
          image: tonistiigi/binfmt:latest
      - name: Login to GHCR
@@ -31,6 +31,8 @@ jobs:
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-poetry-
+      - name: Install tmux
+        run: brew install tmux
      - name: Install poetry via pipx
        run: pipx install poetry
      - name: Install Python dependencies using Poetry
@@ -30,6 +30,8 @@ jobs:
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3
+      - name: Install tmux
+        run: sudo apt-get update && sudo apt-get install -y tmux
      - name: Install poetry via pipx
        run: pipx install poetry
      - name: Set up Python
@@ -106,7 +106,7 @@ check-poetry:
 	@if command -v poetry > /dev/null; then \
 		POETRY_VERSION=$(shell poetry --version 2>&1 | sed -E 's/Poetry \(version ([0-9]+\.[0-9]+\.[0-9]+)\)/\1/'); \
 		IFS='.' read -r -a POETRY_VERSION_ARRAY <<< "$$POETRY_VERSION"; \
-		if [ $${POETRY_VERSION_ARRAY[0]} -ge 1 ] && [ $${POETRY_VERSION_ARRAY[1]} -ge 8 ]; then \
+		if [ $${POETRY_VERSION_ARRAY[0]} -gt 1 ] || ([ $${POETRY_VERSION_ARRAY[0]} -eq 1 ] && [ $${POETRY_VERSION_ARRAY[1]} -ge 8 ]); then \
 			echo "$(BLUE)$(shell poetry --version) is already installed.$(RESET)"; \
 		else \
 			echo "$(RED)Poetry 1.8 or later is required. You can install poetry by running the following command, then adding Poetry to your PATH:"; \
@@ -190,7 +190,7 @@ build-frontend:
 # Start backend
 start-backend:
 	@echo "$(YELLOW)Starting backend...$(RESET)"
-	@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "$(shell pwd)/workspace"
+	@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "./workspace"

 # Start frontend
 start-frontend:
@@ -198,6 +198,16 @@ model = "gpt-4o"
 # agent.CodeActAgent
 ##############################################################################
 [agent]
+
+# whether the browsing tool is enabled
+codeact_enable_browsing = true
+
+# whether the LLM draft editor is enabled
+codeact_enable_llm_editor = false
+
+# whether the IPython tool is enabled
+codeact_enable_jupyter = true
+
 # Name of the micro agent to use for this agent
 #micro_agent_name = ""

@@ -210,6 +220,12 @@ model = "gpt-4o"
 # LLM config group to use
 #llm_config = 'your-llm-config-group'

+# Whether to use microagents at all
+#use_microagents = true
+
+# List of microagents to disable
+#disabled_microagents = []
+
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
@@ -123,7 +123,6 @@ class openhands.state.State {
  updated_info: List[Tuple[Action, Observation]]
 }
 class openhands.observation.CmdOutputObservation {
-  command_id: int
  command: str
  exit_code: int
  observation: str
@@ -137,7 +137,6 @@ def complete_runtime(

        action = CmdRunAction(
            command=f'chmod +x ./{script_name} && ./{script_name}',
-            keep_prompt=False,
        )
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
@@ -164,8 +163,7 @@ def complete_runtime(
            logger.info(f'Running get ground truth cmd: {script_name}')

            action = CmdRunAction(
-                command=f'chmod +x ./{script_name} && ./{script_name}',
-                keep_prompt=False,
+                command=f'chmod +x ./{script_name} && ./{script_name}'
            )
            logger.info(action, extra={'msg_type': 'ACTION'})
            obs = runtime.run_action(action)
@@ -145,10 +145,7 @@ def complete_runtime(
        )
        logger.info(f'Running test file: {script_name}')

-    action = CmdRunAction(
-        command=f'python3 -m unittest {script_name}',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command=f'python3 -m unittest {script_name}')
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -199,7 +199,7 @@ def complete_runtime(
    if obs.exit_code == 0:
        test_result['metadata']['1_copy_change_success'] = True

-        action = CmdRunAction(command=f'cat {generated_path}', keep_prompt=False)
+        action = CmdRunAction(command=f'cat {generated_path}')
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        assert obs.exit_code == 0
@@ -223,9 +223,7 @@ def complete_runtime(
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0

-    action = CmdRunAction(
-        command='cat /testing_files/results_biocoder.json', keep_prompt=False
-    )
+    action = CmdRunAction(command='cat /testing_files/results_biocoder.json')
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    if obs.exit_code == 0:
@@ -127,7 +127,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
        "observation": "run",
        "content": "california_schools/california_schools.sqlite\r\n[(1.0,)]",
        "extras": {
-          "command_id": -1,
          "command": "python3 0.py",
          "exit_code": 0
        }
@@ -268,10 +268,7 @@ def initialize_runtime(
    runtime.copy_to(db_file, '/workspace')

    # Check the database is copied
-    action = CmdRunAction(
-        command='cd /workspace && ls -l',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command='cd /workspace && ls -l')
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0
@@ -300,10 +297,7 @@ def complete_runtime(
    instance_id = instance.instance_id.replace('/', '__')
    path = os.path.join('/workspace', f'{instance_id}.py')

-    action = CmdRunAction(
-        command=f'cat {path}',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command=f'cat {path}')
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})

@@ -71,7 +71,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
                "observation": "run",
                "content": "[File: /workspace/Python__2.py (14 lines total)]\r\n1:def truncate_number(number: float) -> float:\r\n2:    return number % 1.0 + 1.0\r\n3:\r\n4:\r\n5:\r\n6:\r\n7:\r\n8:\r\n9:def check(truncate_number):\r\n10:    assert truncate_number(3.5) == 0.5\r\n11:    assert abs(truncate_number(1.33) - 0.33) < 1e-6\r\n12:    assert abs(truncate_number(123.456) - 0.456) < 1e-6\r\n13:\r\n14:check(truncate_number)",
                "extras": {
-                    "command_id": -1,
                    "command": "open Python__2.py",
                    "exit_code": 0
                }
@@ -98,7 +97,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
                "observation": "run",
                "content": "> > [File: /workspace/Python__2.py (14 lines total)]\r\n1:def truncate_number(number: float) -> float:\r\n2:    return number % 1.0\r\n3:\r\n4:\r\n5:\r\n6:\r\n7:\r\n8:\r\n9:def check(truncate_number):\r\n10:    assert truncate_number(3.5) == 0.5\r\n11:    assert abs(truncate_number(1.33) - 0.33) < 1e-6\r\n12:    assert abs(truncate_number(123.456) - 0.456) < 1e-6\r\n13:\r\n14:check(truncate_number)\r\nFile updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.",
                "extras": {
-                    "command_id": -1,
                    "command": "edit 2:2 <<EOF\n    return number % 1.0\nEOF",
                    "exit_code": 0
                }
@@ -125,7 +123,6 @@ For each problem, OpenHands is given a set number of iterations to fix the faili
                "observation": "run",
                "content": "",
                "extras": {
-                    "command_id": -1,
                    "command": "python3 Python__2.py",
                    "exit_code": 0
                }
@@ -171,9 +171,7 @@ def complete_runtime(
    num_workers = LANGUAGE_TO_NUM_WORKERS[language]
    python_imports = '\n'.join(IMPORT_HELPER[language])

-    action = CmdRunAction(
-        command=f'cat /workspace/{_get_instance_id(instance)}.py', keep_prompt=False
-    )
+    action = CmdRunAction(command=f'cat /workspace/{_get_instance_id(instance)}.py')
    obs = runtime.run_action(action)
    assert obs.exit_code == 0

@@ -163,7 +163,7 @@ def complete_runtime(
    eval_script = os.path.join(task_path, 'run.sh')
    logger.info(f'Running evaluation script: {eval_script}')

-    action = CmdRunAction(command=f'cat {eval_script}', keep_prompt=False)
+    action = CmdRunAction(command=f'cat {eval_script}')
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    if obs.exit_code == 0:
@@ -121,10 +121,7 @@ def initialize_runtime(
    runtime.copy_to(dataset_dir, '/workspace/benchmark/datasets', recursive=True)

    # Check the dataset exists
-    action = CmdRunAction(
-        command='cd /workspace/benchmark/datasets && ls',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command='cd /workspace/benchmark/datasets && ls')
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0
@@ -154,10 +151,7 @@ def complete_runtime(

    assert obs.exit_code == 0

-    action = CmdRunAction(
-        command=f'cat pred_programs/{instance.pred_program_name}',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command=f'cat pred_programs/{instance.pred_program_name}')
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)

@@ -204,7 +204,7 @@ Then, in a separate Python environment with `streamlit` library, you can run the
 ```bash
 # Make sure you are inside the cloned `evaluation` repo
 conda activate streamlit # if you follow the optional conda env setup above
-streamlit app.py --server.port 8501 --server.address 0.0.0.0
+streamlit run app.py --server.port 8501 --server.address 0.0.0.0
 ```

 Then you can access the SWE-Bench trajectory visualizer at `localhost:8501`.
@@ -98,6 +98,7 @@ def process_instance(
    metadata: EvalMetadata,
    reset_logger: bool = True,
    log_dir: str | None = None,
+    runtime_failure_count: int = 0,
 ) -> EvalOutput:
    """
    Evaluate agent performance on a SWE-bench problem instance.
@@ -146,6 +147,16 @@ def process_instance(
            metadata=metadata,
        )

+    # Increase resource_factor with each prior runtime failure (runtime_failure_count)
+    if runtime_failure_count > 0:
+        config.sandbox.remote_runtime_resource_factor = min(
+            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
+            4,  # hardcode maximum resource factor to 4
+        )
+        logger.warning(
+            f'This is the second attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
+        )
+
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    # Get patch and save it to /tmp/patch.diff
@@ -177,7 +188,7 @@ def process_instance(
        "(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
        "echo 'APPLY_PATCH_FAIL')))"
    )
-    action = CmdRunAction(command=exec_command, keep_prompt=False)
+    action = CmdRunAction(command=exec_command)
    action.timeout = 600
    obs = runtime.run_action(action)
    assert isinstance(obs, CmdOutputObservation)
@@ -200,9 +211,7 @@ def process_instance(

            # Run eval script in background and save output to log file
            log_file = '/tmp/eval_output.log'
-            action = CmdRunAction(
-                command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!', keep_prompt=False
-            )
+            action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
            action.timeout = 60  # Short timeout just to get the process ID
            obs = runtime.run_action(action)

@@ -224,7 +233,7 @@ def process_instance(
                        instance['test_result']['report']['test_timeout'] = True
                        break
                    check_action = CmdRunAction(
-                        command=f'ps -p {pid} > /dev/null; echo $?', keep_prompt=False
+                        command=f'ps -p {pid} > /dev/null; echo $?'
                    )
                    check_action.timeout = 60
                    check_obs = runtime.run_action(check_action)
@@ -242,7 +251,7 @@ def process_instance(
                    time.sleep(30)  # Wait for 30 seconds before checking again

                # Read the log file
-                cat_action = CmdRunAction(command=f'cat {log_file}', keep_prompt=False)
+                cat_action = CmdRunAction(command=f'cat {log_file}')
                cat_action.timeout = 300
                cat_obs = runtime.run_action(cat_action)

@@ -282,6 +282,16 @@ def initialize_runtime(
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')

+    action = CmdRunAction(command='which python')
+    action.timeout = 600
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        obs.exit_code == 0 and 'testbed' in obs.content,
+        f'Expected to find python interpreter from testbed, but got: {str(obs)}',
+    )
+
    logger.info('-' * 30)
    logger.info('END Runtime Initialization Fn')
    logger.info('-' * 30)
@@ -337,8 +347,7 @@ def complete_runtime(
    git_patch = None
    while n_retries < 5:
        action = CmdRunAction(
-            command=f'git diff --no-color --cached {instance["base_commit"]}',
-            keep_prompt=False,
+            command=f'git diff --no-color --cached {instance["base_commit"]}'
        )
        action.timeout = 600 + 100 * n_retries
        logger.info(action, extra={'msg_type': 'ACTION'})
@@ -385,7 +394,7 @@ def process_instance(
    if runtime_failure_count > 0:
        config.sandbox.remote_runtime_resource_factor = min(
            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
-            2,  # hardcode maximum resource factor to 2
+            8,
        )
        logger.warning(
            f'This is the second attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
@@ -535,4 +544,5 @@ if __name__ == '__main__':
        args.eval_num_workers,
        process_instance,
        timeout_seconds=120 * 60,  # 2 hour PER instance should be more than enough
+        max_retries=5,
    )
@@ -104,9 +104,9 @@ for repo, diff in repo_diffs:
    # Determine if this repo has a significant diff
    is_significant = diff >= threshold
    repo_color = 'red' if is_significant else 'yellow'
-    print(colored(f'Difference: {diff} instances!', repo_color, attrs=['bold']))

    print(f"\n{colored(repo, repo_color, attrs=['bold'])}:")
+    print(colored(f'Difference: {diff} instances!', repo_color, attrs=['bold']))
    print(colored(f'X resolved but Y failed: ({len(x_instances)} instances)', 'green'))
    if x_instances:
        print('  ' + str(x_instances))
@@ -20,6 +20,13 @@ output_md_folder = args.oh_output_file.replace('.jsonl', '.viz')
 print(f'Converting {args.oh_output_file} to markdown files in {output_md_folder}')

 oh_format = pd.read_json(args.oh_output_file, orient='records', lines=True)
+
+swebench_eval_file = args.oh_output_file.replace('.jsonl', '.swebench_eval.jsonl')
+if os.path.exists(swebench_eval_file):
+    eval_output_df = pd.read_json(swebench_eval_file, orient='records', lines=True)
+else:
+    eval_output_df = None
+
 # model name is the folder name of oh_output_file
 model_name = os.path.basename(os.path.dirname(args.oh_output_file))

@@ -50,7 +57,7 @@ def convert_history_to_str(history):
    return ret


-def write_row_to_md_file(row):
+def write_row_to_md_file(row, instance_id_to_test_result):
    if 'git_patch' in row:
        model_patch = row['git_patch']
    elif 'test_result' in row and 'git_patch' in row['test_result']:
@@ -58,8 +65,21 @@ def write_row_to_md_file(row):
    else:
        raise ValueError(f'Row {row} does not have a git_patch')

-    if 'report' in row:
-        resolved = row['report'].get('resolved', False)
+    test_output = None
+    if row['instance_id'] in instance_id_to_test_result:
+        report = instance_id_to_test_result[row['instance_id']].get('report', {})
+        resolved = report.get('resolved', False)
+        test_output = instance_id_to_test_result[row['instance_id']].get(
+            'test_output', None
+        )
+    elif 'report' in row and row['report'] is not None:
+        if not isinstance(row['report'], dict):
+            resolved = None
+            print(
+                f'ERROR: Report is not a dict, but a {type(row["report"])}. Row: {row}'
+            )
+        else:
+            resolved = row['report'].get('resolved', False)
    else:
        resolved = None

@@ -84,5 +104,18 @@ def write_row_to_md_file(row):
        f.write('## Model Patch\n')
        f.write(f'{process_git_patch(model_patch)}\n')

+        f.write('## Test Output\n')
+        f.write(str(test_output))

-oh_format.progress_apply(write_row_to_md_file, axis=1)
+
+instance_id_to_test_result = {}
+if eval_output_df is not None:
+    instance_id_to_test_result = (
+        eval_output_df[['instance_id', 'test_result']]
+        .set_index('instance_id')['test_result']
+        .to_dict()
+    )
+
+oh_format.progress_apply(
+    write_row_to_md_file, axis=1, instance_id_to_test_result=instance_id_to_test_result
+)
@@ -111,6 +111,11 @@ elif os.path.exists(openhands_remote_report_jsonl):
        instance_id_to_status[row['instance_id']] = row['test_result']['report']
    df['report'] = df.apply(apply_report, axis=1)

+    report_is_dict = df['report'].apply(lambda x: isinstance(x, dict))
+    if not report_is_dict.all():
+        print(df[~report_is_dict])
+        raise ValueError(f'Report is not a dict, but a {type(row["report"])}')
+
    _n_instances = len(df)
    _n_resolved = len(df[df['report'].apply(lambda x: x.get('resolved', False))])
    _n_unresolved = _n_instances - _n_resolved
@@ -24,7 +24,7 @@ class Test(BaseIntegrationTest):
    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        # check if the file /workspace/bad.txt has been fixed
-        action = CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/bad.txt')
        obs = runtime.run_action(action)
        if obs.exit_code != 0:
            return TestResult(
@@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        # check if the file /workspace/hello.sh exists
-        action = CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/hello.sh')
        obs = runtime.run_action(action)
        if obs.exit_code != 0:
            return TestResult(
@@ -26,7 +26,7 @@ class Test(BaseIntegrationTest):
            )

        # execute the script
-        action = CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
+        action = CmdRunAction(command='bash /workspace/hello.sh')
        obs = runtime.run_action(action)
        if obs.exit_code != 0:
            return TestResult(
@@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        # check if the file /workspace/hello.sh exists
-        action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/test.txt')
        obs = runtime.run_action(action)
        if obs.exit_code != 0:
            return TestResult(
@@ -26,7 +26,7 @@ class Test(BaseIntegrationTest):
            )

        # execute the script
-        action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/test.txt')
        obs = runtime.run_action(action)

        if obs.exit_code != 0:
@@ -10,31 +10,29 @@ class Test(BaseIntegrationTest):

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

        # git init
-        action = CmdRunAction(command='git init', keep_prompt=False)
+        action = CmdRunAction(command='git init')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

        # create README.md
-        action = CmdRunAction(
-            command='echo \'print("hello world")\' > hello.py', keep_prompt=False
-        )
+        action = CmdRunAction(command='echo \'print("hello world")\' > hello.py')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

        # git add README.md
-        action = CmdRunAction(command='git add hello.py', keep_prompt=False)
+        action = CmdRunAction(command='git add hello.py')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        # check if the file /workspace/hello.py exists
-        action = CmdRunAction(command='cat /workspace/hello.py', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/hello.py')
        obs = runtime.run_action(action)
        if obs.exit_code != 0:
            return TestResult(
@@ -43,7 +41,7 @@ class Test(BaseIntegrationTest):
            )

        # check if the staging area is empty
-        action = CmdRunAction(command='git status', keep_prompt=False)
+        action = CmdRunAction(command='git status')
        obs = runtime.run_action(action)
        if obs.exit_code != 0:
            return TestResult(
@@ -83,11 +83,11 @@ class Test(BaseIntegrationTest):

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

-        action = CmdRunAction(command='mkdir -p /tmp/server', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /tmp/server')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

@@ -101,8 +101,7 @@ class Test(BaseIntegrationTest):

        # create README.md
        action = CmdRunAction(
-            command='cd /tmp/server && nohup python3 -m http.server 8000 &',
-            keep_prompt=False,
+            command='cd /tmp/server && nohup python3 -m http.server 8000 &'
        )
        obs = runtime.run_action(action)

@@ -218,4 +218,30 @@ describe("ChatInput", () => {
    // Verify image paste was handled
    expect(onImagePaste).toHaveBeenCalledWith([file]);
  });
+
+  it("should not submit when Enter is pressed during IME composition", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+
+    await user.type(textarea, "こんにちは");
+
+    // Simulate Enter during IME composition
+    fireEvent.keyDown(textarea, {
+      key: "Enter",
+      isComposing: true,
+      nativeEvent: { isComposing: true },
+    });
+
+    expect(onSubmitMock).not.toHaveBeenCalled();
+
+    // Simulate normal Enter after composition is done
+    fireEvent.keyDown(textarea, {
+      key: "Enter",
+      isComposing: false,
+      nativeEvent: { isComposing: false },
+    });
+
+    expect(onSubmitMock).toHaveBeenCalledWith("こんにちは");
+  });
 });
@@ -19,9 +19,9 @@ describe("ConversationCard", () => {
        onDelete={onDelete}
        onClick={onClick}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );
    const expectedDate = `${formatTimeDelta(new Date("2021-10-01T12:00:00Z"))} ago`;
@@ -33,20 +33,20 @@ describe("ConversationCard", () => {
    within(card).getByText(expectedDate);
  });

-  it("should render the repo if available", () => {
+  it("should render the selectedRepository if available", () => {
    const { rerender } = render(
      <ConversationCard
        onDelete={onDelete}
        onClick={onClick}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

    expect(
-      screen.queryByTestId("conversation-card-repo"),
+      screen.queryByTestId("conversation-card-selected-repository"),
    ).not.toBeInTheDocument();

    rerender(
@@ -54,13 +54,13 @@ describe("ConversationCard", () => {
        onDelete={onDelete}
        onClick={onClick}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo="org/repo"
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository="org/selectedRepository"
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

-    screen.getByTestId("conversation-card-repo");
+    screen.getByTestId("conversation-card-selected-repository");
  });

  it("should call onClick when the card is clicked", async () => {
@@ -70,9 +70,9 @@ describe("ConversationCard", () => {
        onDelete={onDelete}
        onClick={onClick}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

@@ -89,9 +89,9 @@ describe("ConversationCard", () => {
        onDelete={onDelete}
        onClick={onClick}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

@@ -114,9 +114,9 @@ describe("ConversationCard", () => {
        onClick={onClick}
        onDelete={onDelete}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

@@ -131,21 +131,21 @@ describe("ConversationCard", () => {
    expect(onDelete).toHaveBeenCalled();
  });

-  test("clicking the repo should not trigger the onClick handler", async () => {
+  test("clicking the selectedRepository should not trigger the onClick handler", async () => {
    const user = userEvent.setup();
    render(
      <ConversationCard
        onClick={onClick}
        onDelete={onDelete}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo="org/repo"
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository="org/selectedRepository"
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

-    const repo = screen.getByTestId("conversation-card-repo");
-    await user.click(repo);
+    const selectedRepository = screen.getByTestId("conversation-card-selected-repository");
+    await user.click(selectedRepository);

    expect(onClick).not.toHaveBeenCalled();
  });
@@ -156,9 +156,9 @@ describe("ConversationCard", () => {
      <ConversationCard
        onClick={onClick}
        onDelete={onDelete}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
        onChangeTitle={onChangeTitle}
      />,
    );
@@ -180,9 +180,9 @@ describe("ConversationCard", () => {
        onClick={onClick}
        onDelete={onDelete}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

@@ -202,9 +202,9 @@ describe("ConversationCard", () => {
        onClick={onClick}
        onDelete={onDelete}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

@@ -221,9 +221,9 @@ describe("ConversationCard", () => {
        onClick={onClick}
        onDelete={onDelete}
        onChangeTitle={onChangeTitle}
-        name="Conversation 1"
-        repo={null}
-        lastUpdated="2021-10-01T12:00:00Z"
+        title="Conversation 1"
+        selectedRepository={null}
+        lastUpdatedAt="2021-10-01T12:00:00Z"
      />,
    );

@@ -239,19 +239,19 @@ describe("ConversationCard", () => {
  });

  describe("state indicator", () => {
-    it("should render the 'cold' indicator by default", () => {
+    it("should render the 'STOPPED' indicator by default", () => {
      render(
        <ConversationCard
          onClick={onClick}
          onDelete={onDelete}
          onChangeTitle={onChangeTitle}
-          name="Conversation 1"
-          repo={null}
-          lastUpdated="2021-10-01T12:00:00Z"
+          title="Conversation 1"
+          selectedRepository={null}
+          lastUpdatedAt="2021-10-01T12:00:00Z"
        />,
      );

-      screen.getByTestId("cold-indicator");
+      screen.getByTestId("STOPPED-indicator");
    });

    it("should render the other indicators when provided", () => {
@@ -260,15 +260,15 @@ describe("ConversationCard", () => {
          onClick={onClick}
          onDelete={onDelete}
          onChangeTitle={onChangeTitle}
-          name="Conversation 1"
-          repo={null}
-          lastUpdated="2021-10-01T12:00:00Z"
-          state="warm"
+          title="Conversation 1"
+          selectedRepository={null}
+          lastUpdatedAt="2021-10-01T12:00:00Z"
+          status="RUNNING"
        />,
      );

-      expect(screen.queryByTestId("cold-indicator")).not.toBeInTheDocument();
-      screen.getByTestId("warm-indicator");
+      expect(screen.queryByTestId("STOPPED-indicator")).not.toBeInTheDocument();
+      screen.getByTestId("RUNNING-indicator");
    });
  });
 });
@@ -175,7 +175,7 @@ describe("ConversationPanel", () => {

    // Ensure the conversation is renamed
    expect(updateUserConversationSpy).toHaveBeenCalledWith("3", {
-      name: "Conversation 1 Renamed",
+      title: "Conversation 1 Renamed",
    });
  });

@@ -14,7 +14,8 @@ describe("GitHubRepositorySelector", () => {
      <GitHubRepositorySelector
        onInputChange={onInputChangeMock}
        onSelect={onSelectMock}
-        repositories={[]}
+        publicRepositories={[]}
+        userRepositories={[]}
      />,
    );

@@ -36,7 +37,8 @@ describe("GitHubRepositorySelector", () => {
      <GitHubRepositorySelector
        onInputChange={onInputChangeMock}
        onSelect={onSelectMock}
-        repositories={[]}
+        publicRepositories={[]}
+        userRepositories={[]}
      />,
    );

@@ -67,7 +69,8 @@ describe("GitHubRepositorySelector", () => {
      <GitHubRepositorySelector
        onInputChange={onInputChangeMock}
        onSelect={onSelectMock}
-        repositories={[]}
+        publicRepositories={[]}
+        userRepositories={[]}
      />,
    );

@@ -4,7 +4,7 @@ import { describe, expect, it } from "vitest";
 import { renderWithProviders } from "test-utils";
 import { createRoutesStub } from "react-router";
 import { Sidebar } from "#/components/features/sidebar/sidebar";
-import { MULTI_CONVO_UI_IS_ENABLED } from "#/utils/constants";
+import { MULTI_CONVERSATION_UI } from "#/utils/feature-flags";

 const renderSidebar = () => {
  const RouterStub = createRoutesStub([
@@ -18,7 +18,7 @@ const renderSidebar = () => {
 };

 describe("Sidebar", () => {
-  it.skipIf(!MULTI_CONVO_UI_IS_ENABLED)(
+  it.skipIf(!MULTI_CONVERSATION_UI)(
    "should have the conversation panel open by default",
    () => {
      renderSidebar();
@@ -26,7 +26,7 @@ describe("Sidebar", () => {
    },
  );

-  it.skipIf(!MULTI_CONVO_UI_IS_ENABLED)(
+  it.skipIf(!MULTI_CONVERSATION_UI)(
    "should toggle the conversation panel",
    async () => {
      const user = userEvent.setup();
@@ -0,0 +1,35 @@
+import { screen } from "@testing-library/react";
+import { describe, it, expect } from "vitest";
+import { renderWithProviders } from "test-utils";
+import { RuntimeSizeSelector } from "#/components/shared/modals/settings/runtime-size-selector";
+
+const renderRuntimeSizeSelector = () =>
+  renderWithProviders(<RuntimeSizeSelector isDisabled={false} />);
+
+describe("RuntimeSizeSelector", () => {
+  it("should show both runtime size options", () => {
+    renderRuntimeSizeSelector();
+    // The options are in the hidden select element
+    const select = screen.getByRole("combobox", { hidden: true });
+    expect(select).toHaveValue("1");
+    expect(select).toHaveDisplayValue("1x (2 core, 8G)");
+    expect(select.children).toHaveLength(3); // Empty option + 2 size options
+  });
+
+  it("should show the full description text for disabled options", async () => {
+    renderRuntimeSizeSelector();
+
+    // Click the button to open the dropdown
+    const button = screen.getByRole("button", {
+      name: "1x (2 core, 8G) SETTINGS_FORM$RUNTIME_SIZE_LABEL",
+    });
+    button.click();
+
+    // Wait for the dropdown to open and find the description text
+    const description = await screen.findByText(
+      "Runtime sizes over 1 are disabled by default, please contact contact@all-hands.dev to get access to larger runtimes.",
+    );
+    expect(description).toBeInTheDocument();
+    expect(description).toHaveClass("whitespace-normal", "break-words");
+  });
+});
@@ -0,0 +1,52 @@
+import { screen, fireEvent } from "@testing-library/react";
+import { describe, it, expect, vi } from "vitest";
+import { renderWithProviders } from "test-utils";
+import { createRoutesStub } from "react-router";
+import { DEFAULT_SETTINGS } from "#/services/settings";
+import { SettingsForm } from "#/components/shared/modals/settings/settings-form";
+import OpenHands from "#/api/open-hands";
+
+describe("SettingsForm", () => {
+  // Stub OpenHands.getConfig so it resolves with a SaaS-mode config.
+  const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
+  getConfigSpy.mockResolvedValue({
+    APP_MODE: "saas",
+    GITHUB_CLIENT_ID: "123",
+    POSTHOG_CLIENT_KEY: "123",
+  });
+
+  // Mount SettingsForm at "/" through a react-router routes stub.
+  const RouterStub = createRoutesStub([
+    {
+      Component: () => (
+        <SettingsForm
+          settings={DEFAULT_SETTINGS}
+          models={[]}
+          agents={[]}
+          securityAnalyzers={[]}
+          onClose={() => {}}
+        />
+      ),
+      path: "/",
+    },
+  ]);
+
+  it("should not show runtime size selector by default", () => {
+    renderWithProviders(<RouterStub />);
+    // Query by the i18n key, the same text the next test uses to find the
+    // label. Asserting on the English string would pass vacuously, since the
+    // lookups in this suite match labels by their translation keys.
+    expect(
+      screen.queryByText("SETTINGS_FORM$RUNTIME_SIZE_LABEL"),
+    ).not.toBeInTheDocument();
+  });
+
+  it("should show runtime size selector when advanced options are enabled", async () => {
+    renderWithProviders(<RouterStub />);
+    const advancedSwitch = screen.getByRole("switch", {
+      name: "SETTINGS_FORM$ADVANCED_OPTIONS_LABEL",
+    });
+    fireEvent.click(advancedSwitch);
+    await screen.findByText("SETTINGS_FORM$RUNTIME_SIZE_LABEL");
+  });
+});
@@ -5,7 +5,7 @@ import { screen, waitFor } from "@testing-library/react";
 import toast from "react-hot-toast";
 import App from "#/routes/_oh.app/route";
 import OpenHands from "#/api/open-hands";
-import { MULTI_CONVO_UI_IS_ENABLED } from "#/utils/constants";
+import { MULTI_CONVERSATION_UI } from "#/utils/feature-flags";

 describe("App", () => {
  const RouteStub = createRoutesStub([
@@ -35,7 +35,7 @@ describe("App", () => {
    await screen.findByTestId("app-route");
  });

-  it.skipIf(!MULTI_CONVO_UI_IS_ENABLED)(
+  it.skipIf(!MULTI_CONVERSATION_UI)(
    "should call endSession if the user does not have permission to view conversation",
    async () => {
      const errorToastSpy = vi.spyOn(toast, "error");
@@ -59,10 +59,10 @@ describe("App", () => {

    getConversationSpy.mockResolvedValue({
      conversation_id: "9999",
-      lastUpdated: "",
-      name: "",
-      repo: "",
-      state: "cold",
+      last_updated_at: "",
+      title: "",
+      selected_repository: "",
+      status: "STOPPED",
    });
    const { rerender } = renderWithProviders(
      <RouteStub initialEntries={["/conversation/9999"]} />,
@@ -52,7 +52,7 @@
        "@playwright/test": "^1.49.1",
        "@react-router/dev": "^7.1.1",
        "@tailwindcss/typography": "^0.5.15",
-        "@tanstack/eslint-plugin-query": "^5.62.9",
+        "@tanstack/eslint-plugin-query": "^5.62.15",
        "@testing-library/jest-dom": "^6.6.1",
        "@testing-library/react": "^16.1.0",
        "@testing-library/user-event": "^14.5.2",
@@ -5344,11 +5344,10 @@
      }
    },
    "node_modules/@tanstack/eslint-plugin-query": {
-      "version": "5.62.9",
-      "resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.62.9.tgz",
-      "integrity": "sha512-F3onhTcpBj7zQDo0NVtZwZQKRFx8BwpSabMJybl9no3+dFHUurvNMrH5M/6KNpkdDCf3zyHWadruZL6636B8Fw==",
+      "version": "5.62.15",
+      "resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.62.15.tgz",
+      "integrity": "sha512-24BHoF3LIzyptjrZXc1IpaISno+fhVD3zWWso/HPSB+ZVOyOXoiQSQc2K362T13JKJ07EInhHi1+KyNoRzCCfQ==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
        "@typescript-eslint/utils": "^8.18.1"
      },
@@ -79,7 +79,7 @@
    "@playwright/test": "^1.49.1",
    "@react-router/dev": "^7.1.1",
    "@tailwindcss/typography": "^0.5.15",
-    "@tanstack/eslint-plugin-query": "^5.62.9",
+    "@tanstack/eslint-plugin-query": "^5.62.15",
    "@testing-library/jest-dom": "^6.6.1",
    "@testing-library/react": "^16.1.0",
    "@testing-library/user-event": "^14.5.2",
@@ -110,16 +110,11 @@ export const searchPublicRepositories = async (
  sort: "" | "updated" | "stars" | "forks" = "stars",
  order: "desc" | "asc" = "desc",
 ): Promise<GitHubRepository[]> => {
-  const sanitizedQuery = query.trim();
-  if (!sanitizedQuery) {
-    return [];
-  }
-
  const response = await github.get<{ items: GitHubRepository[] }>(
    "/search/repositories",
    {
      params: {
-        q: sanitizedQuery,
+        q: query,
        per_page,
        sort,
        order,
@@ -9,6 +9,7 @@ import {
  GetVSCodeUrlResponse,
  AuthenticateResponse,
  Conversation,
+  ResultSet,
 } from "./open-hands.types";
 import { openHands } from "./open-hands-axios";
 import { ApiSettings } from "#/services/settings";
@@ -222,8 +223,10 @@ class OpenHands {
  }

  static async getUserConversations(): Promise<Conversation[]> {
-    const { data } = await openHands.get<Conversation[]>("/api/conversations");
-    return data;
+    const { data } = await openHands.get<ResultSet<Conversation>>(
+      "/api/conversations?limit=9",
+    );
+    return data.results;
  }

  static async deleteUserConversation(conversationId: string): Promise<void> {
@@ -232,9 +235,9 @@ class OpenHands {

  static async updateUserConversation(
    conversationId: string,
-    conversation: Partial<Omit<Conversation, "id">>,
+    conversation: Partial<Omit<Conversation, "conversation_id">>,
  ): Promise<void> {
-    await openHands.put(`/api/conversations/${conversationId}`, conversation);
+    await openHands.patch(`/api/conversations/${conversationId}`, conversation);
  }

  static async createConversation(
@@ -1,4 +1,4 @@
-import { ProjectState } from "#/components/features/conversation-panel/conversation-state-indicator";
+import { ProjectStatus } from "#/components/features/conversation-panel/conversation-state-indicator";

 export interface ErrorResponse {
  error: string;
@@ -62,8 +62,13 @@ export interface AuthenticateResponse {

 export interface Conversation {
  conversation_id: string;
-  name: string;
-  repo: string | null;
-  lastUpdated: string;
-  state: ProjectState;
+  title: string;
+  selected_repository: string | null;
+  last_updated_at: string;
+  status: ProjectStatus;
+}
+
+export interface ResultSet<T> {
+  results: T[];
+  next_page_id: string | null;
 }
@@ -61,4 +61,8 @@ export const AGENT_STATUS_MAP: {
    message: I18nKey.CHAT_INTERFACE$AGENT_ACTION_USER_REJECTED_MESSAGE,
    indicator: IndicatorColor.RED,
  },
+  [AgentState.RATE_LIMITED]: {
+    message: I18nKey.CHAT_INTERFACE$AGENT_RATE_LIMITED_MESSAGE,
+    indicator: IndicatorColor.YELLOW,
+  },
 };
@@ -94,7 +94,12 @@ export function ChatInput({
  };

  const handleKeyPress = (event: React.KeyboardEvent<HTMLTextAreaElement>) => {
-    if (event.key === "Enter" && !event.shiftKey && !disabled) {
+    if (
+      event.key === "Enter" &&
+      !event.shiftKey &&
+      !disabled &&
+      !event.nativeEvent.isComposing
+    ) {
      event.preventDefault();
      handleSubmitMessage();
    }
@@ -154,7 +154,8 @@ export function ChatInterface() {
          onStop={handleStop}
          isDisabled={
            curAgentState === AgentState.LOADING ||
-            curAgentState === AgentState.AWAITING_USER_CONFIRMATION
+            curAgentState === AgentState.AWAITING_USER_CONFIRMATION ||
+            curAgentState === AgentState.RATE_LIMITED
          }
          mode={curAgentState === AgentState.RUNNING ? "stop" : "submit"}
          value={messageToSend ?? undefined}
@@ -18,7 +18,7 @@ export function ContextMenu({
    <ul
      data-testid={testId}
      ref={ref}
-      className={cn("bg-[#404040] rounded-md w-[224px]", className)}
+      className={cn("bg-[#404040] rounded-md w-[140px]", className)}
    >
      {children}
    </ul>
@@ -2,7 +2,7 @@ import React from "react";
 import { formatTimeDelta } from "#/utils/format-time-delta";
 import { ConversationRepoLink } from "./conversation-repo-link";
 import {
-  ProjectState,
+  ProjectStatus,
  ConversationStateIndicator,
 } from "./conversation-state-indicator";
 import { ContextMenu } from "../context-menu/context-menu";
@@ -13,20 +13,20 @@ interface ProjectCardProps {
  onClick: () => void;
  onDelete: () => void;
  onChangeTitle: (title: string) => void;
-  name: string;
-  repo: string | null;
-  lastUpdated: string; // ISO 8601
-  state?: ProjectState;
+  title: string;
+  selectedRepository: string | null;
+  lastUpdatedAt: string; // ISO 8601
+  status?: ProjectStatus;
 }

 export function ConversationCard({
  onClick,
  onDelete,
  onChangeTitle,
-  name,
-  repo,
-  lastUpdated,
-  state = "cold",
+  title,
+  selectedRepository,
+  lastUpdatedAt,
+  status = "STOPPED",
 }: ProjectCardProps) {
  const [contextMenuVisible, setContextMenuVisible] = React.useState(false);
  const inputRef = React.useRef<HTMLInputElement>(null);
@@ -38,7 +38,13 @@ export function ConversationCard({
      inputRef.current!.value = trimmed;
    } else {
      // reset the value if it's empty
-      inputRef.current!.value = name;
+      inputRef.current!.value = title;
+    }
+  };
+
+  const handleKeyUp = (event: React.KeyboardEvent<HTMLInputElement>) => {
+    if (event.key === "Enter") {
+      event.currentTarget.blur();
    }
  };

@@ -55,47 +61,45 @@ export function ConversationCard({
    <div
      data-testid="conversation-card"
      onClick={onClick}
-      className="h-[100px] w-full px-[18px] py-4 border-b border-neutral-600"
+      className="h-[100px] w-full px-[18px] py-4 border-b border-neutral-600 cursor-pointer"
    >
-      <div className="flex items-center justify-between">
+      <div className="flex items-center justify-between space-x-1">
        <input
          ref={inputRef}
          data-testid="conversation-card-title"
          onClick={handleInputClick}
          onBlur={handleBlur}
+          onKeyUp={handleKeyUp}
          type="text"
-          defaultValue={name}
-          className="text-sm leading-6 font-semibold bg-transparent"
+          defaultValue={title}
+          className="text-sm leading-6 font-semibold bg-transparent w-full"
        />

        <div className="flex items-center gap-2 relative">
-          <ConversationStateIndicator state={state} />
+          <ConversationStateIndicator status={status} />
          <EllipsisButton
            onClick={(event) => {
              event.stopPropagation();
              setContextMenuVisible((prev) => !prev);
            }}
          />
-          {contextMenuVisible && (
-            <ContextMenu testId="context-menu" className="absolute left-full">
-              <ContextMenuListItem
-                testId="delete-button"
-                onClick={handleDelete}
-              >
-                Delete
-              </ContextMenuListItem>
-            </ContextMenu>
-          )}
        </div>
      </div>
-      {repo && (
+      {contextMenuVisible && (
+        <ContextMenu testId="context-menu" className="left-full float-right">
+          <ContextMenuListItem testId="delete-button" onClick={handleDelete}>
+            Delete
+          </ContextMenuListItem>
+        </ContextMenu>
+      )}
+      {selectedRepository && (
        <ConversationRepoLink
-          repo={repo}
+          selectedRepository={selectedRepository}
          onClick={(e) => e.stopPropagation()}
        />
      )}
      <p className="text-xs text-neutral-400">
-        <time>{formatTimeDelta(new Date(lastUpdated))} ago</time>
+        <time>{formatTimeDelta(new Date(lastUpdatedAt))} ago</time>
      </p>
    </div>
  );
@@ -60,7 +60,7 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
    if (oldTitle !== newTitle)
      updateConversation({
        id: conversationId,
-        conversation: { name: newTitle },
+        conversation: { title: newTitle },
      });
  };

@@ -72,7 +72,7 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
  return (
    <div
      data-testid="conversation-panel"
-      className="w-[350px] h-full border border-neutral-700 bg-neutral-800 rounded-xl"
+      className="w-[350px] h-full border border-neutral-700 bg-neutral-800 rounded-xl overflow-y-auto"
    >
      <div className="pt-4 px-4 flex items-center justify-between">
        {location.pathname.startsWith("/conversation") && (
@@ -98,12 +98,12 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
          onClick={() => handleClickCard(project.conversation_id)}
          onDelete={() => handleDeleteProject(project.conversation_id)}
          onChangeTitle={(title) =>
-            handleChangeTitle(project.conversation_id, project.name, title)
+            handleChangeTitle(project.conversation_id, project.title, title)
          }
-          name={project.name}
-          repo={project.repo}
-          lastUpdated={project.lastUpdated}
-          state={project.state}
+          title={project.title}
+          selectedRepository={project.selected_repository}
+          lastUpdatedAt={project.last_updated_at}
+          status={project.status}
        />
      ))}

@@ -1,21 +1,26 @@
 interface ConversationRepoLinkProps {
-  repo: string;
+  selectedRepository: string;
  onClick?: (event: React.MouseEvent<HTMLAnchorElement>) => void;
 }

+/**
+ * Link to the conversation's selected repository on GitHub.
+ * Opens in a new tab; `rel` keeps the opened page from reaching `window.opener`.
+ */
 export function ConversationRepoLink({
-  repo,
+  selectedRepository,
  onClick,
 }: ConversationRepoLinkProps) {
  return (
    <a
-      data-testid="conversation-card-repo"
-      href={`https://github.com/${repo}`}
+      data-testid="conversation-card-selected-repository"
+      href={`https://github.com/${selectedRepository}`}
-      target="_blank noopener noreferrer"
+      target="_blank"
+      rel="noopener noreferrer"
      onClick={onClick}
      className="text-xs text-neutral-400 hover:text-neutral-200"
    >
-      {repo}
+      {selectedRepository}
    </a>
  );
 }
@@ -1,39 +1,25 @@
 import ColdIcon from "./state-indicators/cold.svg?react";
-import CoolingIcon from "./state-indicators/cooling.svg?react";
-import FinishedIcon from "./state-indicators/finished.svg?react";
 import RunningIcon from "./state-indicators/running.svg?react";
-import WaitingIcon from "./state-indicators/waiting.svg?react";
-import WarmIcon from "./state-indicators/warm.svg?react";

 type SVGIcon = React.FunctionComponent<React.SVGProps<SVGSVGElement>>;
-export type ProjectState =
-  | "cold"
-  | "cooling"
-  | "finished"
-  | "running"
-  | "waiting"
-  | "warm";
+export type ProjectStatus = "RUNNING" | "STOPPED";

-const INDICATORS: Record<ProjectState, SVGIcon> = {
-  cold: ColdIcon,
-  cooling: CoolingIcon,
-  finished: FinishedIcon,
-  running: RunningIcon,
-  waiting: WaitingIcon,
-  warm: WarmIcon,
+const INDICATORS: Record<ProjectStatus, SVGIcon> = {
+  STOPPED: ColdIcon,
+  RUNNING: RunningIcon,
 };

 interface ConversationStateIndicatorProps {
-  state: ProjectState;
+  status: ProjectStatus;
 }

 export function ConversationStateIndicator({
-  state,
+  status,
 }: ConversationStateIndicatorProps) {
-  const StateIcon = INDICATORS[state];
+  const StateIcon = INDICATORS[status];

  return (
-    <div data-testid={`${state}-indicator`}>
+    <div data-testid={`${status}-indicator`}>
      <StateIcon />
    </div>
  );
@@ -1,46 +1,48 @@
 import React from "react";
-import { Autocomplete, AutocompleteItem } from "@nextui-org/react";
+import {
+  Autocomplete,
+  AutocompleteItem,
+  AutocompleteSection,
+} from "@nextui-org/react";
 import { useDispatch } from "react-redux";
 import posthog from "posthog-js";
 import { setSelectedRepository } from "#/state/initial-query-slice";
 import { useConfig } from "#/hooks/query/use-config";
-
-interface GitHubRepositoryWithPublic extends GitHubRepository {
-  is_public?: boolean;
-}
+import { sanitizeQuery } from "#/utils/sanitize-query";

 interface GitHubRepositorySelectorProps {
  onInputChange: (value: string) => void;
  onSelect: () => void;
-  repositories: GitHubRepositoryWithPublic[];
+  userRepositories: GitHubRepository[];
+  publicRepositories: GitHubRepository[];
 }

 export function GitHubRepositorySelector({
  onInputChange,
  onSelect,
-  repositories,
+  userRepositories,
+  publicRepositories,
 }: GitHubRepositorySelectorProps) {
  const { data: config } = useConfig();
  const [selectedKey, setSelectedKey] = React.useState<string | null>(null);

+  const allRepositories: GitHubRepository[] = [
+    ...publicRepositories.filter(
+      (repo) => !userRepositories.some((r) => r.id === repo.id), // skip duplicates
+    ),
+    ...userRepositories,
+  ];
+
  const dispatch = useDispatch();

  const handleRepoSelection = (id: string | null) => {
-    const repo = repositories.find((r) => r.id.toString() === id);
-    if (!repo) return;
-
-    if (repo.id === -1000) {
-      window.open(
-        `https://github.com/apps/${config?.APP_SLUG}/installations/new`,
-        "_blank",
-      );
-      return;
+    const repo = allRepositories.find((r) => r.id.toString() === id);
+    if (repo) {
+      dispatch(setSelectedRepository(repo.full_name));
+      posthog.capture("repository_selected");
+      onSelect();
+      setSelectedKey(id);
    }
-
-    dispatch(setSelectedRepository(repo.full_name));
-    posthog.capture("repository_selected");
-    onSelect();
-    setSelectedKey(id);
  };

  const handleClearSelection = () => {
@@ -55,8 +57,8 @@ export function GitHubRepositorySelector({
      name="repo"
      aria-label="GitHub Repository"
      placeholder="Select a GitHub project"
+      isVirtualized={false}
      selectedKey={selectedKey}
-      items={repositories}
      inputProps={{
        classNames: {
          inputWrapper:
@@ -65,27 +67,61 @@ export function GitHubRepositorySelector({
      }}
      onSelectionChange={(id) => handleRepoSelection(id?.toString() ?? null)}
      onInputChange={onInputChange}
-      clearButtonProps={{ onPress: handleClearSelection }}
+      clearButtonProps={{ onClick: handleClearSelection }}
      listboxProps={{
        emptyContent,
      }}
+      defaultFilter={(textValue, inputValue) =>
+        !inputValue ||
+        sanitizeQuery(textValue).includes(sanitizeQuery(inputValue))
+      }
    >
-      {(item) => (
-        <AutocompleteItem
-          data-testid="github-repo-item"
-          key={item.id}
-          value={item.id}
-          textValue={item.full_name}
-        >
-          <div className="flex items-center justify-between">
-            {item.full_name}
-            {item.is_public && !!item.stargazers_count && (
-              <span className="text-xs text-gray-400">
-                ({item.stargazers_count}⭐)
+      {config?.APP_MODE === "saas" &&
+        config?.APP_SLUG &&
+        ((
+          <AutocompleteItem key="install">
+            <a
+              href={`https://github.com/apps/${config.APP_SLUG}/installations/new`}
+              target="_blank"
+              rel="noreferrer noopener"
+              onClick={(e) => e.stopPropagation()}
+            >
+              Add more repositories...
+            </a>
+          </AutocompleteItem> // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        ) as any)}
+      {userRepositories.length > 0 && (
+        <AutocompleteSection showDivider title="Your Repos">
+          {userRepositories.map((repo) => (
+            <AutocompleteItem
+              data-testid="github-repo-item"
+              key={repo.id}
+              value={repo.id}
+              className="data-[selected=true]:bg-default-100"
+              textValue={repo.full_name}
+            >
+              {repo.full_name}
+            </AutocompleteItem>
+          ))}
+        </AutocompleteSection>
+      )}
+      {publicRepositories.length > 0 && (
+        <AutocompleteSection showDivider title="Public Repos">
+          {publicRepositories.map((repo) => (
+            <AutocompleteItem
+              data-testid="github-repo-item"
+              key={repo.id}
+              value={repo.id}
+              className="data-[selected=true]:bg-default-100"
+              textValue={repo.full_name}
+            >
+              {repo.full_name}
+              <span className="ml-1 text-gray-400">
+                ({repo.stargazers_count || 0}⭐)
              </span>
-            )}
-          </div>
-        </AutocompleteItem>
+            </AutocompleteItem>
+          ))}
+        </AutocompleteSection>
      )}
    </Autocomplete>
  );
@@ -11,7 +11,6 @@ import { useSearchRepositories } from "#/hooks/query/use-search-repositories";
 import { useUserRepositories } from "#/hooks/query/use-user-repositories";
 import { sanitizeQuery } from "#/utils/sanitize-query";
 import { useDebounce } from "#/hooks/use-debounce";
-import { useConfig } from "#/hooks/query/use-config";

 interface GitHubRepositoriesSuggestionBoxProps {
  handleSubmit: () => void;
@@ -29,7 +28,6 @@ export function GitHubRepositoriesSuggestionBox({
  const [searchQuery, setSearchQuery] = React.useState<string>("");
  const debouncedSearchQuery = useDebounce(searchQuery, 300);

-  const { data: config } = useConfig();
  // TODO: Use `useQueries` to fetch all repositories in parallel
  const { data: appRepositories } = useAppRepositories();
  const { data: userRepositories } = useUserRepositories();
@@ -37,19 +35,6 @@ export function GitHubRepositoriesSuggestionBox({
    sanitizeQuery(debouncedSearchQuery),
  );

-  const saasPlaceholderRepository = React.useMemo(() => {
-    if (config?.APP_MODE === "saas" && config?.APP_SLUG) {
-      return [
-        {
-          id: -1000,
-          full_name: "Add more repositories...",
-        },
-      ];
-    }
-
-    return [];
-  }, [config]);
-
  const repositories =
    userRepositories?.pages.flatMap((page) => page.data) ||
    appRepositories?.pages.flatMap((page) => page.data) ||
@@ -74,11 +59,8 @@ export function GitHubRepositoriesSuggestionBox({
            <GitHubRepositorySelector
              onInputChange={setSearchQuery}
              onSelect={handleSubmit}
-              repositories={[
-                ...saasPlaceholderRepository,
-                ...searchedRepos,
-                ...repositories,
-              ]}
+              publicRepositories={searchedRepos}
+              userRepositories={repositories}
            />
          ) : (
            <ModalButton
@@ -1,6 +1,6 @@
 import React from "react";
 import { useLocation } from "react-router";
-import FolderIcon from "#/icons/docs.svg?react";
+import { FaListUl } from "react-icons/fa";
 import { useAuth } from "#/context/auth-context";
 import { useGitHubUser } from "#/hooks/query/use-github-user";
 import { useIsAuthed } from "#/hooks/query/use-is-authed";
@@ -16,8 +16,7 @@ import { SettingsModal } from "#/components/shared/modals/settings/settings-moda
 import { useSettingsUpToDate } from "#/context/settings-up-to-date-context";
 import { useSettings } from "#/hooks/query/use-settings";
 import { ConversationPanel } from "../conversation-panel/conversation-panel";
-import { cn } from "#/utils/utils";
-import { MULTI_CONVO_UI_IS_ENABLED } from "#/utils/constants";
+import { MULTI_CONVERSATION_UI } from "#/utils/feature-flags";

 export function Sidebar() {
  const location = useLocation();
@@ -32,9 +31,18 @@ export function Sidebar() {
  const [settingsModalIsOpen, setSettingsModalIsOpen] = React.useState(false);
  const [startNewProjectModalIsOpen, setStartNewProjectModalIsOpen] =
    React.useState(false);
-  const [conversationPanelIsOpen, setConversationPanelIsOpen] = React.useState(
-    MULTI_CONVO_UI_IS_ENABLED,
-  );
+  const [conversationPanelIsOpen, setConversationPanelIsOpen] =
+    React.useState(false);
+  const conversationPanelRef = React.useRef<HTMLDivElement | null>(null);
+
+  const handleClick = (event: MouseEvent) => {
+    const conversationPanel = conversationPanelRef.current;
+    if (conversationPanelIsOpen && conversationPanel) {
+      if (!conversationPanel.contains(event.target as Node)) {
+        setConversationPanelIsOpen(false);
+      }
+    }
+  };

  React.useEffect(() => {
    // If the github token is invalid, open the account settings modal again
@@ -43,6 +51,13 @@ export function Sidebar() {
    }
  }, [user.isError]);

+  React.useEffect(() => {
+    document.addEventListener("click", handleClick);
+    return () => {
+      document.removeEventListener("click", handleClick);
+    };
+  }, [conversationPanelIsOpen]);
+
  const handleAccountSettingsModalClose = () => {
    // If the user closes the modal without connecting to GitHub,
    // we need to log them out to clear the invalid token from the
@@ -77,16 +92,17 @@ export function Sidebar() {
            />
          )}
          <SettingsButton onClick={() => setSettingsModalIsOpen(true)} />
-          {MULTI_CONVO_UI_IS_ENABLED && (
+          {MULTI_CONVERSATION_UI && (
            <button
              data-testid="toggle-conversation-panel"
              type="button"
              onClick={() => setConversationPanelIsOpen((prev) => !prev)}
-              className={cn(
-                conversationPanelIsOpen ? "border-b-2 border-[#FFE165]" : "",
-              )}
            >
-              <FolderIcon width={28} height={28} />
+              <FaListUl
+                width={28}
+                height={28}
+                fill={conversationPanelIsOpen ? "#FFE165" : "#FFFFFF"}
+              />
            </button>
          )}
          <DocsButton />
@@ -97,6 +113,7 @@ export function Sidebar() {

        {conversationPanelIsOpen && (
          <div
+            ref={conversationPanelRef}
            className="absolute h-full left-[calc(100%+12px)] top-0 z-20" // 12px padding (sidebar parent)
          >
            <ConversationPanel
@@ -1,4 +1,11 @@
-import React, { CSSProperties, JSX, useEffect, useRef, useState } from "react";
+import React, {
+  CSSProperties,
+  JSX,
+  useCallback,
+  useEffect,
+  useRef,
+  useState,
+} from "react";
 import {
  VscChevronDown,
  VscChevronLeft,
@@ -38,55 +45,70 @@ export function ResizablePanel({
  orientation,
  initialSize,
 }: ResizablePanelProps): JSX.Element {
-  const [firstSize, setFirstSize] = useState<number>(initialSize);
+  const isHorizontal = orientation === Orientation.HORIZONTAL;
+
+  const getConstraints = useCallback(
+    () => ({
+      min: isHorizontal ? 350 : 300,
+      max: isHorizontal ? window.innerWidth * 0.5 : window.innerHeight * 0.7,
+    }),
+    [isHorizontal],
+  );
+
+  const constrainSize = useCallback(
+    (size: number) => {
+      const { min, max } = getConstraints();
+      return Math.min(Math.max(size, min), max);
+    },
+    [getConstraints],
+  );
+
+  const [firstSize, setFirstSize] = useState(() => constrainSize(initialSize));
  const [dividerPosition, setDividerPosition] = useState<number | null>(null);
  const firstRef = useRef<HTMLDivElement>(null);
  const secondRef = useRef<HTMLDivElement>(null);
  const [collapse, setCollapse] = useState<Collapse>(Collapse.SPLIT);
-  const isHorizontal = orientation === Orientation.HORIZONTAL;

  useEffect(() => {
-    if (dividerPosition == null || !firstRef.current) {
-      return undefined;
-    }
-    const getFirstSizeFromEvent = (e: MouseEvent) => {
-      const position = isHorizontal ? e.clientX : e.clientY;
-      return firstSize + position - dividerPosition;
+    const handleResize = () => setFirstSize(constrainSize(firstSize));
+    const timeoutId = setTimeout(handleResize, 100);
+    window.addEventListener("resize", handleResize);
+    return () => {
+      clearTimeout(timeoutId);
+      window.removeEventListener("resize", handleResize);
    };
+  }, [firstSize, constrainSize]);
+
+  useEffect(() => {
+    if (!dividerPosition) return undefined;
+
    const onMouseMove = (e: MouseEvent) => {
      e.preventDefault();
-      const newFirstSize = `${getFirstSizeFromEvent(e)}px`;
-      const { current } = firstRef;
-      if (current) {
-        if (isHorizontal) {
-          current.style.width = newFirstSize;
-          current.style.minWidth = newFirstSize;
-        } else {
-          current.style.height = newFirstSize;
-          current.style.minHeight = newFirstSize;
-        }
-      }
+      const delta = (isHorizontal ? e.clientX : e.clientY) - dividerPosition;
+      setFirstSize(constrainSize(firstSize + delta));
+      setDividerPosition(isHorizontal ? e.clientX : e.clientY);
    };
+
    const onMouseUp = (e: MouseEvent) => {
      e.preventDefault();
-      if (firstRef.current) {
-        firstRef.current.style.transition = "";
-      }
-      if (secondRef.current) {
-        secondRef.current.style.transition = "";
-      }
-      setFirstSize(getFirstSizeFromEvent(e));
+      if (firstRef.current) firstRef.current.style.transition = "";
+      if (secondRef.current) secondRef.current.style.transition = "";
+      setFirstSize(
+        constrainSize(
+          firstSize +
+            ((isHorizontal ? e.clientX : e.clientY) - dividerPosition),
+        ),
+      );
      setDividerPosition(null);
-      document.removeEventListener("mousemove", onMouseMove);
-      document.removeEventListener("mouseup", onMouseUp);
    };
+
    document.addEventListener("mousemove", onMouseMove);
    document.addEventListener("mouseup", onMouseUp);
    return () => {
      document.removeEventListener("mousemove", onMouseMove);
      document.removeEventListener("mouseup", onMouseUp);
    };
-  }, [dividerPosition, firstSize, orientation]);
+  }, [dividerPosition, firstSize, isHorizontal, constrainSize]);

  const onMouseDown = (e: React.MouseEvent) => {
    e.preventDefault();
@@ -100,67 +122,82 @@ export function ResizablePanel({
    setDividerPosition(position);
  };

-  const getStyleForFirst = () => {
-    const style: CSSProperties = { overflow: "hidden" };
-    if (collapse === Collapse.COLLAPSED) {
-      style.opacity = 0;
-      style.width = 0;
-      style.minWidth = 0;
-      style.height = 0;
-      style.minHeight = 0;
-    } else if (collapse === Collapse.SPLIT) {
-      const firstSizePx = `${firstSize}px`;
-      if (isHorizontal) {
-        style.width = firstSizePx;
-        style.minWidth = firstSizePx;
-      } else {
-        style.height = firstSizePx;
-        style.minHeight = firstSizePx;
+  const getPanelStyle = useCallback(
+    (isFirst: boolean): CSSProperties => {
+      const style: CSSProperties = { overflow: "hidden" };
+      const { min } = getConstraints();
+      const isHidden =
+        (isFirst && collapse === Collapse.COLLAPSED) ||
+        (!isFirst && collapse === Collapse.FILLED);
+
+      const hiddenStyle: CSSProperties = {
+        ...style,
+        opacity: 0,
+        width: 0,
+        minWidth: 0,
+        height: 0,
+        minHeight: 0,
+      };
+
+      const expandedStyle: CSSProperties = { ...style, flexGrow: 1 };
+
+      if (isHidden) {
+        return hiddenStyle;
      }
-    } else {
-      style.flexGrow = 1;
-    }
-    return style;
-  };

-  const getStyleForSecond = () => {
-    const style: CSSProperties = { overflow: "hidden" };
-    if (collapse === Collapse.FILLED) {
-      style.opacity = 0;
-      style.width = 0;
-      style.minWidth = 0;
-      style.height = 0;
-      style.minHeight = 0;
-    } else if (collapse === Collapse.SPLIT) {
-      style.flexGrow = 1;
-    } else {
-      style.flexGrow = 1;
-    }
-    return style;
-  };
+      if (collapse !== Collapse.SPLIT) {
+        return expandedStyle;
+      }

-  const onCollapse = () => {
-    if (collapse === Collapse.SPLIT) {
-      setCollapse(Collapse.COLLAPSED);
-    } else {
-      setCollapse(Collapse.SPLIT);
-    }
-  };
+      if (isFirst) {
+        const dimension = isHorizontal ? "width" : "height";
+        const minDimension = isHorizontal ? "minWidth" : "minHeight";
+        const maxDimension = isHorizontal ? "maxWidth" : "maxHeight";

-  const onExpand = () => {
-    if (collapse === Collapse.SPLIT) {
-      setCollapse(Collapse.FILLED);
-    } else {
-      setCollapse(Collapse.SPLIT);
-    }
-  };
+        const firstPanelStyle: CSSProperties = {
+          ...style,
+          [dimension]: `${firstSize}px`,
+          [minDimension]: `${min}px`,
+          [maxDimension]: isHorizontal ? "50%" : "70%",
+          flexShrink: 0,
+        };
+        return firstPanelStyle;
+      }
+
+      const secondPanelStyle: CSSProperties = {
+        ...style,
+        flexGrow: 1,
+        flexShrink: 1,
+        ...(isHorizontal
+          ? {
+              minWidth: "30%",
+              maxWidth: "70%",
+            }
+          : {
+              minHeight: "300px",
+              display: "flex",
+              flexDirection: "column",
+            }),
+      };
+      return secondPanelStyle;
+    },
+    [collapse, firstSize, isHorizontal, getConstraints],
+  );
+
+  const toggleCollapse = () =>
+    setCollapse(
+      collapse === Collapse.SPLIT ? Collapse.COLLAPSED : Collapse.SPLIT,
+    );
+
+  const toggleExpand = () =>
+    setCollapse(collapse === Collapse.SPLIT ? Collapse.FILLED : Collapse.SPLIT);

  return (
    <div className={twMerge("flex", !isHorizontal && "flex-col", className)}>
      <div
        ref={firstRef}
        className={twMerge(firstClassName, "transition-all ease-soft-spring")}
-        style={getStyleForFirst()}
+        style={getPanelStyle(true)}
      >
        {firstChild}
      </div>
@@ -171,18 +208,18 @@ export function ResizablePanel({
        <IconButton
          icon={isHorizontal ? <VscChevronLeft /> : <VscChevronUp />}
          ariaLabel="Collapse"
-          onClick={onCollapse}
+          onClick={toggleCollapse}
        />
        <IconButton
          icon={isHorizontal ? <VscChevronRight /> : <VscChevronDown />}
          ariaLabel="Expand"
-          onClick={onExpand}
+          onClick={toggleExpand}
        />
      </div>
      <div
        ref={secondRef}
        className={twMerge(secondClassName, "transition-all ease-soft-spring")}
-        style={getStyleForSecond()}
+        style={getPanelStyle(false)}
      >
        {secondChild}
      </div>
@@ -20,7 +20,7 @@ export function AdvancedOptionSwitch({
    <Switch
      isDisabled={isDisabled}
      name="use-advanced-options"
-      isSelected={showAdvancedOptions}
+      defaultSelected={showAdvancedOptions}
      onValueChange={setShowAdvancedOptions}
      classNames={{
        thumb: cn(
@@ -0,0 +1,52 @@
+import { useTranslation } from "react-i18next";
+import { Select, SelectItem } from "@nextui-org/react";
+
+// Props accepted by RuntimeSizeSelector.
+interface RuntimeSizeSelectorProps {
+  // Forwarded to the underlying Select's `isDisabled` prop.
+  isDisabled: boolean;
+  // Size key selected initially; a missing or falsy value selects "1".
+  defaultValue?: number;
+}
+
+/**
+ * Labelled select field (form name "runtime-size") for choosing the runtime size.
+ *
+ * Offers a selectable "1x" entry and a disabled "2x" entry whose description
+ * explains how to request access. The initial selection comes from
+ * `defaultValue`, falling back to "1" when it is missing or falsy.
+ */
+export function RuntimeSizeSelector({
+  isDisabled,
+  defaultValue,
+}: RuntimeSizeSelectorProps) {
+  const { t } = useTranslation();
+  // The same translated text is used for the visible label and the aria-label.
+  const label = t("SETTINGS_FORM$RUNTIME_SIZE_LABEL");
+  const initialKeys = [String(defaultValue || 1)];
+  // Fixed 300px width with wrapping enabled for the 2x entry and its description.
+  const largeOptionClassNames = {
+    description: "whitespace-normal break-words min-w-[300px] max-w-[300px]",
+    base: "min-w-[300px] max-w-[300px]",
+  };
+
+  return (
+    <fieldset className="flex flex-col gap-2">
+      <label
+        htmlFor="runtime-size"
+        className="font-[500] text-[#A3A3A3] text-xs"
+      >
+        {label}
+      </label>
+      <Select
+        id="runtime-size"
+        name="runtime-size"
+        defaultSelectedKeys={initialKeys}
+        isDisabled={isDisabled}
+        aria-label={label}
+        classNames={{
+          trigger: "bg-[#27272A] rounded-md text-sm px-3 py-[10px]",
+        }}
+      >
+        <SelectItem key="1" value={1}>
+          1x (2 core, 8G)
+        </SelectItem>
+        <SelectItem
+          key="2"
+          value={2}
+          isDisabled
+          classNames={largeOptionClassNames}
+          description="Runtime sizes over 1 are disabled by default, please contact contact@all-hands.dev to get access to larger runtimes."
+        >
+          2x (4 core, 16G)
+        </SelectItem>
+      </Select>
+    </fieldset>
+  );
+}
@@ -21,6 +21,9 @@ import { ModalBackdrop } from "../modal-backdrop";
 import { ModelSelector } from "./model-selector";
 import { useSaveSettings } from "#/hooks/mutation/use-save-settings";

+import { RuntimeSizeSelector } from "./runtime-size-selector";
+import { useConfig } from "#/hooks/query/use-config";
+
 interface SettingsFormProps {
  disabled?: boolean;
  settings: Settings;
@@ -40,6 +43,7 @@ export function SettingsForm({
 }: SettingsFormProps) {
  const { mutateAsync: saveSettings } = useSaveSettings();
  const endSession = useEndSession();
+  const { data: config } = useConfig();

  const location = useLocation();
  const { t } = useTranslation();
@@ -97,6 +101,8 @@ export function SettingsForm({
    posthog.capture("settings_saved", {
      LLM_MODEL: newSettings.LLM_MODEL,
      LLM_API_KEY: newSettings.LLM_API_KEY ? "SET" : "UNSET",
+      REMOTE_RUNTIME_RESOURCE_FACTOR:
+        newSettings.REMOTE_RUNTIME_RESOURCE_FACTOR,
    });
  };

@@ -122,6 +128,8 @@ export function SettingsForm({
    }
  };

+  const isSaasMode = config?.APP_MODE === "saas";
+
  return (
    <div>
      <form
@@ -164,16 +172,21 @@ export function SettingsForm({
            isSet={settings.LLM_API_KEY === "SET"}
          />

-          {showAdvancedOptions && (
-            <AgentInput
-              isDisabled={!!disabled}
-              defaultValue={settings.AGENT}
-              agents={agents}
-            />
-          )}
-
          {showAdvancedOptions && (
            <>
+              <AgentInput
+                isDisabled={!!disabled}
+                defaultValue={settings.AGENT}
+                agents={agents}
+              />
+
+              {isSaasMode && (
+                <RuntimeSizeSelector
+                  isDisabled={!!disabled}
+                  defaultValue={settings.REMOTE_RUNTIME_RESOURCE_FACTOR}
+                />
+              )}
+
              <SecurityAnalyzerInput
                isDisabled={!!disabled}
                defaultValue={settings.SECURITY_ANALYZER}
@@ -16,7 +16,7 @@ export function SettingsModal({ onClose, settings }: SettingsModalProps) {
    <ModalBackdrop onClose={onClose}>
      <div
        data-testid="ai-config-modal"
-        className="bg-root-primary w-[384px] p-6 rounded-xl flex flex-col gap-2"
+        className="bg-root-primary min-w-[384px] max-w-[700px] p-6 rounded-xl flex flex-col gap-2"
      >
        {aiConfigOptions.error && (
          <p className="text-danger text-xs">{aiConfigOptions.error.message}</p>
@@ -5,9 +5,12 @@ import EventLogger from "#/utils/event-logger";
 import { handleAssistantMessage } from "#/services/actions";
 import { useRate } from "#/hooks/use-rate";
 import { OpenHandsParsedEvent } from "#/types/core";
-import { AgentStateChangeObservation } from "#/types/core/observations";
+import {
+  AssistantMessageAction,
+  UserMessageAction,
+} from "#/types/core/actions";

-const isOpenHandsMessage = (event: unknown): event is OpenHandsParsedEvent =>
+const isOpenHandsEvent = (event: unknown): event is OpenHandsParsedEvent =>
  typeof event === "object" &&
  event !== null &&
  "id" in event &&
@@ -15,10 +18,26 @@ const isOpenHandsMessage = (event: unknown): event is OpenHandsParsedEvent =>
  "message" in event &&
  "timestamp" in event;

-const isAgentStateChangeObservation = (
+const isUserMessage = (
  event: OpenHandsParsedEvent,
-): event is AgentStateChangeObservation =>
-  "observation" in event && event.observation === "agent_state_changed";
+): event is UserMessageAction =>
+  "source" in event &&
+  "type" in event &&
+  event.source === "user" &&
+  event.type === "message";
+
+// Type guard: true only when the event has both a `source` and a `type` key
+// and they equal "agent" and "message" respectively.
+const isAssistantMessage = (
+  event: OpenHandsParsedEvent,
+): event is AssistantMessageAction => {
+  if (!("source" in event) || !("type" in event)) {
+    return false;
+  }
+  return event.source === "agent" && event.type === "message";
+};
+
+// Type guard: accepts an event that is either a user message or an assistant message.
+const isMessageAction = (
+  event: OpenHandsParsedEvent,
+): event is UserMessageAction | AssistantMessageAction => {
+  if (isUserMessage(event)) {
+    return true;
+  }
+  return isAssistantMessage(event);
+};

 export enum WsClientProviderStatus {
  CONNECTED,
@@ -43,16 +62,13 @@ const WsClientContext = React.createContext<UseWsClient>({

 interface WsClientProviderProps {
  conversationId: string;
-  ghToken: string | null;
 }

 export function WsClientProvider({
-  ghToken,
  conversationId,
  children,
 }: React.PropsWithChildren<WsClientProviderProps>) {
  const sioRef = React.useRef<Socket | null>(null);
-  const ghTokenRef = React.useRef<string | null>(ghToken);
  const [status, setStatus] = React.useState(
    WsClientProviderStatus.DISCONNECTED,
  );
@@ -74,7 +90,7 @@ export function WsClientProvider({
  }

  function handleMessage(event: Record<string, unknown>) {
-    if (isOpenHandsMessage(event) && !isAgentStateChangeObservation(event)) {
+    if (isOpenHandsEvent(event) && isMessageAction(event)) {
      messageRateHandler.record(new Date().getTime());
    }
    setEvents((prevEvents) => [...prevEvents, event]);
@@ -100,6 +116,10 @@ export function WsClientProvider({
    setStatus(WsClientProviderStatus.DISCONNECTED);
  }

+  React.useEffect(() => {
+    lastEventRef.current = null;
+  }, [conversationId]);
+
  React.useEffect(() => {
    if (!conversationId) {
      throw new Error("No conversation ID provided");
@@ -118,9 +138,6 @@ export function WsClientProvider({

    sio = io(baseUrl, {
      transports: ["websocket"],
-      auth: {
-        github_token: ghToken || undefined,
-      },
      query,
    });
    sio.on("connect", handleConnect);
@@ -130,7 +147,6 @@ export function WsClientProvider({
    sio.on("disconnect", handleDisconnect);

    sioRef.current = sio;
-    ghTokenRef.current = ghToken;

    return () => {
      sio.off("connect", handleConnect);
@@ -139,7 +155,7 @@ export function WsClientProvider({
      sio.off("connect_failed", handleError);
      sio.off("disconnect", handleDisconnect);
    };
-  }, [ghToken, conversationId]);
+  }, [conversationId]);

  React.useEffect(
    () => () => {
@@ -1,11 +1,11 @@
 import { useQuery } from "@tanstack/react-query";
 import OpenHands from "#/api/open-hands";
-import { MULTI_CONVO_UI_IS_ENABLED } from "#/utils/constants";
+import { MULTI_CONVERSATION_UI } from "#/utils/feature-flags";

 export const useUserConversation = (cid: string | null) =>
  useQuery({
    queryKey: ["user", "conversation", cid],
    queryFn: () => OpenHands.getConversation(cid!),
-    enabled: MULTI_CONVO_UI_IS_ENABLED && !!cid,
+    enabled: MULTI_CONVERSATION_UI && !!cid,
    retry: false,
  });
@@ -18,6 +18,8 @@ const getSettingsQueryFn = async () => {
        CONFIRMATION_MODE: apiSettings.confirmation_mode,
        SECURITY_ANALYZER: apiSettings.security_analyzer,
        LLM_API_KEY: apiSettings.llm_api_key,
+        REMOTE_RUNTIME_RESOURCE_FACTOR:
+          apiSettings.remote_runtime_resource_factor,
      };
    }

@@ -3,8 +3,8 @@
 import React from "react";
 import { useSettingsUpToDate } from "#/context/settings-up-to-date-context";
 import {
-  DEFAULT_SETTINGS,
  getCurrentSettingsVersion,
+  DEFAULT_SETTINGS,
  getLocalStorageSettings,
 } from "#/services/settings";
 import { useSaveSettings } from "./mutation/use-save-settings";
@@ -426,6 +426,20 @@
    "fr": "Réinitialiser aux valeurs par défaut",
    "tr": "Varsayılanlara Sıfırla"
  },
+  "SETTINGS_FORM$RUNTIME_SIZE_LABEL": {
+    "en": "Runtime Settings",
+    "zh-CN": "运行时设置",
+    "de": "Laufzeiteinstellungen",
+    "ko-KR": "런타임 설정",
+    "no": "Kjøretidsinnstillinger",
+    "zh-TW": "運行時設定",
+    "it": "Impostazioni Runtime",
+    "pt": "Configurações de Runtime",
+    "es": "Configuración de Runtime",
+    "ar": "إعدادات وقت التشغيل",
+    "fr": "Paramètres d'exécution",
+    "tr": "Çalışma Zamanı Ayarları"
+  },
  "CONFIGURATION$SETTINGS_NEED_UPDATE_MESSAGE": {
    "en": "We've changed some settings in the latest update. Take a minute to review.",
    "de": "Mit dem letzten Update haben wir ein paar Einstellungen geändert. Bitte kontrollieren Ihre Einstellungen.",
@@ -1196,6 +1210,20 @@
    "fr": "L'agent attend l'entrée de l'utilisateur...",
    "tr": "Ajan kullanıcı girdisini bekliyor..."
  },
+  "CHAT_INTERFACE$AGENT_RATE_LIMITED_MESSAGE": {
+  "en": "Agent is Rate Limited",
+  "zh-CN": "智能体已达到速率限制",
+  "zh-TW": "智能體已達到速率限制",
+  "de": "Agent ist ratenbegrenzt",
+  "ko-KR": "에이전트가 속도 제한되었습니다",
+  "no": "Agenten er hastighetsbegrenset",
+  "it": "L'agente è limitato dalla frequenza",
+  "pt": "O agente está com limite de taxa",
+  "es": "El agente está limitado por tasa",
+  "ar": "الوكيل مقيد بحد السرعة",
+  "fr": "L'agent est limité en fréquence",
+  "tr": "Ajan hız sınırına ulaştı"
+  },
  "CHAT_INTERFACE$AGENT_PAUSED_MESSAGE": {
    "en": "Agent has paused.",
    "de": "Agent pausiert.",
@@ -17,26 +17,30 @@ const userPreferences = {
 const conversations: Conversation[] = [
  {
    conversation_id: "1",
-    name: "My New Project",
-    repo: null,
-    lastUpdated: new Date().toISOString(),
-    state: "running",
+    title: "My New Project",
+    selected_repository: null,
+    last_updated_at: new Date().toISOString(),
+    status: "RUNNING",
  },
  {
    conversation_id: "2",
-    name: "Repo Testing",
-    repo: "octocat/hello-world",
+    title: "Repo Testing",
+    selected_repository: "octocat/hello-world",
    // 2 days ago
-    lastUpdated: new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString(),
-    state: "cold",
+    last_updated_at: new Date(
+      Date.now() - 2 * 24 * 60 * 60 * 1000,
+    ).toISOString(),
+    status: "STOPPED",
  },
  {
    conversation_id: "3",
-    name: "Another Project",
-    repo: "octocat/earth",
+    title: "Another Project",
+    selected_repository: "octocat/earth",
    // 5 days ago
-    lastUpdated: new Date(Date.now() - 5 * 24 * 60 * 60 * 1000).toISOString(),
-    state: "finished",
+    last_updated_at: new Date(
+      Date.now() - 5 * 24 * 60 * 60 * 1000,
+    ).toISOString(),
+    status: "STOPPED",
  },
 ];

@@ -182,8 +186,11 @@ export const handlers = [

  http.get("/api/options/config", () => HttpResponse.json({ APP_MODE: "oss" })),

-  http.get("/api/conversations", async () =>
-    HttpResponse.json(Array.from(CONVERSATIONS.values())),
+  http.get("/api/conversations?limit=9", async () =>
+    HttpResponse.json({
+      results: Array.from(CONVERSATIONS.values()),
+      next_page_id: null,
+    }),
  ),

  http.delete("/api/conversations/:conversationId", async ({ params }) => {
@@ -197,7 +204,7 @@ export const handlers = [
    return HttpResponse.json(null, { status: 404 });
  }),

-  http.put(
+  http.patch(
    "/api/conversations/:conversationId",
    async ({ params, request }) => {
      const { conversationId } = params;
@@ -207,10 +214,10 @@ export const handlers = [

        if (conversation) {
          const body = await request.json();
-          if (typeof body === "object" && body?.name) {
+          if (typeof body === "object" && body?.title) {
            CONVERSATIONS.set(conversationId, {
              ...conversation,
-              name: body.name,
+              title: body.title,
            });
            return HttpResponse.json(null, { status: 200 });
          }
@@ -224,10 +231,10 @@ export const handlers = [
  http.post("/api/conversations", () => {
    const conversation: Conversation = {
      conversation_id: (Math.random() * 100).toString(),
-      name: "New Conversation",
-      repo: null,
-      lastUpdated: new Date().toISOString(),
-      state: "warm",
+      title: "New Conversation",
+      selected_repository: null,
+      last_updated_at: new Date().toISOString(),
+      status: "RUNNING",
    };

    CONVERSATIONS.set(conversation.conversation_id, conversation);
@@ -34,7 +34,7 @@ import { useUserConversation } from "#/hooks/query/get-conversation-permissions"
 import { CountBadge } from "#/components/layout/count-badge";
 import { TerminalStatusLabel } from "#/components/features/terminal/terminal-status-label";
 import { useSettings } from "#/hooks/query/use-settings";
-import { MULTI_CONVO_UI_IS_ENABLED } from "#/utils/constants";
+import { MULTI_CONVERSATION_UI } from "#/utils/feature-flags";

 function AppContent() {
  const { gitHubToken } = useAuth();
@@ -73,7 +73,7 @@ function AppContent() {
  );

  React.useEffect(() => {
-    if (MULTI_CONVO_UI_IS_ENABLED && isFetched && !conversation) {
+    if (MULTI_CONVERSATION_UI && isFetched && !conversation) {
      toast.error(
        "This conversation does not exist, or you do not have permission to access it.",
      );
@@ -175,7 +175,7 @@ function AppContent() {
  }

  return (
-    <WsClientProvider ghToken={gitHubToken} conversationId={conversationId}>
+    <WsClientProvider conversationId={conversationId}>
      <EventHandler>
        <div data-testid="app-route" className="flex flex-col h-full gap-3">
          <div className="flex h-full overflow-auto">{renderMain()}</div>
@@ -16,6 +16,7 @@ import {
  StatusMessage,
 } from "#/types/message";
 import { handleObservationMessage } from "./observations";
+import { appendInput } from "#/state/command-slice";

 const messageActions = {
  [ActionType.BROWSE]: (message: ActionMessage) => {
@@ -62,6 +63,10 @@ export function handleActionMessage(message: ActionMessage) {
    return;
  }

+  if (message.action === ActionType.RUN) {
+    store.dispatch(appendInput(message.args.command));
+  }
+
  if ("args" in message && "security_risk" in message.args) {
    store.dispatch(appendSecurityAnalyzerInput(message));
  }
@@ -80,8 +80,7 @@ export function handleObservationMessage(message: ObservationMessage) {
            observation: "run" as const,
            extras: {
              command: String(message.extras.command || ""),
-              command_id: Number(message.extras.command_id || 0),
-              exit_code: Number(message.extras.exit_code || 0),
+              metadata: message.extras.metadata,
              hidden: Boolean(message.extras.hidden),
            },
          }),
@@ -8,6 +8,7 @@ export type Settings = {
  LLM_API_KEY: string | null;
  CONFIRMATION_MODE: boolean;
  SECURITY_ANALYZER: string;
+  REMOTE_RUNTIME_RESOURCE_FACTOR: number;
 };

 export type ApiSettings = {
@@ -18,6 +19,7 @@ export type ApiSettings = {
  llm_api_key: string | null;
  confirmation_mode: boolean;
  security_analyzer: string;
+  remote_runtime_resource_factor: number;
 };

 export const DEFAULT_SETTINGS: Settings = {
@@ -28,6 +30,7 @@ export const DEFAULT_SETTINGS: Settings = {
  LLM_API_KEY: null,
  CONFIRMATION_MODE: false,
  SECURITY_ANALYZER: "",
+  REMOTE_RUNTIME_RESOURCE_FACTOR: 1,
 };

 export const getCurrentSettingsVersion = () => {
@@ -66,6 +69,8 @@ export const getLocalStorageSettings = (): Settings => {
    LLM_API_KEY: llmApiKey || DEFAULT_SETTINGS.LLM_API_KEY,
    CONFIRMATION_MODE: confirmationMode || DEFAULT_SETTINGS.CONFIRMATION_MODE,
    SECURITY_ANALYZER: securityAnalyzer || DEFAULT_SETTINGS.SECURITY_ANALYZER,
+    REMOTE_RUNTIME_RESOURCE_FACTOR:
+      DEFAULT_SETTINGS.REMOTE_RUNTIME_RESOURCE_FACTOR,
  };
 };

@@ -73,3 +78,8 @@ export const getLocalStorageSettings = (): Settings => {
 * Get the default settings
 */
 export const getDefaultSettings = (): Settings => DEFAULT_SETTINGS;
+
+/**
+ * Get the current settings, either from local storage or defaults
+ */
+export const getSettings = (): Settings => getLocalStorageSettings();
@@ -93,7 +93,7 @@ export const chatSlice = createSlice({
      const translationID = `ACTION_MESSAGE$${actionID.toUpperCase()}`;
      let text = "";
      if (actionID === "run") {
-        text = `\`${action.payload.args.command}\``;
+        text = `Command:\n\`${action.payload.args.command}\``;
      } else if (actionID === "run_ipython") {
        text = `\`\`\`\n${action.payload.args.code}\n\`\`\``;
      } else if (actionID === "write") {
@@ -144,7 +144,7 @@ export const chatSlice = createSlice({
      // Set success property based on observation type
      if (observationID === "run") {
        const commandObs = observation.payload as CommandObservation;
-        causeMessage.success = commandObs.extras.exit_code === 0;
+        causeMessage.success = commandObs.extras.metadata.exit_code === 0;
      } else if (observationID === "run_ipython") {
        // For IPython, we consider it successful if there's no error message
        const ipythonObs = observation.payload as IPythonObservation;
@@ -158,7 +158,9 @@ export const chatSlice = createSlice({
        if (content.length > MAX_CONTENT_LENGTH) {
          content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
        }
-        content = `\`\`\`\n${content}\n\`\`\``;
+        content = `${
+          causeMessage.content
+        }\n\nOutput:\n\`\`\`\n${content.trim() || "[Command finished execution with no output]"}\n\`\`\``;
        causeMessage.content = content; // Observation content includes the action
      } else if (observationID === "read" || observationID === "edit") {
        const { content } = observation.payload;
@@ -8,6 +8,7 @@ export enum AgentState {
  FINISHED = "finished",
  REJECTED = "rejected",
  ERROR = "error",
+  RATE_LIMITED = "rate_limited",
  AWAITING_USER_CONFIRMATION = "awaiting_user_confirmation",
  USER_CONFIRMED = "user_confirmed",
  USER_REJECTED = "user_rejected",
@@ -13,9 +13,8 @@ export interface CommandObservation extends OpenHandsObservationEvent<"run"> {
  source: "agent";
  extras: {
    command: string;
-    command_id: number;
-    exit_code: number;
    hidden?: boolean;
+    metadata: Record<string, unknown>;
  };
 }

@@ -27,8 +27,11 @@ export interface ObservationMessage {
  // The observed data
  content: string;

-  // Additional structured data
-  extras: Record<string, string>;
+  extras: {
+    metadata: Record<string, unknown>;
+    error_id: string;
+    [key: string]: string | Record<string, unknown>;
+  };

  // A friendly message that can be put in the chat log
  message: string;
@@ -1 +0,0 @@
-export const MULTI_CONVO_UI_IS_ENABLED = false;
@@ -0,0 +1,15 @@
+/**
+ * Read a boolean feature flag from localStorage under `FEATURE_<flagName>`.
+ *
+ * The stored text is JSON-parsed and coerced to a boolean. A missing or empty
+ * entry, or any error thrown while reading or parsing, yields `defaultValue`.
+ */
+function loadFeatureFlag(
+  flagName: string,
+  defaultValue: boolean = false,
+): boolean {
+  try {
+    const stored = localStorage.getItem(`FEATURE_${flagName}`);
+    // Absent or empty entries fall back to the textual default ("true"/"false").
+    const raw = stored || defaultValue.toString();
+    return Boolean(JSON.parse(raw));
+  } catch (error) {
+    return defaultValue;
+  }
+}
+
+// Multi-conversation UI flag, read from the localStorage key
+// `FEATURE_MULTI_CONVERSATION_UI`; off unless that entry parses to a truthy value.
+export const MULTI_CONVERSATION_UI = loadFeatureFlag("MULTI_CONVERSATION_UI");
@@ -1,5 +1,6 @@
 export const sanitizeQuery = (query: string) =>
  query
+    .trim()
    .replace(/https?:\/\//, "")
    .replace(/github.com\//, "")
    .replace(/\.git$/, "")
@@ -277,7 +277,9 @@ class CodeActAgent(Agent):
                )
            else:
                text = truncate_content(
-                    obs.content + obs.interpreter_details, max_message_chars
+                    obs.content
+                    + f'\n[Python Interpreter: {obs.metadata.py_interpreter_path}]',
+                    max_message_chars,
                )
            text += f'\n[Command finished with exit code {obs.exit_code}]'
            message = Message(role='user', content=[TextContent(text=text)])
@@ -31,8 +31,7 @@ from openhands.events.tool import ToolCallMetadata

 _BASH_DESCRIPTION = """Execute a bash command in the terminal.
 * Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
-* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
-* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
+* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command like `C-c` (Ctrl+C) to interrupt the process.
 """

 CmdRunTool = ChatCompletionToolParam(
@@ -45,7 +44,7 @@ CmdRunTool = ChatCompletionToolParam(
            'properties': {
                'command': {
                    'type': 'string',
-                    'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.',
+                    'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process.',
                },
            },
            'required': ['command'],
@@ -18,6 +18,7 @@ from openhands.events.action import (
 from openhands.events.observation import (
    AgentStateChangedObservation,
    BrowserOutputObservation,
+    CmdOutputMetadata,
    CmdOutputObservation,
    FileReadObservation,
    FileWriteObservation,
@@ -54,11 +55,7 @@ class DummyAgent(Agent):
            },
            {
                'action': CmdRunAction(command='echo "foo"'),
-                'observations': [
-                    CmdOutputObservation(
-                        'foo', command_id=-1, command='echo "foo"', exit_code=0
-                    )
-                ],
+                'observations': [CmdOutputObservation('foo', command='echo "foo"')],
            },
            {
                'action': FileWriteAction(
@@ -81,9 +78,8 @@ class DummyAgent(Agent):
                'observations': [
                    CmdOutputObservation(
                        'bash: hello.sh: No such file or directory',
-                        command_id=-1,
                        command='bash workspace/hello.sh',
-                        exit_code=127,
+                        metadata=CmdOutputMetadata(exit_code=127),
                    )
                ],
            },
@@ -152,8 +148,6 @@ class DummyAgent(Agent):
                        obs.pop('timestamp', None)
                        obs.pop('cause', None)
                        obs.pop('source', None)
-                        if 'extras' in obs:
-                            obs['extras'].pop('command_id', None)

                    if hist_obs != expected_obs:
                        print(
@@ -5,7 +5,11 @@ import traceback
 from typing import Callable, ClassVar, Type

 import litellm
-from litellm.exceptions import BadRequestError, ContextWindowExceededError
+from litellm.exceptions import (
+    BadRequestError,
+    ContextWindowExceededError,
+    RateLimitError,
+)

 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State, TrafficControlState
@@ -187,11 +191,15 @@ class AgentController:
        self,
        e: Exception,
    ):
+        """React to an exception by setting the agent state to error and sending a status message."""
        await self.set_agent_state_to(AgentState.ERROR)
        if self.status_callback is not None:
            err_id = ''
            if isinstance(e, litellm.AuthenticationError):
                err_id = 'STATUS$ERROR_LLM_AUTHENTICATION'
+            elif isinstance(e, RateLimitError):
+                await self.set_agent_state_to(AgentState.RATE_LIMITED)
+                return
            self.status_callback('error', err_id, type(e).__name__ + ': ' + str(e))

    def step(self):
@@ -201,10 +209,15 @@ class AgentController:
        try:
            await self._step()
        except Exception as e:
-            traceback.print_exc()
-            self.log('error', f'Error while running the agent: {e}')
+            self.log(
+                'error',
+                f'Error while running the agent (session ID: {self.id}): {e}. '
+                f'Traceback: {traceback.format_exc()}',
+            )
            reported = RuntimeError(
-                'There was an unexpected error while running the agent.'
+                'There was an unexpected error while running the agent. Please '
+                f'report this error to the developers. Your session ID is {self.id}. '
+                f'Exception: {e}.'
            )
            if isinstance(e, litellm.AuthenticationError) or isinstance(
                e, litellm.BadRequestError
@@ -341,7 +354,6 @@ class AgentController:

    def _reset(self) -> None:
        """Resets the agent controller"""
-
        # make sure there is an Observation with the tool call metadata to be recognized by the agent
        # otherwise the pending action is found in history, but it's incomplete without an obs with tool result
        if self._pending_action and hasattr(self._pending_action, 'tool_call_metadata'):
@@ -382,6 +394,9 @@ class AgentController:
            return

        if new_state in (AgentState.STOPPED, AgentState.ERROR):
+            # sync existing metrics BEFORE resetting the agent
+            await self.update_state_after_step()
+            self.state.metrics.merge(self.state.local_metrics)
            self._reset()
        elif (
            new_state == AgentState.RUNNING
@@ -5,7 +5,7 @@ from openhands.events.action.commands import IPythonRunCellAction
 from openhands.events.action.empty import NullAction
 from openhands.events.action.message import MessageAction
 from openhands.events.event import Event, EventSource
-from openhands.events.observation.commands import (
+from openhands.events.observation import (
    CmdOutputObservation,
    IPythonRunCellObservation,
 )
@@ -6,11 +6,10 @@ from uuid import uuid4
 from termcolor import colored

 import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
-from openhands import __version__
 from openhands.core.config import (
    AppConfig,
-    get_parser,
-    load_app_config,
+    parse_arguments,
+    setup_config_from_args,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
@@ -84,27 +83,30 @@ def display_event(event: Event, config: AppConfig):
        display_confirmation(event.confirmation_state)


-async def main(loop):
+def read_input(config: AppConfig) -> str:
+    """Read one user message from stdin according to the configured input mode.
+
+    Args:
+        config: Application config; only `cli_multiline_input` is consulted.
+
+    Returns:
+        The message with trailing whitespace stripped from every line. In
+        multiline mode the lines entered before the `/exit` terminator are
+        joined with newlines (the terminator itself is not part of the result).
+
+    Raises:
+        EOFError: If stdin is exhausted before any text was entered.
+    """
+    if not config.cli_multiline_input:
+        return input('>> ').rstrip()
+
+    print('Enter your message (enter "/exit" on a new line to finish):')
+    lines: list[str] = []
+    while True:
+        try:
+            line = input('>> ').rstrip()
+        except EOFError:
+            # stdin was closed (Ctrl+D / end of a pipe). Keep what was typed so
+            # far instead of throwing it away; with nothing typed there is no
+            # message to return, so let the caller see the EOF as before.
+            if not lines:
+                raise
+            break
+        if line == '/exit':  # finish input
+            break
+        lines.append(line)
+    return '\n'.join(lines)
+
+
+async def main(loop: asyncio.AbstractEventLoop):
    """Runs the agent in CLI mode"""

-    parser = get_parser()
-    # Add the version argument
-    parser.add_argument(
-        '-v',
-        '--version',
-        action='version',
-        version=f'{__version__}',
-        help='Show the version number and exit',
-        default=None,
-    )
-    args = parser.parse_args()
-
-    if args.version:
-        print(f'OpenHands version: {__version__}')
-        return
+    args = parse_arguments()

    logger.setLevel(logging.WARNING)
-    config = load_app_config(config_file=args.config_file)
+
+    config = setup_config_from_args(args)
+
    sid = str(uuid4())

    runtime = create_runtime(config, sid=sid, headless_mode=True)
@@ -116,9 +118,7 @@ async def main(loop):

    async def prompt_for_next_task():
        # Run input() in a thread pool to avoid blocking the event loop
-        next_message = await loop.run_in_executor(
-            None, lambda: input('How can I help? >> ')
-        )
+        next_message = await loop.run_in_executor(None, read_input, config)
        if not next_message.strip():
            await prompt_for_next_task()
        if next_message == 'exit':
@@ -16,6 +16,7 @@ from openhands.core.config.utils import (
    load_from_env,
    load_from_toml,
    parse_arguments,
+    setup_config_from_args,
 )

 __all__ = [
@@ -34,4 +35,5 @@ __all__ = [
    'get_field_info',
    'get_parser',
    'parse_arguments',
+    'setup_config_from_args',
 ]
@@ -42,6 +42,8 @@ class AppConfig:
        file_uploads_max_file_size_mb: Maximum file upload size in MB. `0` means unlimited.
        file_uploads_restrict_file_types: Whether to restrict upload file types.
        file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all.
+        cli_multiline_input: Whether to enable multiline input in CLI. When disabled,
+            input is read line by line. When enabled, input continues until /exit command.
    """

    llms: dict[str, LLMConfig] = field(default_factory=dict)
@@ -71,6 +73,7 @@ class AppConfig:
    file_uploads_restrict_file_types: bool = False
    file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
    runloop_api_key: str | None = None
+    cli_multiline_input: bool = False

    defaults_dict: ClassVar[dict] = {}

@@ -34,6 +34,7 @@ class SandboxConfig:
        platform: The platform on which the image should be built. Default is None.
        remote_runtime_resource_factor: Factor to scale the resource allocation for remote runtime.
            Must be one of [1, 2, 4, 8]. Will only be used if the runtime is remote.
+        enable_gpu: Whether to enable GPU.
    """

    remote_runtime_api_url: str = 'http://localhost:8000'
@@ -57,8 +58,9 @@ class SandboxConfig:
    runtime_startup_env_vars: dict[str, str] = field(default_factory=dict)
    browsergym_eval_env: str | None = None
    platform: str | None = None
-    close_delay: int = 15
+    close_delay: int = 900
    remote_runtime_resource_factor: int = 1
+    enable_gpu: bool = False

    def defaults_to_dict(self) -> dict:
        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
@@ -2,6 +2,7 @@ import argparse
 import os
 import pathlib
 import platform
+import sys
 from dataclasses import is_dataclass
 from types import UnionType
 from typing import Any, MutableMapping, get_args, get_origin
@@ -311,8 +312,14 @@ def get_llm_config_arg(

 # Command line arguments
 def get_parser() -> argparse.ArgumentParser:
-    """Get the parser for the command line arguments."""
-    parser = argparse.ArgumentParser(description='Run an agent with a specific task')
+    """Get the argument parser."""
+    parser = argparse.ArgumentParser(description='Run the agent via CLI')
+
+    # Add version argument
+    parser.add_argument(
+        '-v', '--version', action='store_true', help='Show version information'
+    )
+
    parser.add_argument(
        '--config-file',
        type=str,
@@ -406,16 +413,23 @@ def get_parser() -> argparse.ArgumentParser:
    parser.add_argument(
        '--no-auto-continue',
        action='store_true',
-        help='Disable automatic "continue" responses. Will read from stdin instead.',
+        help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.',
    )
    return parser


 def parse_arguments() -> argparse.Namespace:
-    """Parse the command line arguments."""
+    """Parse command line arguments."""
    parser = get_parser()
-    parsed_args, _ = parser.parse_known_args()
-    return parsed_args
+    args = parser.parse_args()
+
+    if args.version:
+        from openhands import __version__
+
+        print(f'OpenHands version: {__version__}')
+        sys.exit(0)
+
+    return args


 def load_app_config(
@@ -435,3 +449,31 @@ def load_app_config(
        logger.DEBUG = config.debug
        logger.DISABLE_COLOR_PRINTING = config.disable_color
    return config
+
+
+def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
+    """Build the application config for a run from parsed command line arguments.
+
+    The base config comes from `load_app_config` (toml file and env vars) using
+    `args.config_file`; values given on the command line are then applied on top.
+    Common setup used by both CLI and main.py entry points.
+
+    Raises:
+        ValueError: If `args.llm_config` is set but no LLM config can be read for it.
+    """
+    config = load_app_config(config_file=args.config_file)
+
+    # LLM config named on the command line takes precedence over the loaded one
+    if args.llm_config:
+        selected_llm = get_llm_config_arg(args.llm_config)
+        if selected_llm is None:
+            raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
+        config.set_llm_config(selected_llm)
+
+    # Only replace the default agent when a value was actually supplied
+    if args.agent_cls:
+        config.default_agent = args.agent_cls
+
+    # Limits: compared against None (not truthiness) so the config value is kept
+    # only when the argument is absent
+    for option in ('max_iterations', 'max_budget_per_task'):
+        cli_value = getattr(args, option)
+        if cli_value is not None:
+            setattr(config, option, cli_value)
+
+    return config
@@ -9,9 +9,8 @@ from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import (
    AppConfig,
-    get_llm_config_arg,
-    load_app_config,
    parse_arguments,
+    setup_config_from_args,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
@@ -51,6 +50,21 @@ def read_task_from_stdin() -> str:
    return sys.stdin.read()


+def read_input(config: AppConfig) -> str:
+    """Read one user message from stdin according to the configured input mode.
+
+    Args:
+        config: Application config; only `cli_multiline_input` is consulted.
+
+    Returns:
+        The message with trailing whitespace stripped from every line. In
+        multiline mode the lines entered before the `/exit` terminator are
+        joined with newlines (the terminator itself is not part of the result).
+
+    Raises:
+        EOFError: If stdin is exhausted before any text was entered.
+    """
+    if not config.cli_multiline_input:
+        return input('>> ').rstrip()
+
+    print('Enter your message (enter "/exit" on a new line to finish):')
+    lines: list[str] = []
+    while True:
+        try:
+            line = input('>> ').rstrip()
+        except EOFError:
+            # stdin was closed (Ctrl+D / end of a pipe). Keep what was typed so
+            # far instead of throwing it away; with nothing typed there is no
+            # message to return, so let the caller see the EOF as before.
+            if not lines:
+                raise
+            break
+        if line == '/exit':  # finish input
+            break
+        lines.append(line)
+    return '\n'.join(lines)
+
+
 async def run_controller(
    config: AppConfig,
    initial_user_action: Action,
@@ -120,9 +134,7 @@ async def run_controller(
                if exit_on_message:
                    message = '/exit'
                elif fake_user_response_fn is None:
-                    # read until EOF (Ctrl+D on Unix, Ctrl+Z on Windows)
-                    print('Request user input (press Ctrl+D/Z when done) >> ')
-                    message = sys.stdin.read().rstrip()
+                    message = read_input(config)
                else:
                    message = fake_user_response_fn(controller.get_state())
                action = MessageAction(content=message)
@@ -195,31 +207,13 @@ if __name__ == '__main__':
    else:
        raise ValueError('No task provided. Please specify a task through -t, -f.')
    initial_user_action: MessageAction = MessageAction(content=task_str)
-    # Load the app config
-    # this will load config from config.toml in the current directory
-    # as well as from the environment variables
-    config = load_app_config(config_file=args.config_file)

-    # Override default LLM configs ([llm] section in config.toml)
-    if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config)
-        if llm_config is None:
-            raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
-        config.set_llm_config(llm_config)
-
-    # Set default agent
-    config.default_agent = args.agent_cls
+    config = setup_config_from_args(args)

    # Set session name
    session_name = args.name
    sid = generate_sid(config, session_name)

-    # if max budget per task is not sent on the command line, use the config value
-    if args.max_budget_per_task is not None:
-        config.max_budget_per_task = args.max_budget_per_task
-    if args.max_iterations is not None:
-        config.max_iterations = args.max_iterations
-
    asyncio.run(
        run_controller(
            config=config,
@@ -49,3 +49,7 @@ class AgentState(str, Enum):
    USER_REJECTED = 'user_rejected'
    """The user rejected the agent's action.
    """
+
+    RATE_LIMITED = 'rate_limited'
+    """The agent is rate limited.
+    """
@@ -6,6 +6,7 @@ from litellm.types.utils import ModelResponse

 from openhands.core.exceptions import LLMResponseError
 from openhands.events.event import Event
+from openhands.events.observation import CmdOutputMetadata
 from openhands.events.serialization import event_to_dict
 from openhands.llm.metrics import Metrics

@@ -20,6 +21,8 @@ def my_default_encoder(obj):
        return obj.get()
    if isinstance(obj, ModelResponse):
        return obj.model_dump()
+    if isinstance(obj, CmdOutputMetadata):
+        return obj.model_dump()
    return json.JSONEncoder().default(obj)


@@ -12,19 +12,11 @@ from openhands.events.action.action import (
@dataclass
 class CmdRunAction(Action):
    command: str
+    # When `command` is empty, it will be used to print the current tmux window
    thought: str = ''
    blocking: bool = False
-    # If False, the command will be run in a non-blocking / interactive way
-    # The partial command outputs will be returned as output observation.
-    # If True, the command will be run for max .timeout seconds.
-    keep_prompt: bool = True
-    # if True, the command prompt will be kept in the command output observation
-    # Example of command output:
-    # root@sandbox:~# ls
-    # file1.txt
-    # file2.txt
-    # root@sandbox:~# <-- this is the command prompt
-
+    # If blocking is True, the command will be run in a blocking manner.
+    # e.g., it will NOT return early due to soft timeout.
    hidden: bool = False
    action: str = ActionType.RUN
    runnable: ClassVar[bool] = True
@@ -48,6 +48,15 @@ class FileWriteAction(Action):
    def message(self) -> str:
        return f'Writing file: {self.path}'

+    def __repr__(self) -> str:
+        """Return a multi-line dump of the action: path, line range, thought and content."""
+        rendered_lines = [
+            '**FileWriteAction**',
+            f'Path: {self.path}',
+            f'Range: [L{self.start}:L{self.end}]',
+            f'Thought: {self.thought}',
+            'Content:',
+            '```',
+            f'{self.content}',
+            '```',
+        ]
+        # Every line, including the closing fence, is newline-terminated
+        return '\n'.join(rendered_lines) + '\n'
+

@dataclass
 class FileEditAction(Action):
@@ -1,6 +1,7 @@
 from openhands.events.observation.agent import AgentStateChangedObservation
 from openhands.events.observation.browse import BrowserOutputObservation
 from openhands.events.observation.commands import (
+    CmdOutputMetadata,
    CmdOutputObservation,
    IPythonRunCellObservation,
 )
@@ -20,6 +21,7 @@ __all__ = [
    'Observation',
    'NullObservation',
    'CmdOutputObservation',
+    'CmdOutputMetadata',
    'IPythonRunCellObservation',
    'BrowserOutputObservation',
    'FileReadObservation',
@@ -1,19 +1,136 @@
-from dataclasses import dataclass
+import json
+import re
+import traceback
+from dataclasses import dataclass, field
+from typing import Self

+from pydantic import BaseModel
+
+from openhands.core.logger import openhands_logger as logger
 from openhands.core.schema import ObservationType
 from openhands.events.observation.observation import Observation

+# Sentinel lines that bracket the JSON metadata blob built by
+# CmdOutputMetadata.to_ps1_prompt.
+CMD_OUTPUT_PS1_BEGIN = '\n###PS1JSON###\n'
+CMD_OUTPUT_PS1_END = '\n###PS1END###'
+# Captures (non-greedily) whatever sits between the two sentinels. DOTALL lets
+# the capture span several lines; MULTILINE lets `^` anchor at any line start.
+CMD_OUTPUT_METADATA_PS1_REGEX = re.compile(
+    f'^{CMD_OUTPUT_PS1_BEGIN.strip()}(.*?){CMD_OUTPUT_PS1_END.strip()}',
+    re.DOTALL | re.MULTILINE,
+)
+
+
+class CmdOutputMetadata(BaseModel):
+    """Additional metadata captured from PS1.
+
+    `to_ps1_prompt` builds a prompt string that embeds a JSON object between
+    the CMD_OUTPUT_PS1_BEGIN / CMD_OUTPUT_PS1_END sentinels,
+    `matches_ps1_metadata` locates such blobs in a string, and
+    `from_ps1_match` turns one match into an instance of this model.
+    """
+
+    # Both numeric fields default to -1 when no value is supplied
+    exit_code: int = -1
+    pid: int = -1
+    username: str | None = None
+    hostname: str | None = None
+    working_dir: str | None = None
+    py_interpreter_path: str | None = None
+    prefix: str = ''  # Prefix to add to command output
+    suffix: str = ''  # Suffix to add to command output
+
+    @classmethod
+    def to_ps1_prompt(cls) -> str:
+        """Convert the required metadata into a PS1 prompt.
+
+        Returns:
+            CMD_OUTPUT_PS1_BEGIN, a JSON object whose values are shell
+            placeholders (with every double quote backslash-escaped),
+            CMD_OUTPUT_PS1_END and a trailing newline.
+        """
+        prompt = CMD_OUTPUT_PS1_BEGIN
+        # The values are not literal data: they are placeholders left for the
+        # shell to expand when it renders the prompt.
+        json_str = json.dumps(
+            {
+                'pid': '$!',
+                'exit_code': '$?',
+                'username': r'\u',
+                'hostname': r'\h',
+                'working_dir': r'$(pwd)',
+                # falls back to an empty string when `which python` fails
+                'py_interpreter_path': r'$(which python 2>/dev/null || echo "")',
+            },
+            indent=2,
+        )
+        # Make sure we escape double quotes in the JSON string
+        # So that PS1 will keep them as part of the output
+        prompt += json_str.replace('"', r'\"')
+        prompt += CMD_OUTPUT_PS1_END + '\n'  # Ensure there's a newline at the end
+        return prompt
+
+    @classmethod
+    def matches_ps1_metadata(cls, string: str) -> list[re.Match[str]]:
+        """Find every PS1 metadata blob in `string` whose payload is valid JSON.
+
+        Args:
+            string: Text to scan with CMD_OUTPUT_METADATA_PS1_REGEX.
+
+        Returns:
+            The regex matches, in order of appearance, whose captured group
+            parses as JSON. Matches that fail to parse are logged and dropped.
+        """
+        matches = []
+        for match in CMD_OUTPUT_METADATA_PS1_REGEX.finditer(string):
+            try:
+                # Parsed only for validation; the result is discarded here and
+                # parsed again in from_ps1_match.
+                json.loads(match.group(1).strip())  # Try to parse as JSON
+                matches.append(match)
+            except json.JSONDecodeError:
+                logger.warning(
+                    f'Failed to parse PS1 metadata: {match.group(1)}. Skipping.'
+                    + traceback.format_exc()
+                )
+                continue  # Skip if not valid JSON
+        return matches
+
+    @classmethod
+    def from_ps1_match(cls, match: re.Match[str]) -> Self:
+        """Extract the required metadata from a PS1 prompt.
+
+        Args:
+            match: A match of CMD_OUTPUT_METADATA_PS1_REGEX; group 1 must hold
+                the JSON payload.
+
+        Returns:
+            A model built from the payload, with `pid` and `exit_code` coerced
+            to int (-1 when the value cannot be converted).
+
+        Raises:
+            json.JSONDecodeError: If group 1 is not valid JSON.
+        """
+        metadata = json.loads(match.group(1))
+        # Create a copy of metadata to avoid modifying the original
+        processed = metadata.copy()
+        # Convert numeric fields
+        # str -> float -> int accepts both '0' and '0.0' style values
+        if 'pid' in metadata:
+            try:
+                processed['pid'] = int(float(str(metadata['pid'])))
+            except (ValueError, TypeError):
+                processed['pid'] = -1
+        if 'exit_code' in metadata:
+            try:
+                processed['exit_code'] = int(float(str(metadata['exit_code'])))
+            except (ValueError, TypeError):
+                logger.warning(
+                    f'Failed to parse exit code: {metadata["exit_code"]}. Setting to -1.'
+                )
+                processed['exit_code'] = -1
+        return cls(**processed)
+

@dataclass
 class CmdOutputObservation(Observation):
    """This data class represents the output of a command."""

-    command_id: int
    command: str
-    exit_code: int = 0
-    hidden: bool = False
    observation: str = ObservationType.RUN
-    interpreter_details: str = ''
+    # Additional metadata captured from PS1
+    metadata: CmdOutputMetadata = field(default_factory=CmdOutputMetadata)
+    # Whether the command output should be hidden from the user
+    hidden: bool = False
+
+    def __init__(
+        self,
+        content: str,
+        command: str,
+        observation: str = ObservationType.RUN,
+        metadata: dict | CmdOutputMetadata | None = None,
+        hidden: bool = False,
+        **kwargs,
+    ):
+        """Create the observation for `command` with output `content`.
+
+        Args:
+            content: The command output, forwarded to the base class.
+            command: The command this output belongs to.
+            observation: Observation type tag.
+            metadata: Either a ready CmdOutputMetadata, a dict of its fields,
+                or None for a default-constructed one.
+            hidden: Whether the command output should be hidden from the user.
+            **kwargs: Only the legacy keys `exit_code` and `command_id` are
+                read; they are written to `metadata.exit_code` and
+                `metadata.pid` respectively.
+        """
+        super().__init__(content)
+        self.command = command
+        self.observation = observation
+        self.hidden = hidden
+
+        # Normalise the three accepted metadata forms to a CmdOutputMetadata
+        self.metadata = (
+            CmdOutputMetadata(**metadata)
+            if isinstance(metadata, dict)
+            else metadata or CmdOutputMetadata()
+        )
+
+        # Handle legacy attribute
+        legacy_to_field = (('exit_code', 'exit_code'), ('command_id', 'pid'))
+        for legacy_name, field_name in legacy_to_field:
+            if legacy_name in kwargs:
+                setattr(self.metadata, field_name, kwargs[legacy_name])
+
+    @property
+    def command_id(self) -> int:
+        """Read-only alias for `metadata.pid` (the constructor stores a legacy `command_id` there)."""
+        return self.metadata.pid
+
+    @property
+    def exit_code(self) -> int:
+        """Read-only alias for `metadata.exit_code`."""
+        return self.metadata.exit_code

    @property
    def error(self) -> bool:
@@ -28,7 +145,21 @@ class CmdOutputObservation(Observation):
        return not self.error

    def __str__(self) -> str:
-        return f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code})**\n{self.content}'
+        return (
+            f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code}, '
+            f'metadata={json.dumps(self.metadata.model_dump(), indent=2)})**\n'
+            '--BEGIN AGENT OBSERVATION--\n'
+            f'{self._to_agent_observation()}\n'
+            '--END AGENT OBSERVATION--'
+        )
+
+    def _to_agent_observation(self) -> str:
+        """Build the observation text: prefix + content + suffix, then optional context lines.
+
+        A working-directory line and a Python-interpreter line are appended
+        (each on its own line) only when the corresponding metadata field is set.
+        """
+        meta = self.metadata
+        parts = [f'{meta.prefix}{self.content}{meta.suffix}']
+        if meta.working_dir:
+            parts.append(f'[Current working directory: {meta.working_dir}]')
+        if meta.py_interpreter_path:
+            parts.append(f'[Python interpreter: {meta.py_interpreter_path}]')
+        return '\n'.join(parts)


@dataclass
@@ -18,6 +18,9 @@ class FileReadObservation(Observation):
    def message(self) -> str:
        return f'I read the file {self.path}.'

+    def __str__(self) -> str:
+        """Return a one-line status header for the read followed by the file content."""
+        header = f'[Read from {self.path} is successful.]'
+        return f'{header}\n{self.content}'
+

@dataclass
 class FileWriteObservation(Observation):
@@ -30,6 +33,9 @@ class FileWriteObservation(Observation):
    def message(self) -> str:
        return f'I wrote to the file {self.path}.'

+    def __str__(self) -> str:
+        """Return a one-line status header for the write followed by the observation content."""
+        header = f'[Write to {self.path} is successful.]'
+        return f'{header}\n{self.content}'
+

@dataclass
 class FileEditObservation(Observation):
@@ -67,6 +67,10 @@ def action_from_dict(action: dict) -> Action:
    if 'images_urls' in args:
        args['image_urls'] = args.pop('images_urls')

+    # keep_prompt has been deprecated in https://github.com/All-Hands-AI/OpenHands/pull/4881
+    if 'keep_prompt' in args:
+        args.pop('keep_prompt')
+
    try:
        decoded_action = action_class(**args)
        if 'timeout' in action:
@@ -1,6 +1,8 @@
 from dataclasses import asdict
 from datetime import datetime

+from pydantic import BaseModel
+
 from openhands.events import Event, EventSource
 from openhands.events.observation.observation import Observation
 from openhands.events.serialization.action import action_from_dict
@@ -56,6 +58,12 @@ def event_from_dict(data) -> 'Event':
    return evt


+def _convert_pydantic_to_dict(obj: BaseModel | dict) -> dict:
+    """Return `obj.model_dump()` for a pydantic model; return any other value unchanged."""
+    return obj.model_dump() if isinstance(obj, BaseModel) else obj
+
+
 def event_to_dict(event: 'Event') -> dict:
    props = asdict(event)
    d = {}
@@ -82,7 +90,11 @@ def event_to_dict(event: 'Event') -> dict:
            d['timeout'] = event.timeout
    elif 'observation' in d:
        d['content'] = props.pop('content', '')
-        d['extras'] = props
+
+        # props is a dict whose values can include a complex object like an instance of a BaseModel subclass
+        # such as CmdOutputMetadata
+        # we serialize it along with the rest
+        d['extras'] = {k: _convert_pydantic_to_dict(v) for k, v in props.items()}
        # Include success field for CmdOutputObservation
        if hasattr(event, 'success'):
            d['success'] = event.success
@@ -109,7 +121,6 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
    # runnable actions have some extra fields used in the BE/FE, which should not be sent to the LLM
    if 'args' in d:
        d['args'].pop('blocking', None)
-        d['args'].pop('keep_prompt', None)
        d['args'].pop('confirmation_state', None)

    if 'extras' in d:
@@ -1,6 +1,9 @@
+import copy
+
 from openhands.events.observation.agent import AgentStateChangedObservation
 from openhands.events.observation.browse import BrowserOutputObservation
 from openhands.events.observation.commands import (
+    CmdOutputMetadata,
    CmdOutputObservation,
    IPythonRunCellObservation,
 )
@@ -37,6 +40,26 @@ OBSERVATION_TYPE_TO_CLASS = {
 }


+def _update_cmd_output_metadata(
+    metadata: dict | CmdOutputMetadata | None, **kwargs
+) -> dict | CmdOutputMetadata:
+    """Apply `kwargs` as field updates to the metadata of a CmdOutputObservation.
+
+    - None: a new CmdOutputMetadata is created from `kwargs`.
+    - dict: the dict is updated in place and returned.
+    - CmdOutputMetadata: each keyword is set as an attribute on the instance,
+      which is returned.
+
+    Any other value is returned untouched.
+    """
+    if metadata is None:
+        return CmdOutputMetadata(**kwargs)
+    if isinstance(metadata, dict):
+        metadata.update(**kwargs)
+        return metadata
+    if isinstance(metadata, CmdOutputMetadata):
+        for field_name, field_value in kwargs.items():
+            setattr(metadata, field_name, field_value)
+    return metadata
+
+
 def observation_from_dict(observation: dict) -> Observation:
    observation = observation.copy()
    if 'observation' not in observation:
@@ -49,6 +72,24 @@ def observation_from_dict(observation: dict) -> Observation:
    observation.pop('observation')
    observation.pop('message', None)
    content = observation.pop('content', '')
-    extras = observation.pop('extras', {})
+    extras = copy.deepcopy(observation.pop('extras', {}))
+
+    # Handle legacy attributes for CmdOutputObservation
+    if 'exit_code' in extras:
+        extras['metadata'] = _update_cmd_output_metadata(
+            extras.get('metadata', None), exit_code=extras.pop('exit_code')
+        )
+    if 'command_id' in extras:
+        extras['metadata'] = _update_cmd_output_metadata(
+            extras.get('metadata', None), pid=extras.pop('command_id')
+        )
+    # convert metadata to CmdOutputMetadata if it is a dict
+    if observation_class is CmdOutputObservation:
+        if 'metadata' in extras and isinstance(extras['metadata'], dict):
+            extras['metadata'] = CmdOutputMetadata(**extras['metadata'])
+        elif 'metadata' in extras and isinstance(extras['metadata'], CmdOutputMetadata):
+            pass
+        else:
+            extras['metadata'] = CmdOutputMetadata()

    return observation_class(content=content, **extras)
@@ -1,9 +1,10 @@
 import asyncio
+import queue
 import threading
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
 from enum import Enum
-from queue import Queue
+from functools import partial
 from typing import Callable, Iterable

 from openhands.core.logger import openhands_logger as logger
@@ -61,12 +62,19 @@ class EventStream:
    _subscribers: dict[str, dict[str, Callable]]
    _cur_id: int = 0
    _lock: threading.Lock
+    _queue: queue.Queue[Event]
+    _queue_thread: threading.Thread
+    _queue_loop: asyncio.AbstractEventLoop | None
+    _thread_loops: dict[str, dict[str, asyncio.AbstractEventLoop]]

-    def __init__(self, sid: str, file_store: FileStore, num_workers: int = 1):
+    def __init__(self, sid: str, file_store: FileStore):
        self.sid = sid
        self.file_store = file_store
-        self._queue: Queue[Event] = Queue()
+        self._stop_flag = threading.Event()
+        self._queue: queue.Queue[Event] = queue.Queue()
        self._thread_pools: dict[str, dict[str, ThreadPoolExecutor]] = {}
+        self._thread_loops: dict[str, dict[str, asyncio.AbstractEventLoop]] = {}
+        self._queue_loop = None
        self._queue_thread = threading.Thread(target=self._run_queue_loop)
        self._queue_thread.daemon = True
        self._queue_thread.start()
@@ -91,9 +99,54 @@ class EventStream:
            if id >= self._cur_id:
                self._cur_id = id + 1

-    def _init_thread_loop(self):
+    def _init_thread_loop(self, subscriber_id: str, callback_id: str):
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
+        if subscriber_id not in self._thread_loops:
+            self._thread_loops[subscriber_id] = {}
+        self._thread_loops[subscriber_id][callback_id] = loop
+
+    def close(self):
+        """Stop the queue thread, then clean up every registered subscriber callback."""
+        # _process_queue checks this flag on each iteration and exits once it is set
+        self._stop_flag.set()
+        if self._queue_thread.is_alive():
+            self._queue_thread.join()
+
+        # Iterate over snapshots of the keys: _clean_up_subscriber deletes
+        # callback entries from these dicts as it goes.
+        subscriber_ids = list(self._subscribers.keys())
+        for subscriber_id in subscriber_ids:
+            callback_ids = list(self._subscribers[subscriber_id].keys())
+            for callback_id in callback_ids:
+                self._clean_up_subscriber(subscriber_id, callback_id)
+
+    def _clean_up_subscriber(self, subscriber_id: str, callback_id: str):
+        """Release everything held for one (subscriber, callback) pair.
+
+        Stops and closes the callback's event loop, shuts down its thread pool
+        and removes the callback from `_subscribers`. Logs a warning and
+        returns without doing anything if the subscriber or callback is not
+        registered.
+
+        Args:
+            subscriber_id: Key of the subscriber in `_subscribers`.
+            callback_id: Key of the callback under that subscriber.
+        """
+        if subscriber_id not in self._subscribers:
+            logger.warning(f'Subscriber not found during cleanup: {subscriber_id}')
+            return
+        if callback_id not in self._subscribers[subscriber_id]:
+            logger.warning(f'Callback not found during cleanup: {callback_id}')
+            return
+        # The loop entry only exists once _init_thread_loop has run for this pair
+        if (
+            subscriber_id in self._thread_loops
+            and callback_id in self._thread_loops[subscriber_id]
+        ):
+            loop = self._thread_loops[subscriber_id][callback_id]
+            try:
+                loop.stop()
+                loop.close()
+            except Exception as e:
+                # Best effort: a loop that cannot be closed must not prevent
+                # the rest of the cleanup.
+                logger.warning(
+                    f'Error closing loop for {subscriber_id}/{callback_id}: {e}'
+                )
+            del self._thread_loops[subscriber_id][callback_id]
+
+        if (
+            subscriber_id in self._thread_pools
+            and callback_id in self._thread_pools[subscriber_id]
+        ):
+            pool = self._thread_pools[subscriber_id][callback_id]
+            pool.shutdown()
+            del self._thread_pools[subscriber_id][callback_id]
+
+        # Only the callback entry is removed; the (possibly empty) per-subscriber
+        # dict stays in place.
+        del self._subscribers[subscriber_id][callback_id]

    def _get_filename_for_id(self, id: int) -> str:
        return get_conversation_event_filename(self.sid, id)
@@ -176,7 +229,8 @@ class EventStream:
    def subscribe(
        self, subscriber_id: EventStreamSubscriber, callback: Callable, callback_id: str
    ):
-        pool = ThreadPoolExecutor(max_workers=1, initializer=self._init_thread_loop)
+        initializer = partial(self._init_thread_loop, subscriber_id, callback_id)
+        pool = ThreadPoolExecutor(max_workers=1, initializer=initializer)
        if subscriber_id not in self._subscribers:
            self._subscribers[subscriber_id] = {}
            self._thread_pools[subscriber_id] = {}
@@ -198,7 +252,7 @@ class EventStream:
            logger.warning(f'Callback not found during unsubscribe: {callback_id}')
            return

-        del self._subscribers[subscriber_id][callback_id]
+        self._clean_up_subscriber(subscriber_id, callback_id)

    def add_event(self, event: Event, source: EventSource):
        if hasattr(event, '_id') and event.id is not None:
@@ -217,13 +271,20 @@ class EventStream:
        self._queue.put(event)

    def _run_queue_loop(self):
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        loop.run_until_complete(self._process_queue())
+        self._queue_loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(self._queue_loop)
+        try:
+            self._queue_loop.run_until_complete(self._process_queue())
+        finally:
+            self._queue_loop.close()

    async def _process_queue(self):
-        while should_continue():
-            event = self._queue.get()
+        while should_continue() and not self._stop_flag.is_set():
+            event = None
+            try:
+                event = self._queue.get(timeout=0.1)
+            except queue.Empty:
+                continue
            for key in sorted(self._subscribers.keys()):
                callbacks = self._subscribers[key]
                for callback_id in callbacks:
@@ -2,9 +2,11 @@ from openhands.core.logger import openhands_logger as logger
 from openhands.events.action.action import Action
 from openhands.events.action.empty import NullAction
 from openhands.events.event import Event
-from openhands.events.observation.commands import CmdOutputObservation
-from openhands.events.observation.empty import NullObservation
-from openhands.events.observation.observation import Observation
+from openhands.events.observation import (
+    CmdOutputObservation,
+    NullObservation,
+    Observation,
+)


 def get_pairs_from_events(events: list[Event]) -> list[tuple[Action, Observation]]:
@@ -122,12 +122,6 @@ class LLM(RetryMixin, DebugMixin):
        if self.is_function_calling_active():
            logger.debug('LLM: model supports function calling')

-        # Compatibility flag: use string serializer for DeepSeek models
-        # See this issue: https://github.com/All-Hands-AI/OpenHands/issues/5818
-        self._use_string_serializer = False
-        if 'deepseek' in self.config.model:
-            self._use_string_serializer = True
-
        # if using a custom tokenizer, make sure it's loaded and accessible in the format expected by litellm
        if self.config.custom_tokenizer is not None:
            self.tokenizer = create_pretrained_tokenizer(self.config.custom_tokenizer)
@@ -449,21 +443,14 @@ class LLM(RetryMixin, DebugMixin):

        # Handle native_tool_calling user-defined configuration
        if self.config.native_tool_calling is None:
-            logger.debug(
-                f'Using default tool calling behavior based on model evaluation: {model_name_supported}'
-            )
            return model_name_supported
        elif self.config.native_tool_calling is False:
-            logger.debug('Function calling explicitly disabled via configuration')
            return False
        else:
            # try to enable native tool calling if supported by the model
            supports_fn_call = litellm.supports_function_calling(
                model=self.config.model
            )
-            logger.debug(
-                f'Function calling explicitly enabled, litellm support: {supports_fn_call}'
-            )
            return supports_fn_call

    def _post_completion(self, response: ModelResponse) -> float:
@@ -612,10 +599,30 @@ class LLM(RetryMixin, DebugMixin):

        try:
            # try directly get response_cost from response
-            cost = getattr(response, '_hidden_params', {}).get('response_cost', None)
+            _hidden_params = getattr(response, '_hidden_params', {})
+            cost = _hidden_params.get('response_cost', None)
            if cost is None:
+                # Fall back to the cost reported in the provider response headers,
+                # but only when that header is actually present. Defaulting the
+                # lookup to 0.0 would leave `cost` non-None, so the
+                # litellm_completion_cost fallbacks below could never run and a
+                # missing header would be recorded as a zero-cost call.
+                _header_cost = (_hidden_params.get('additional_headers') or {}).get(
+                    'llm_provider-x-litellm-response-cost'
+                )
+                if _header_cost is not None:
+                    cost = float(_header_cost)
+
+            if cost is None:
+                try:
+                    cost = litellm_completion_cost(
+                        completion_response=response, **extra_kwargs
+                    )
+                except Exception as e:
+                    logger.error(f'Error getting cost from litellm: {e}')
+
+            if cost is None:
+                _model_name = '/'.join(self.config.model.split('/')[1:])
                cost = litellm_completion_cost(
-                    completion_response=response, **extra_kwargs
+                    completion_response=response, model=_model_name, **extra_kwargs
+                )
+                logger.debug(
+                    f'Using fallback model name {_model_name} to get cost: {cost}'
                )
            self.metrics.add_cost(cost)
            return cost
@@ -37,9 +37,9 @@ class IssueHandlerInterface(ABC):

    @abstractmethod
    def guess_success(
-        self, issue: GithubIssue, history: list[Event]
+        self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
    ) -> tuple[bool, list[bool] | None, str]:
-        """Guess if the issue has been resolved based on the agent's output."""
+        """Guess if the issue has been resolved based on the agent's output and git patch."""
        pass


@@ -249,13 +249,14 @@ class IssueHandler(IssueHandlerInterface):
        )

    def guess_success(
-        self, issue: GithubIssue, history: list[Event]
+        self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
    ) -> tuple[bool, None | list[bool], str]:
        """Guess if the issue is fixed based on the history and the issue description.

        Args:
            issue: The issue to check
            history: The agent's history
+            git_patch: Optional git patch showing the changes made
        """
        last_message = history[-1].message

@@ -665,6 +666,7 @@ class PRHandler(IssueHandler):
        review_thread: ReviewThread,
        issues_context: str,
        last_message: str,
+        git_patch: str | None = None,
    ) -> tuple[bool, str]:
        """Check if a review thread's feedback has been addressed."""
        files_context = json.dumps(review_thread.files, indent=4)
@@ -683,6 +685,7 @@ class PRHandler(IssueHandler):
            feedback=review_thread.comment,
            files_context=files_context,
            last_message=last_message,
+            git_patch=git_patch or 'No changes made yet',
        )

        return self._check_feedback_with_llm(prompt)
@@ -692,6 +695,7 @@ class PRHandler(IssueHandler):
        thread_comments: list[str],
        issues_context: str,
        last_message: str,
+        git_patch: str | None = None,
    ) -> tuple[bool, str]:
        """Check if thread comments feedback has been addressed."""
        thread_context = '\n---\n'.join(thread_comments)
@@ -708,6 +712,7 @@ class PRHandler(IssueHandler):
            issue_context=issues_context,
            thread_context=thread_context,
            last_message=last_message,
+            git_patch=git_patch or 'No changes made yet',
        )

        return self._check_feedback_with_llm(prompt)
@@ -717,6 +722,7 @@ class PRHandler(IssueHandler):
        review_comments: list[str],
        issues_context: str,
        last_message: str,
+        git_patch: str | None = None,
    ) -> tuple[bool, str]:
        """Check if review comments feedback has been addressed."""
        review_context = '\n---\n'.join(review_comments)
@@ -733,15 +739,17 @@ class PRHandler(IssueHandler):
            issue_context=issues_context,
            review_context=review_context,
            last_message=last_message,
+            git_patch=git_patch or 'No changes made yet',
        )

        return self._check_feedback_with_llm(prompt)

    def guess_success(
-        self, issue: GithubIssue, history: list[Event]
+        self, issue: GithubIssue, history: list[Event], git_patch: str | None = None
    ) -> tuple[bool, None | list[bool], str]:
-        """Guess if the issue is fixed based on the history and the issue description."""
+        """Guess if the issue is fixed based on the history, issue description and git patch."""
        last_message = history[-1].message
+
        issues_context = json.dumps(issue.closing_issues, indent=4)
        success_list = []
        explanation_list = []
@@ -751,7 +759,7 @@ class PRHandler(IssueHandler):
            for review_thread in issue.review_threads:
                if issues_context and last_message:
                    success, explanation = self._check_review_thread(
-                        review_thread, issues_context, last_message
+                        review_thread, issues_context, last_message, git_patch
                    )
                else:
                    success, explanation = False, 'Missing context or message'
@@ -761,7 +769,7 @@ class PRHandler(IssueHandler):
        elif issue.thread_comments:
            if issue.thread_comments and issues_context and last_message:
                success, explanation = self._check_thread_comments(
-                    issue.thread_comments, issues_context, last_message
+                    issue.thread_comments, issues_context, last_message, git_patch
                )
            else:
                success, explanation = (
@@ -774,7 +782,7 @@ class PRHandler(IssueHandler):
            # Handle PRs with only review comments (no file-specific review comments or thread comments)
            if issue.review_comments and issues_context and last_message:
                success, explanation = self._check_review_comments(
-                    issue.review_comments, issues_context, last_message
+                    issue.review_comments, issues_context, last_message, git_patch
                )
            else:
                success, explanation = (
@@ -1,4 +1,4 @@
-Given the following issue description and the last message from an AI agent attempting to fix it, determine if the issue has been successfully resolved.
+Given the following issue description and the last message from an AI agent attempting to fix it, determine if the issue has been successfully resolved based on the changes made and their expected impact. Make your own judgment based on the evidence provided - do NOT defer to or wait for human review.

 Issue description:
 {{ issue_context }}
@@ -6,10 +6,10 @@ Issue description:
 Last message from AI agent:
 {{ last_message }}

-(1) has the issue been successfully resolved?
-(2) If the issue has been resolved, please provide an explanation of what was done in the PR that can be sent to a human reviewer on github. If the issue has not been resolved, please provide an explanation of why.
+(1) Based on the changes made and their expected impact, has the issue been successfully resolved?
+(2) Provide a clear explanation of what was done in the PR and whether it addresses the issue. Focus on the concrete changes and their expected effects, not on the need for external verification.

-Answer in exactly the format below, with only true or false for success, and an explanation of the result.
+Answer in exactly the format below, with only true or false for success, and an explanation of the result that focuses on the actual changes and their impact.

 --- success
 true/false
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
openhands	96fd7c3cc4	fix: ensure panels occupy full window size when window is large - Add proper size constraints based on window dimensions - Add window resize handling to maintain constraints - Improve panel styles with proper flex behavior - Set appropriate min/max dimensions for both panels	2025-01-07 03:39:57 +00:00
Boxuan Li	fb53ae43c0	Add a stress test for eventstream runtime (#6038 ) Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>	2025-01-06 22:36:59 +00:00
Graham Neubig	1f8a0180d3	Add runtime size configuration feature (#5805 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>	2025-01-07 06:22:58 +08:00
Robert Brennan	8cfcdd7ba3	Add close method to EventStream (#6093 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: tofarr <tofarr@gmail.com>	2025-01-06 21:59:42 +00:00
tofarr	9515ac5e62	Feat - browser client can now close sessions. (#6088 )	2025-01-06 14:26:48 -07:00
Xingyao Wang	cebd391b7a	fix: better handle bashlex error (#6090 )	2025-01-06 20:45:59 +00:00
Robert Brennan	343b86429e	Retrieve GitHub IDs more efficiently (#6074 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-01-06 19:22:52 +00:00
sp.wack	09734467c0	fix(frontend): Only render loading indicator if events are messages (#6082 )	2025-01-06 13:03:44 -05:00
Dmitry Kozlov	17d722f3b3	Update README.md (#6076 ) Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>	2025-01-06 17:31:19 +00:00
tofarr	e310f6b776	Feature - sort conversations by created at (#6079 )	2025-01-06 09:07:53 -07:00
dependabot[bot]	5626a22e42	chore(deps-dev): bump @tanstack/eslint-plugin-query from 5.62.9 to 5.62.15 in /frontend in the eslint group (#6077 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-01-06 14:49:43 +00:00
tofarr	cde8aad47f	Feat multi conversations wiring (#6011 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-01-06 07:43:11 -07:00
stefand678	efd0267919	docs: Fix inconsistent comments (#6051 )	2025-01-06 13:43:53 +09:00
Xingyao Wang	f8735efadf	chore: improve error logging for RuntimeError (#6055 )	2025-01-05 23:02:42 +00:00
Boxuan Li	00d7395e09	Makefile: Fix poetry version detector (#6058 )	2025-01-05 22:43:05 +00:00
மனோஜ்குமார் பழனிச்சாமி	150463e629	feat: Add GPU support (#6042 )	2025-01-05 15:28:05 +09:00
Xingyao Wang	b7bbf0f5eb	fix(agent controller): missing await (#6040 )	2025-01-05 04:57:07 +00:00
f-diao	d2790c8b21	docs: Update the referenced py filename. (#6043 )	2025-01-05 04:10:51 +00:00
Engel Nyst	3d2138d9ce	Command line args fixes (#5990 )	2025-01-05 02:58:26 +00:00
OpenHands	e4cf2eee2d	Fix issue #4864 : [Bug]: make start-backend results in NotImplementedError: Non-relative patterns are unsupported (#5332 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2025-01-05 11:44:23 +09:00
dependabot[bot]	79551e67f6	chore(deps): bump docker/setup-qemu-action from 3.0.0 to 3.2.0 (#5798 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-01-05 03:43:26 +01:00
Xingyao Wang	f5f988e552	fix(agent controller): state.metrics is missing on exception (#6036 )	2025-01-05 01:08:47 +00:00
Talut Salako	0c58f469b4	fix: improve how llm models option (#6026 )	2025-01-05 00:25:45 +00:00
Xingyao Wang	56d7dccec9	fix(runtime): replace send_request with _send_action_server_request (#6035 )	2025-01-04 23:38:34 +00:00
Graham Neubig	411b63159f	fix: Use _send_action_server_request in send_action_for_execution (#5951 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-01-05 08:13:18 +09:00
OpenHands	5ca0beadfb	Fix issue #5995 : [Resolver] Resolver's summary suggests UNRESOLVED due to "no human reviewer" (#5996 ) Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: Graham Neubig <neubig@gmail.com>	2025-01-05 05:49:38 +09:00
Xingyao Wang	aaff3dd075	fix(llm): cost metrics calculation for unsupport litellm prefix (#6022 )	2025-01-04 18:09:13 +00:00
sai krishna rohith k	ef2053011d	feat: Added RateLimitError status on UI and Agent state (#5910 )	2025-01-04 12:07:07 -05:00
siu	e6499a68f6	fix(frontend): Prevent message submission during IME composition (#6025 )	2025-01-04 10:41:48 +00:00
Ryan H. Tran	33cb1d5f3c	chore: upgrade openhands-aci to 0.1.6 (#6023 )	2025-01-04 07:53:18 +00:00
Graham Neubig	5bdebac741	Add git patch info to guess_success prompt (#5950 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-01-04 10:56:50 +09:00
Robert Brennan	510c1644dd	Add bytes support to FileStore write operations (#6019 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-01-03 16:30:25 -07:00
Xingyao Wang	ec70af9412	refactor: Replace pexpect with libtmux in BashSession (#4881 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Robert Brennan <accounts@rbren.io>	2025-01-04 05:22:13 +08:00
Robert Brennan	761a574b09	Small style changes to repo picker (#6013 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Graham Neubig <neubig@gmail.com> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>	2025-01-03 20:44:30 +00:00
				`@@ -1 +0,0 @@`
				`export const MULTI_CONVO_UI_IS_ENABLED = false;`