fix: Error: EACCES: permission denied for corepack

fix: add the community-maintained model list (#654 )
Remove md around json (#685 )
2026-04-29 03:00:45 -04:00 · 2024-04-05 13:56:23 +08:00 · 2024-04-04 23:21:17 -04:00 · 2024-04-04 23:04:34 -04:00 · 2024-04-05 02:24:27 +00:00 · 2024-04-05 02:08:23 +00:00
530 changed files with 35345 additions and 25660 deletions
@@ -0,0 +1 @@
+*.ipynb linguist-vendored
@@ -0,0 +1,40 @@
+---
+name: Bug Report
+about: Report a problem with OpenDevin
+title: ''
+labels: 'bug'
+assignees: ''
+
+---
+<!-- You MUST fill out this template. We will close issues that don't include enough information to reproduce -->
+#### Describe the bug
+<!-- a short description of the problem -->
+
+#### Setup and configuration
+**Current version**:
+<!-- run `git log -n 1` to see this -->
+```bash
+```
+
+<!-- tell us everything about your environment -->
+**My config.toml and environment vars** (be sure to redact API keys):
+```toml
+```
+
+**My model and agent** (you can see these settings in the UI):
+* Model:
+* Agent:
+
+**Commands I ran to install and run OpenDevin**:
+```
+```
+
+**Steps to Reproduce**:
+1.
+2.
+3.
+
+**Logs, error messages, and screenshots**:
+
+#### Additional Context
+
@@ -0,0 +1,18 @@
+---
+name: Feature Request
+about: Suggest an idea for OpenDevin features
+title: ''
+labels: 'enhancement'
+assignees: ''
+
+---
+
+**What problem or use case are you trying to solve?**
+
+**Describe the UX of the solution you'd like**
+
+**Do you have thoughts on the technical implementation?**
+
+**Describe alternatives you've considered**
+
+**Additional context**
@@ -0,0 +1,18 @@
+---
+name: Technical Proposal
+about: Propose a new architecture or technology
+title: ''
+labels: 'proposal'
+assignees: ''
+
+---
+
+**Summary**
+
+**Motivation**
+
+**Technical Design**
+
+**Alternatives to Consider**
+
+**Additional context**
@@ -0,0 +1,17 @@
+name: Build & Run Tests
+
+# Triggered by every push and every pull request.
+on:
+  - push
+  - pull_request
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.11"
+      # Build the project with make, then run the test suite through Poetry.
+      - name: Run tests
+        run: |
+          make build
+          poetry run pytest ./tests
@@ -0,0 +1,59 @@
+name: Build and publish multi-arch container images
+
+# Runs on pushes to main, or manually with a required free-text reason.
+on:
+  push:
+    branches: [ main ]
+  workflow_dispatch:
+    inputs:
+      reason:
+        description: 'Reason for manual trigger'
+        required: true
+        default: ''
+
+jobs:
+  ghcr_build_and_push:
+    runs-on: ubuntu-latest
+    # Run for push events, or for manual triggers that supplied a non-empty reason.
+    if: github.event_name == 'push' || github.event.inputs.reason != ''
+
+    steps:
+      - name: checkout
+        uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
+
+      # The token is piped on stdin (--password-stdin) rather than passed as an argument.
+      - name: Log-in to ghcr.io
+        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
+
+      # For every directory that contains a Dockerfile:
+      #   1. skip it if it also contains a `.dockerfileignore` file;
+      #   2. ask that directory's Makefile for the full image name (`make get-full-image`)
+      #      and skip it if `docker manifest inspect` already resolves that image;
+      #   3. otherwise run `make all` in that directory.
+      # DOCKER_BUILD_ORG is the lower-cased owner part of `github.repository`, so the
+      # same script also works when run from a fork.
+      - name: Build and push multi-arch container images
+        run: |
+          # set env for fork repo
+          DOCKER_BUILD_ORG=$(echo "${{ github.repository }}" | tr '[A-Z]' '[a-z]' | cut -d '/' -f 1)
+          # Find directories containing Dockerfile but not containing .dockerfileignore
+          while IFS= read -r dockerfile_dir; do
+          
+            # Check if .dockerfileignore exists in the directory
+            if [ -f "$dockerfile_dir/.dockerfileignore" ]; then
+                echo "$dockerfile_dir/.dockerfileignore exists, skipping build and push"
+                continue
+            fi
+          
+            # Check if image was already exist in ghcr.io
+            pushd "$dockerfile_dir" > /dev/null
+            FULL_IMAGE=$(make get-full-image DOCKER_BUILD_ORG=$DOCKER_BUILD_ORG)
+            popd > /dev/null
+            EXISTS=$(docker manifest inspect "$FULL_IMAGE" > /dev/null 2>&1 && echo "true" || echo "false")
+            if [ "$EXISTS" == "true" ]; then
+              echo "Image $FULL_IMAGE already exists in ghcr.io, skipping build and push"
+              continue
+            fi
+          
+            # Build and push the image to ghcr.io
+            pushd "$dockerfile_dir" > /dev/null
+            make all DOCKER_BUILD_ORG=$DOCKER_BUILD_ORG
+            popd > /dev/null
+          done < <(find . -type f -name Dockerfile -exec dirname {} \; | sort -u)
@@ -0,0 +1,47 @@
+name: Lint
+
+# Triggered by every push and every pull request.
+on: [push, pull_request]
+
+jobs:
+  # Lints the frontend with the project's own `pnpm run lint` script.
+  lint-frontend:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Install PNPM
+        uses: pnpm/action-setup@v2
+        with:
+          package_json_file: frontend/package.json
+
+      - name: Install Node.js 20
+        uses: actions/setup-node@v2
+        with:
+          node-version: 20
+          cache: 'pnpm'
+          cache-dependency-path: 'frontend/pnpm-lock.yaml'
+
+      - name: Install dependencies
+        run: |
+          cd frontend
+          pnpm install --frozen-lockfile
+
+      - name: Lint
+        run: |
+          cd frontend
+          pnpm run lint
+
+  # Runs ruff and mypy over opendevin/ and agenthub/ using the configs in dev_config/python.
+  lint-python:
+    name: Lint python
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up python
+        uses: actions/setup-python@v2
+        with:
+          # Quoted so YAML keeps it a string: an unquoted version is parsed as a
+          # float (e.g. 3.10 would become 3.1).
+          python-version: '3.11'
+      - name: Install dependencies
+        run: pip install ruff mypy
+      - name: Run ruff
+        run: ruff check --config dev_config/python/ruff.toml opendevin/ agenthub/
+      - name: Run mypy
+        run: mypy --install-types --non-interactive --config-file dev_config/python/mypy.ini opendevin/ agenthub/
@@ -0,0 +1,203 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+/lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+requirements.txt
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+*venv/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+.vscode/
+
+# evaluation
+evaluation/SWE-bench/data
+
+# frontend
+
+# dependencies
+frontend/node_modules
+frontend/.pnp
+frontend/bun.lockb
+frontend/yarn.lock
+.pnp.js
+
+# testing
+frontend/coverage
+
+# production
+frontend/build
+frontend/dist
+
+# misc
+.DS_Store
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+logs
+
+# agent
+.envrc
+/workspace
+/debug
+
+# configuration
+config.toml
@@ -0,0 +1,89 @@
+# Contributing
+
+Thanks for your interest in contributing to OpenDevin! We welcome and appreciate contributions.
+To report bugs, create a [GitHub issue](https://github.com/OpenDevin/OpenDevin/issues/new/choose).
+
+## Contribution Guide
+### 1. Fork the Official Repository
+
+Fork [OpenDevin repository](https://github.com/OpenDevin/OpenDevin) into your own account.
+Clone your own forked repository into your local environment.
+
+```shell
+git clone git@github.com:<YOUR-USERNAME>/OpenDevin.git
+```
+
+### 2. Configure Git
+
+Set the official repository as your [upstream](https://www.atlassian.com/git/tutorials/git-forks-and-upstreams) so that you can synchronize with the latest updates in the official repository.
+Add the original repository as `upstream`:
+
+```shell
+cd OpenDevin
+git remote add upstream git@github.com:OpenDevin/OpenDevin.git
+```
+
+Verify that the remote is set.
+```shell
+git remote -v
+```
+You should see both `origin` and `upstream` in the output.
+
+### 3. Synchronize with Official Repository
+Synchronize with the latest commits from the official repository before coding.
+
+```shell
+git fetch upstream
+git checkout main
+git merge upstream/main
+git push origin main
+```
+
+### 4. Create a New Branch And Open a Pull Request
+After you finish your implementation, open your forked repository and create a pull request. The source branch is your new branch, and the target branch is the `main` branch of `OpenDevin/OpenDevin`. The PR should then appear in [OpenDevin PRs](https://github.com/OpenDevin/OpenDevin/pulls).
+
+The OpenDevin team will then review your code.
+
+## PR Rules
+
+### 1. Pull Request title
+
+As described [here](https://github.com/commitizen/conventional-commit-types/blob/master/index.json), a valid PR title should begin with one of the following prefixes:
+
+- `feat`: A new feature
+- `fix`: A bug fix
+- `doc`: Documentation only changes
+- `refactor`: A code change that neither fixes a bug nor adds a feature
+- `style`: A refactoring that improves code style
+- `perf`: A code change that improves performance
+- `test`: Adding missing tests or correcting existing tests
+- `ci`: Changes to CI configuration files and scripts (example scopes: `.github`, `ci` (Buildkite))
+- `chore`: Other changes that don't modify src or test files
+- `revert`: Reverts a previous commit
+
+For example, a PR title could be:
+- `refactor: modify package path`
+- `feat(frontend): xxxx`, where `(frontend)` means that this PR mainly focuses on the frontend component.
+
+You may also check out previous PRs in the [PR list](https://github.com/OpenDevin/OpenDevin/pulls).
+
+As described [here](https://github.com/OpenDevin/OpenDevin/labels), we have created several labels. Every PR should be tagged with the corresponding labels.
+
+### 2. Pull Request description
+
+- If your PR is small (such as a typo fix), you can go brief.
+- If it is large and you have changed a lot, it's better to write more details.
+
+
+## How to begin
+Please refer to the README in each module:
+- [frontend](./frontend/README.md)
+- [agenthub](./agenthub/README.md)
+- [evaluation](./evaluation/README.md)
+- [opendevin](./opendevin/README.md)
+    - [server](./opendevin/server/README.md)
+    - [mock server](./opendevin/mock/README.md)
+
+## Tests
+TODO: make sure the code passes the tests before submitting.
+
@@ -0,0 +1,25 @@
+The MIT License (MIT)
+=====================
+
+Copyright © 2023
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the “Software”), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,102 @@
+# Makefile for OpenDevin project
+
+# Variables
+# NOTE: make keeps the double quotes below as part of each variable's value.
+# Image pulled by the build target.
+DOCKER_IMAGE = ghcr.io/opendevin/sandbox
+# Port passed to uvicorn by start-backend and run.
+BACKEND_PORT = 3000
+# Backend address exported to the frontend by start-frontend.
+BACKEND_HOST = "127.0.0.1:$(BACKEND_PORT)"
+# Frontend port used by start-frontend and run.
+FRONTEND_PORT = 3001
+# Defaults offered by the setup-config prompts.
+DEFAULT_WORKSPACE_DIR = "./workspace"
+DEFAULT_MODEL = "gpt-4-0125-preview"
+# File written by setup-config.
+CONFIG_FILE = config.toml
+# pre-commit configuration installed by the build target.
+PRECOMMIT_CONFIG_PATH = "./dev_config/python/.pre-commit-config.yaml"
+
+# Build
+# Sets up the whole development environment, in this order:
+#   1. pulls $(DOCKER_IMAGE);
+#   2. installs Poetry via its install script, then runs `poetry install --without evaluation`;
+#   3. installs the pre-commit hooks from $(PRECOMMIT_CONFIG_PATH);
+#   4. runs the frontend's Node.js version check script;
+#   5. deletes frontend/node_modules if it was created by npm
+#      (detected through node_modules/.package-lock.json);
+#   6. installs corepack globally with npm when it is not on PATH;
+#   7. enables corepack under sudo, installs the frontend dependencies with pnpm
+#      and runs the `make-i18n` script.
+build:
+	@echo "Building project..."
+	@echo "Pulling Docker image..."
+	@docker pull $(DOCKER_IMAGE)
+	@echo "Installing Python dependencies..."
+	@curl -sSL https://install.python-poetry.org | python3 -
+	@poetry install --without evaluation
+	@echo "Activating Poetry shell..."
+	@echo "Installing pre-commit hooks..."
+	@poetry run pre-commit install --config $(PRECOMMIT_CONFIG_PATH)
+	@echo "Setting up frontend environment..."
+	@echo "Detect Node.js version..."
+	@cd frontend && node ./scripts/detect-node-version.js
+	@cd frontend && if [ -f node_modules/.package-lock.json ]; then \
+		echo "This project currently uses \"pnpm\" for dependency management. It has detected that dependencies were previously installed using \"npm\" and has automatically deleted the \"node_modules\" directory to prevent unnecessary conflicts."; \
+		rm -rf node_modules; \
+	fi
+	@which corepack > /dev/null || (echo "Installing corepack..." && npm install -g corepack)
+	@cd frontend && sudo corepack enable && pnpm install && pnpm run make-i18n
+
+# Start backend
+# Runs the `opendevin.server.listen:app` application with uvicorn, in the
+# foreground, on $(BACKEND_PORT), inside the Poetry environment.
+start-backend:
+	@echo "Starting backend..."
+	@poetry run uvicorn opendevin.server.listen:app --port $(BACKEND_PORT)
+
+# Start frontend
+# Runs the frontend's `start` script with pnpm, passing BACKEND_HOST and
+# FRONTEND_PORT to it as environment variables.
+start-frontend:
+	@echo "Starting frontend..."
+	@cd frontend && BACKEND_HOST=$(BACKEND_HOST) FRONTEND_PORT=$(FRONTEND_PORT) pnpm run start
+
+# Run the app
+# Starts the backend in the background with its output redirected to
+# logs/backend_<timestamp>.log, polls with `nc` until $(BACKEND_PORT) accepts
+# connections, then starts the frontend in the foreground on $(FRONTEND_PORT).
+# Exits with an error on Windows (OS = Windows_NT).
+#
+# The Windows message is single-quoted on purpose: inside double quotes the
+# shell treats backticks as command substitution and would execute `make run`,
+# `make start-frontend` and `make start-backend` instead of printing them.
+run:
+	@echo "Running the app..."
+	@if [ "$(OS)" = "Windows_NT" ]; then \
+		echo '`make run` is not supported on Windows. Please run `make start-frontend` and `make start-backend` separately.'; \
+		exit 1; \
+	fi
+	@mkdir -p logs
+	@poetry run nohup uvicorn opendevin.server.listen:app --port $(BACKEND_PORT) > logs/backend_$(shell date +'%Y%m%d_%H%M%S').log 2>&1 &
+	@echo "Waiting for the backend to start..."
+	@until nc -z localhost $(BACKEND_PORT); do sleep 0.1; done
+	@cd frontend && pnpm run start -- --port $(FRONTEND_PORT)
+
+# Setup config.toml
+# Interactively asks for the LLM model, API key, embedding model (plus the extra
+# settings needed for llama2 / azureopenai) and the workspace directory. Answers
+# are written to $(CONFIG_FILE).tmp, which is moved over $(CONFIG_FILE) only at
+# the very end.
+#
+# The two-line embedding prompt is printed with printf instead of echo "...\n...":
+# whether echo expands "\n" depends on the shell, so some shells printed a
+# literal "\n".
+setup-config:
+	@echo "Setting up config.toml..."
+	@read -p "Enter your LLM Model name (see https://docs.litellm.ai/docs/providers for full list) [default: $(DEFAULT_MODEL)]: " llm_model; \
+	 llm_model=$${llm_model:-$(DEFAULT_MODEL)}; \
+	 echo "LLM_MODEL=\"$$llm_model\"" > $(CONFIG_FILE).tmp
+
+	@read -p "Enter your LLM API key: " llm_api_key; \
+	 echo "LLM_API_KEY=\"$$llm_api_key\"" >> $(CONFIG_FILE).tmp
+
+	@printf '%s\n' "Enter your LLM Embedding Model" "Choices are openai, azureopenai, llama2 or leave blank to default to 'BAAI/bge-small-en-v1.5' via huggingface"; \
+	 read -p "> " llm_embedding_model; \
+		echo "LLM_EMBEDDING_MODEL=\"$$llm_embedding_model\"" >> $(CONFIG_FILE).tmp; \
+		if [ "$$llm_embedding_model" = "llama2" ]; then \
+			read -p "Enter the local model URL: " llm_base_url; \
+				echo "LLM_BASE_URL=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; \
+		elif [ "$$llm_embedding_model" = "azureopenai" ]; then \
+			read -p "Enter the Azure endpoint URL: " llm_base_url; \
+				echo "LLM_BASE_URL=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; \
+			read -p "Enter the Azure LLM Deployment Name: " llm_deployment_name; \
+				echo "LLM_DEPLOYMENT_NAME=\"$$llm_deployment_name\"" >> $(CONFIG_FILE).tmp; \
+			read -p "Enter the Azure API Version: " llm_api_version; \
+				echo "LLM_API_VERSION=\"$$llm_api_version\"" >> $(CONFIG_FILE).tmp; \
+		fi
+
+	@read -p "Enter your workspace directory [default: $(DEFAULT_WORKSPACE_DIR)]: " workspace_dir; \
+	 workspace_dir=$${workspace_dir:-$(DEFAULT_WORKSPACE_DIR)}; \
+	 echo "WORKSPACE_DIR=\"$$workspace_dir\"" >> $(CONFIG_FILE).tmp
+
+	@mv $(CONFIG_FILE).tmp $(CONFIG_FILE)
+
+# Help
+# Prints the list of available targets with a one-line description of each.
+# NOTE(review): "build-eval" is advertised here (and listed in .PHONY), but no
+# build-eval target is defined in the visible Makefile — confirm whether the
+# target is missing or this entry is stale.
+help:
+	@echo "Usage: make [target]"
+	@echo "Targets:"
+	@echo "  build               - Build project, including environment setup and dependencies."
+	@echo "  build-eval          - Build project evaluation pipeline, including environment setup and dependencies."
+	@echo "  start-backend       - Start the backend server for the OpenDevin project."
+	@echo "  start-frontend      - Start the frontend server for the OpenDevin project."
+	@echo "  run                 - Run the OpenDevin application, starting both backend and frontend servers."
+	@echo "                        Backend Log file will be stored in the 'logs' directory."
+	@echo "  setup-config        - Setup the configuration for OpenDevin by providing LLM API key, LLM Model name, and workspace directory."
+	@echo "  help                - Display this help message, providing information on available targets."
+
+# Phony targets
+.PHONY: build build-eval start-backend start-frontend run setup-config help
@@ -0,0 +1,253 @@
+<a name="readme-top"></a>
+<!--
+*** Thanks for checking out the Best-README-Template. If you have a suggestion
+*** that would make this better, please fork the repo and create a pull request
+*** or simply open an issue with the tag "enhancement".
+*** Don't forget to give the project a star!
+*** Thanks again! Now go create something AMAZING! :D
+-->
+
+
+
+<!-- PROJECT SHIELDS -->
+<!--
+*** I'm using markdown "reference style" links for readability.
+*** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
+*** See the bottom of this document for the declaration of the reference variables
+*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
+*** https://www.markdownguide.org/basic-syntax/#reference-style-links
+-->
+
+<div align="center">
+  <a href="https://github.com/OpenDevin/OpenDevin/graphs/contributors"><img src="https://img.shields.io/github/contributors/opendevin/opendevin?style=for-the-badge" alt="Contributors"></a>
+  <a href="https://github.com/OpenDevin/OpenDevin/network/members"><img src="https://img.shields.io/github/forks/opendevin/opendevin?style=for-the-badge" alt="Forks"></a>
+  <a href="https://github.com/OpenDevin/OpenDevin/stargazers"><img src="https://img.shields.io/github/stars/opendevin/opendevin?style=for-the-badge" alt="Stargazers"></a>
+  <a href="https://github.com/OpenDevin/OpenDevin/issues"><img src="https://img.shields.io/github/issues/opendevin/opendevin?style=for-the-badge" alt="Issues"></a>
+  <a href="https://github.com/OpenDevin/OpenDevin/blob/main/LICENSE"><img src="https://img.shields.io/github/license/opendevin/opendevin?style=for-the-badge" alt="MIT License"></a>
+</div>
+
+<!-- PROJECT LOGO -->
+<div align="center">
+  <img src="./logo.png" alt="Logo" width="200" height="200">
+  <h1 align="center">OpenDevin: Code Less, Make More</h1>
+</div>
+
+
+
+
+<!-- TABLE OF CONTENTS -->
+<details>
+  <summary>🗂️ Table of Contents</summary>
+  <ol>
+    <li><a href="#-mission">🎯 Mission</a></li>
+    <li><a href="#-what-is-devin">🤔 What is Devin?</a></li>
+    <li><a href="#-why-opendevin">🐚 Why OpenDevin?</a></li>
+    <li><a href="#-project-status">🚧 Project Status</a></li>
+    <li>
+      <a href="#-get-started">🚀 Get Started</a>
+      <ul>
+        <li><a href="#1-requirements">1. Requirements</a></li>
+        <li><a href="#2-build-and-setup-the-environment">2. Build and Setup The Environment</a></li>
+        <li><a href="#3-configuring-the-language-model">3. Configuring the Language Model</a></li>
+        <li><a href="#4-run-the-application">4. Run the Application</a></li>
+        <li><a href="#5-individual-server-startup">5. Individual Server Startup</a></li>
+        <li><a href="#6-help">6. Help</a></li>
+      </ul>
+    </li>
+    <li><a href="#%EF%B8%8F-research-strategy">⭐️ Research Strategy</a></li>
+    <li><a href="#-how-to-contribute">🤝 How to Contribute</a></li>
+    <li><a href="#-join-our-community">🤖 Join Our Community</a></li>
+    <li><a href="#%EF%B8%8F-built-with">🛠️ Built With</a></li>
+    <li><a href="#-license">📜 License</a></li>
+  </ol>
+</details>
+
+## 🎯 Mission
+
+[Project Demo Video](https://github.com/OpenDevin/OpenDevin/assets/38853559/71a472cc-df34-430c-8b1d-4d7286c807c9)
+
+
+Welcome to OpenDevin, an open-source project aiming to replicate Devin, an autonomous AI software engineer who is capable of executing complex engineering tasks and collaborating actively with users on software development projects. This project aspires to replicate, enhance, and innovate upon Devin through the power of the open-source community.
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## 🤔 What is Devin?
+Devin represents a cutting-edge autonomous agent designed to navigate the complexities of software engineering. It leverages a combination of tools such as a shell, code editor, and web browser, showcasing the untapped potential of LLMs in software development. Our goal is to explore and expand upon Devin's capabilities, identifying both its strengths and areas for improvement, to guide the progress of open code models.
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## 🐚 Why OpenDevin?
+The OpenDevin project is born out of a desire to replicate, enhance, and innovate beyond the original Devin model. By engaging the open-source community, we aim to tackle the challenges faced by Code LLMs in practical scenarios, producing works that significantly contribute to the community and pave the way for future advancements.
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## 🚧 Project Status
+
+OpenDevin is currently a work in progress, but you can already run the alpha version to see the end-to-end system in action. The project team is actively working on the following key milestones:
+
+- **UI**: Developing a user-friendly interface, including a chat interface, a shell demonstrating commands, and a web browser.
+- **Architecture**: Building a stable agent framework with a robust backend that can read, write, and run simple commands.
+- **Agent Capabilities**: Enhancing the agent's abilities to generate bash scripts, run tests, and perform other software engineering tasks.
+- **Evaluation**: Establishing a minimal evaluation pipeline that is consistent with Devin's evaluation criteria.
+
+After completing the MVP, the team will focus on research in various areas, including foundation models, specialist capabilities, evaluation, and agent studies.
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## 🚀 Get Started
+
+Getting started with the OpenDevin project is incredibly easy. Follow these simple steps to set up and run OpenDevin on your system:
+
+### 1. Requirements
+* Linux, Mac OS, or [WSL on Windows](https://learn.microsoft.com/en-us/windows/wsl/install)
+* [Docker](https://docs.docker.com/engine/install/) (for those on macOS, make sure to allow the default Docker socket to be used from advanced settings!)
+* [Python](https://www.python.org/downloads/) >= 3.11
+* [NodeJS](https://nodejs.org/en/download/package-manager) >= 18.17.1
+
+### 2. Build and Setup The Environment
+
+- **Build the Project:** Begin by building the project, which includes setting up the environment and installing dependencies. This step ensures that OpenDevin is ready to run smoothly on your system.
+    ```bash
+    make build
+    ```
+
+### 3. Configuring the Language Model
+
+OpenDevin supports a diverse array of Language Models (LMs) through the powerful [litellm](https://docs.litellm.ai) library. By default, we've chosen the mighty GPT-4 from OpenAI as our go-to model, but the world is your oyster! You can unleash the potential of Anthropic's suave Claude, the enigmatic Llama, or any other LM that piques your interest.
+
+To configure the LM of your choice, follow these steps:
+
+1. **Using the Makefile: The Effortless Approach**
+   With a single command, you can have a smooth LM setup for your OpenDevin experience. Simply run:
+   ```bash
+   make setup-config
+   ```
+   This command will prompt you to enter the LLM API key and model name, ensuring that OpenDevin is tailored to your specific needs.
+
+2. **Manual Config: The Artisanal Touch**
+   If you're feeling particularly adventurous, you can manually update the `config.toml` file located in the project's root directory. Here, you'll find the `LLM_API_KEY` and `LLM_MODEL` fields, where you can set the LM of your choosing.
+
+**Note on Alternative Models:**
+Some alternative models may prove more challenging to tame than others. Fear not, brave adventurer! We shall soon unveil LLM-specific documentation to guide you on your quest. And if you've already mastered the art of wielding a model other than OpenAI's GPT, we encourage you to [share your setup instructions with us](https://github.com/OpenDevin/OpenDevin/issues/417).
+
+For a full list of the LM providers and models available, please consult the [litellm documentation](https://docs.litellm.ai/docs/providers).
+
+### 4. Run the Application
+
+- **Run the Application:** Once the setup is complete, launching OpenDevin is as simple as running a single command. This command starts both the backend and frontend servers seamlessly, allowing you to interact with OpenDevin without any hassle.
+    ```bash
+    make run
+    ```
+
+### 5. Individual Server Startup
+
+- **Start the Backend Server:** If you prefer, you can start the backend server independently to focus on backend-related tasks or configurations.
+    ```bash
+    make start-backend
+    ```
+
+- **Start the Frontend Server:** Similarly, you can start the frontend server on its own to work on frontend-related components or interface enhancements.
+    ```bash
+    make start-frontend
+    ```
+
+### 6. Help
+
+- **Get Some Help:** Need assistance or information on available targets and commands? The help command provides all the necessary guidance to ensure a smooth experience with OpenDevin.
+    ```bash
+    make help
+    ```
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## ⭐️ Research Strategy
+
+Achieving full replication of production-grade applications with LLMs is a complex endeavor. Our strategy involves:
+
+1. **Core Technical Research:** Focusing on foundational research to understand and improve the technical aspects of code generation and handling.
+2. **Specialist Abilities:** Enhancing the effectiveness of core components through data curation, training methods, and more.
+3. **Task Planning:** Developing capabilities for bug detection, codebase management, and optimization.
+4. **Evaluation:** Establishing comprehensive evaluation metrics to better understand and improve our models.
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## 🤝 How to Contribute
+
+OpenDevin is a community-driven project, and we welcome contributions from everyone. Whether you're a developer, a researcher, or simply enthusiastic about advancing the field of software engineering with AI, there are many ways to get involved:
+
+- **Code Contributions:** Help us develop the core functionalities, frontend interface, or sandboxing solutions.
+- **Research and Evaluation:** Contribute to our understanding of LLMs in software engineering, participate in evaluating the models, or suggest improvements.
+- **Feedback and Testing:** Use the OpenDevin toolset, report bugs, suggest features, or provide feedback on usability.
+
+For details, please check [this document](./CONTRIBUTING.md).
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## 🤖 Join Our Community
+
+Join our Slack workspace by filling out the [form](https://forms.gle/758d5p6Ve8r2nxxq6). Stay updated on OpenDevin's progress, share ideas, and collaborate with fellow enthusiasts and experts. Let's simplify software engineering together!
+
+🐚 **Code less, make more with OpenDevin.**
+
+[![Star History Chart](https://api.star-history.com/svg?repos=OpenDevin/OpenDevin&type=Date)](https://star-history.com/#OpenDevin/OpenDevin&Date)
+
+## 🛠️ Built With
+
+OpenDevin is built using a combination of powerful frameworks and libraries, providing a robust foundation for its development. Here are the key technologies used in the project:
+
+![FastAPI](https://img.shields.io/badge/FastAPI-black?style=for-the-badge) ![uvicorn](https://img.shields.io/badge/uvicorn-black?style=for-the-badge) ![LiteLLM](https://img.shields.io/badge/LiteLLM-black?style=for-the-badge) ![Docker](https://img.shields.io/badge/Docker-black?style=for-the-badge) ![Ruff](https://img.shields.io/badge/Ruff-black?style=for-the-badge) ![MyPy](https://img.shields.io/badge/MyPy-black?style=for-the-badge) ![LlamaIndex](https://img.shields.io/badge/LlamaIndex-black?style=for-the-badge) ![React](https://img.shields.io/badge/React-black?style=for-the-badge)
+
+Please note that the selection of these technologies is in progress, and additional technologies may be added or existing ones may be removed as the project evolves. We strive to adopt the most suitable and efficient tools to enhance the capabilities of OpenDevin.
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+## 📜 License
+
+Distributed under the MIT License. See [`LICENSE`](./LICENSE) for more information.
+
+<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
+    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
+        ↑ Back to Top ↑
+    </a>
+</p>
+
+[contributors-shield]: https://img.shields.io/github/contributors/opendevin/opendevin?style=for-the-badge
+[contributors-url]: https://github.com/OpenDevin/OpenDevin/graphs/contributors
+[forks-shield]: https://img.shields.io/github/forks/opendevin/opendevin?style=for-the-badge
+[forks-url]: https://github.com/OpenDevin/OpenDevin/network/members
+[stars-shield]: https://img.shields.io/github/stars/opendevin/opendevin?style=for-the-badge
+[stars-url]: https://github.com/OpenDevin/OpenDevin/stargazers
+[issues-shield]: https://img.shields.io/github/issues/opendevin/opendevin?style=for-the-badge
+[issues-url]: https://github.com/OpenDevin/OpenDevin/issues
+[license-shield]: https://img.shields.io/github/license/opendevin/opendevin?style=for-the-badge
+[license-url]: https://github.com/OpenDevin/OpenDevin/blob/main/LICENSE
@@ -1,37 +0,0 @@
-import os
-
-__package_name__ = 'openhands_ai'
-
-
-def get_version():
-    # Try getting the version from pyproject.toml
-    try:
-        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-        with open(os.path.join(root_dir, 'pyproject.toml'), 'r') as f:
-            for line in f:
-                if line.startswith('version ='):
-                    return line.split('=')[1].strip().strip('"')
-    except FileNotFoundError:
-        pass
-
-    try:
-        from importlib.metadata import PackageNotFoundError, version
-
-        return version(__package_name__)
-    except (ImportError, PackageNotFoundError):
-        pass
-
-    try:
-        from pkg_resources import DistributionNotFound, get_distribution
-
-        return get_distribution(__package_name__).version
-    except (ImportError, DistributionNotFound):
-        pass
-
-    return 'unknown'
-
-
-try:
-    __version__ = get_version()
-except Exception:
-    __version__ = 'unknown'
@@ -0,0 +1,73 @@
+# Agent Framework Research
+
+In this folder, there may exist multiple implementations of `Agent` that will be used by the framework.
+
+For example, `agenthub/monologue_agent`, `agenthub/metagpt_agent`, `agenthub/codeact_agent`, etc.
+Contributors from different backgrounds and interests can choose to contribute to any (or all!) of these directions.
+
+## Constructing an Agent
+
+The abstraction for an agent can be found [here](../opendevin/agent.py).
+
+Agents are run inside of a loop. At each iteration, `agent.step()` is called with a
+[State](../opendevin/state.py) input, and the agent must output an [Action](../opendevin/action).
+
+Every agent also has a `self.llm` which it can use to interact with the LLM configured by the user.
+See the [LiteLLM docs for `self.llm.completion`](https://docs.litellm.ai/docs/completion).
+
+## State
+The `state` contains:
+* A history of actions taken by the agent, as well as any observations (e.g. file content, command output) from those actions
+* A list of actions/observations that have happened since the most recent step
+* A [`plan`](https://github.com/OpenDevin/OpenDevin/blob/main/opendevin/plan.py), which contains the main goal
+  * The agent can add and modify subtasks through the `AddTaskAction` and `ModifyTaskAction`
+
+## Actions
+Here is a list of available Actions, which can be returned by `agent.step()`:
+- [`CmdRunAction`](../opendevin/action/bash.py) - Runs a command inside a sandboxed terminal
+- [`CmdKillAction`](../opendevin/action/bash.py) - Kills a background command
+- [`FileReadAction`](../opendevin/action/fileop.py) - Reads the content of a file
+- [`FileWriteAction`](../opendevin/action/fileop.py) - Writes new content to a file
+- [`BrowseURLAction`](../opendevin/action/browse.py) - Gets the content of a URL
+- [`AgentRecallAction`](../opendevin/action/agent.py) - Searches memory (e.g. a vector database)
+- [`AddTaskAction`](../opendevin/action/tasks.py) - Adds a subtask to the plan
+- [`ModifyTaskAction`](../opendevin/action/tasks.py) - Changes the state of a subtask
+- [`AgentThinkAction`](../opendevin/action/agent.py) - A no-op that allows the agent to add plaintext to the history (as well as the chat log)
+- [`AgentFinishAction`](../opendevin/action/agent.py) - Stops the control loop, allowing the user to enter a new task
+
+You can use `action.to_dict()` and `action_from_dict` to serialize and deserialize actions.
+
+## Observations
+There are also several types of Observations. These are typically available in the step following the corresponding Action.
+But they may also appear as a result of asynchronous events (e.g. a message from the user, logs from a command running
+in the background).
+
+Here is a list of available Observations:
+- [`CmdOutputObservation`](../opendevin/observation/run.py)
+- [`BrowserOutputObservation`](../opendevin/observation/browse.py)
+- [`FileReadObservation`](../opendevin/observation/files.py)
+- [`FileWriteObservation`](../opendevin/observation/files.py)
+- [`UserMessageObservation`](../opendevin/observation/)
+- [`AgentRecallObservation`](../opendevin/observation/recall.py)
+- [`AgentErrorObservation`](../opendevin/observation/error.py)
+
+You can use `observation.to_dict()` and `observation_from_dict` to serialize and deserialize observations.
+
+## Interface
+Every agent must implement the following methods:
+
+### `step`
+```
+def step(self, state: "State") -> "Action":
+```
+`step` moves the agent forward one step towards its goal. This probably means
+sending a prompt to the LLM, then parsing the response into an `Action`.
+
+### `search_memory`
+```
+def search_memory(self, query: str) -> List[str]:
+```
+`search_memory` should return a list of events that match the query. This will be used
+for the `recall` action.
+
+You can optionally just return `[]` for this method, meaning the agent has no long-term memory.
@@ -1,39 +1,9 @@
 from dotenv import load_dotenv
-
-from openhands.agenthub.micro.agent import MicroAgent
-from openhands.agenthub.micro.registry import all_microagents
-from openhands.controller.agent import Agent
-
 load_dotenv()

+# Import agents after environment variables are loaded
+from . import monologue_agent # noqa: E402
+from . import codeact_agent # noqa: E402
+from . import planner_agent # noqa: E402

-from openhands.agenthub import (  # noqa: E402
-    browsing_agent,
-    codeact_agent,
-    delegator_agent,
-    dummy_agent,
-    planner_agent,
-)
-
-__all__ = [
-    'codeact_agent',
-    'planner_agent',
-    'delegator_agent',
-    'dummy_agent',
-    'browsing_agent',
-]
-
-for agent in all_microagents.values():
-    name = agent['name']
-    prompt = agent['prompt']
-
-    anon_class = type(
-        name,
-        (MicroAgent,),
-        {
-            'prompt': prompt,
-            'agent_definition': agent,
-        },
-    )
-
-    Agent.register(name, anon_class)
+__all__ = ['monologue_agent', 'codeact_agent', 'planner_agent']
@@ -1,4 +0,0 @@
-from openhands.agenthub.browsing_agent.browsing_agent import BrowsingAgent
-from openhands.controller.agent import Agent
-
-Agent.register('BrowsingAgent', BrowsingAgent)
@@ -1,223 +0,0 @@
-import os
-
-from browsergym.core.action.highlevel import HighLevelActionSet
-from browsergym.utils.obs import flatten_axtree_to_str
-
-from openhands.agenthub.browsing_agent.response_parser import BrowsingResponseParser
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.message import Message, TextContent
-from openhands.events.action import (
-    Action,
-    AgentFinishAction,
-    BrowseInteractiveAction,
-    MessageAction,
-)
-from openhands.events.event import EventSource
-from openhands.events.observation import BrowserOutputObservation
-from openhands.events.observation.observation import Observation
-from openhands.llm.llm import LLM
-from openhands.runtime.plugins import (
-    PluginRequirement,
-)
-
-USE_NAV = (
-    os.environ.get('USE_NAV', 'true') == 'true'
-)  # only disable NAV actions when running webarena and miniwob benchmarks
-USE_CONCISE_ANSWER = (
-    os.environ.get('USE_CONCISE_ANSWER', 'false') == 'true'
-)  # only return concise answer when running webarena and miniwob benchmarks
-
-if not USE_NAV and USE_CONCISE_ANSWER:
-    EVAL_MODE = True  # NAV actions disabled and concise answers enabled: the webarena and miniwob benchmark configuration
-else:
-    EVAL_MODE = False
-
-
-def get_error_prefix(last_browser_action: str) -> str:
-    return f'IMPORTANT! Last action is incorrect:\n{last_browser_action}\nThink again with the current observation of the page.\n'
-
-
-def get_system_message(goal: str, action_space: str) -> str:
-    return f"""\
-# Instructions
-Review the current state of the page and all other information to find the best
-possible next action to accomplish your goal. Your answer will be interpreted
-and executed by a program, make sure to follow the formatting instructions.
-
-# Goal:
-{goal}
-
-# Action Space
-{action_space}
-"""
-
-
-CONCISE_INSTRUCTION = """\
-
-Here is another example with chain of thought of a valid action when providing a concise answer to user:
-"
-In order to accomplish my goal I need to send the information asked back to the user. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I will send a message back to user with the answer.
-```send_msg_to_user("$279.49")```
-"
-"""
-
-
-def get_prompt(
-    error_prefix: str, cur_url: str, cur_axtree_txt: str, prev_action_str: str
-) -> str:
-    prompt = f"""\
-{error_prefix}
-
-# Current Page URL:
-{cur_url}
-
-# Current Accessibility Tree:
-{cur_axtree_txt}
-
-# Previous Actions
-{prev_action_str}
-
-Here is an example with chain of thought of a valid action when clicking on a button:
-"
-In order to accomplish my goal I need to click on the button with bid 12
-```click("12")```
-"
-""".strip()
-    if USE_CONCISE_ANSWER:
-        prompt += CONCISE_INSTRUCTION
-    return prompt
-
-
-class BrowsingAgent(Agent):
-    VERSION = '1.0'
-    """
-    An agent that interacts with the browser.
-    """
-
-    sandbox_plugins: list[PluginRequirement] = []
-    response_parser = BrowsingResponseParser()
-
-    def __init__(
-        self,
-        llm: LLM,
-        config: AgentConfig,
-    ) -> None:
-        """Initializes a new instance of the BrowsingAgent class.
-
-        Parameters:
-        - llm (LLM): The llm to be used by this agent
-        """
-        super().__init__(llm, config)
-        # define a configurable action space, with chat functionality, web navigation, and webpage grounding using accessibility tree and HTML.
-        # see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/highlevel.py for more details
-        action_subsets = ['chat', 'bid']
-        if USE_NAV:
-            action_subsets.append('nav')
-        self.action_space = HighLevelActionSet(
-            subsets=action_subsets,
-            strict=False,  # less strict on the parsing of the actions
-            multiaction=True,  # enable the agent to take multiple actions at once
-        )
-
-        self.reset()
-
-    def reset(self) -> None:
-        """Resets the Browsing Agent."""
-        super().reset()
-        self.cost_accumulator = 0
-        self.error_accumulator = 0
-
-    def step(self, state: State) -> Action:
-        """Performs one step using the Browsing Agent.
-        This includes gathering information on previous steps and prompting the model to make a browsing command to execute.
-
-        Parameters:
-        - state (State): used to get updated info
-
-        Returns:
-        - BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
-        - MessageAction(content) - Message action to run (e.g. ask for clarification)
-        - AgentFinishAction() - end the interaction
-        """
-        messages: list[Message] = []
-        prev_actions = []
-        cur_url = ''
-        cur_axtree_txt = ''
-        error_prefix = ''
-        last_obs = None
-        last_action = None
-
-        if EVAL_MODE and len(state.history) == 1:
-            # for webarena and miniwob++ eval, we need to retrieve the initial observation already in browser env
-            # initialize and retrieve the first observation by issuing a noop action
-            # For non-benchmark browsing, the browser env starts with a blank page, and the agent is expected to first navigate to desired websites
-            return BrowseInteractiveAction(browser_actions='noop()')
-
-        for event in state.history:
-            if isinstance(event, BrowseInteractiveAction):
-                prev_actions.append(event.browser_actions)
-                last_action = event
-            elif isinstance(event, MessageAction) and event.source == EventSource.AGENT:
-                # agent has responded, task finished.
-                return AgentFinishAction(outputs={'content': event.content})
-            elif isinstance(event, Observation):
-                last_obs = event
-
-        if EVAL_MODE:
-            prev_actions = prev_actions[1:]  # remove the first noop action
-
-        prev_action_str = '\n'.join(prev_actions)
-        # if the final BrowseInteractiveAction executed BrowserGym's send_msg_to_user,
-        # we should also send a message back to the user in OpenHands and call it a day
-        if (
-            isinstance(last_action, BrowseInteractiveAction)
-            and last_action.browsergym_send_msg_to_user
-        ):
-            return MessageAction(last_action.browsergym_send_msg_to_user)
-
-        if isinstance(last_obs, BrowserOutputObservation):
-            if last_obs.error:
-                # add error recovery prompt prefix
-                error_prefix = get_error_prefix(last_obs.last_browser_action)
-                self.error_accumulator += 1
-                if self.error_accumulator > 5:
-                    return MessageAction('Too many errors encountered. Task failed.')
-
-            cur_url = last_obs.url
-
-            try:
-                cur_axtree_txt = flatten_axtree_to_str(
-                    last_obs.axtree_object,
-                    extra_properties=last_obs.extra_element_properties,
-                    with_clickable=True,
-                    filter_visible_only=True,
-                )
-            except Exception as e:
-                logger.error(
-                    'Error when trying to process the accessibility tree: %s', e
-                )
-                return MessageAction('Error encountered when browsing.')
-
-        goal, _ = state.get_current_user_intent()
-
-        if goal is None:
-            goal = state.inputs['task']
-
-        system_msg = get_system_message(
-            goal,
-            self.action_space.describe(with_long_description=False, with_examples=True),
-        )
-
-        messages.append(Message(role='system', content=[TextContent(text=system_msg)]))
-
-        prompt = get_prompt(error_prefix, cur_url, cur_axtree_txt, prev_action_str)
-        messages.append(Message(role='user', content=[TextContent(text=prompt)]))
-
-        response = self.llm.completion(
-            messages=self.llm.format_messages_for_llm(messages),
-            stop=[')```', ')\n```'],
-        )
-        return self.response_parser.parse(response)
@@ -1,123 +0,0 @@
-import ast
-import re
-
-from openhands.controller.action_parser import ActionParser, ResponseParser
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import (
-    Action,
-    BrowseInteractiveAction,
-)
-
-
-class BrowsingResponseParser(ResponseParser):
-    def __init__(self):
-        # Need to pay attention to the item order in self.action_parsers
-        super().__init__()
-        self.action_parsers = [BrowsingActionParserMessage()]
-        self.default_parser = BrowsingActionParserBrowseInteractive()
-
-    def parse(self, response: str) -> Action:
-        action_str = self.parse_response(response)
-        return self.parse_action(action_str)
-
-    def parse_response(self, response) -> str:
-        action_str = response['choices'][0]['message']['content']
-        if action_str is None:
-            return ''
-        action_str = action_str.strip()
-        # Ensure action_str ends with ')```'
-        if action_str:
-            if not action_str.endswith('```'):
-                if action_str.endswith(')'):
-                    action_str += '```'  # prevent a duplicate closing parenthesis, e.g. send_msg_to_user('Done'))
-                else:
-                    action_str += ')```'  # expected format
-        logger.debug(action_str)
-        return action_str
-
-    def parse_action(self, action_str: str) -> Action:
-        for action_parser in self.action_parsers:
-            if action_parser.check_condition(action_str):
-                return action_parser.parse(action_str)
-        return self.default_parser.parse(action_str)
-
-
-class BrowsingActionParserMessage(ActionParser):
-    """Parser action:
-    - BrowseInteractiveAction(browser_actions) - unexpected response format, message back to user
-    """
-
-    def __init__(
-        self,
-    ):
-        pass
-
-    def check_condition(self, action_str: str) -> bool:
-        return '```' not in action_str
-
-    def parse(self, action_str: str) -> Action:
-        msg = f'send_msg_to_user("""{action_str}""")'
-        return BrowseInteractiveAction(
-            browser_actions=msg,
-            thought=action_str,
-            browsergym_send_msg_to_user=action_str,
-        )
-
-
-class BrowsingActionParserBrowseInteractive(ActionParser):
-    """Parser action:
-    - BrowseInteractiveAction(browser_actions) - handle send message to user function call in BrowserGym
-    """
-
-    def __init__(
-        self,
-    ):
-        pass
-
-    def check_condition(self, action_str: str) -> bool:
-        return True
-
-    def parse(self, action_str: str) -> Action:
-        # parse the action string into browser_actions and thought
-        # the LLM can return only one string, or both
-
-        # when both are returned, it looks like this:
-        ### Based on the current state of the page and the goal of finding out the president of the USA, the next action should involve searching for information related to the president.
-        ### To achieve this, we can navigate to a reliable source such as a search engine or a specific website that provides information about the current president of the USA.
-        ### Here is an example of a valid action to achieve this:
-        ### ```
-        ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/'
-        # in practice, BrowsingResponseParser.parse_response also added )``` to the end of the string
-
-        # when the LLM returns only one string, it looks like this:
-        ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/')
-        # and parse_response added )``` to the end of the string
-        parts = action_str.split('```')
-        browser_actions = (
-            parts[1].strip() if parts[1].strip() != '' else parts[0].strip()
-        )
-        thought = parts[0].strip() if parts[1].strip() != '' else ''
-
-        # if the LLM wants to talk to the user, we extract the message
-        msg_content = ''
-        for sub_action in browser_actions.split('\n'):
-            if 'send_msg_to_user(' in sub_action:
-                try:
-                    tree = ast.parse(sub_action)
-                    args = tree.body[0].value.args  # type: ignore
-                    msg_content = args[0].value
-                except SyntaxError:
-                    logger.error(f'Error parsing action: {sub_action}')
-                    # the syntax was not correct, but we can still try to get the message
-                    # e.g. send_msg_to_user("Hello, world!") or send_msg_to_user('Hello, world!'
-                    match = re.search(r'send_msg_to_user\((["\'])(.*?)\1\)', sub_action)
-                    if match:
-                        msg_content = match.group(2)
-                    else:
-                        msg_content = ''
-
-        return BrowseInteractiveAction(
-            browser_actions=browser_actions,
-            thought=thought,
-            browsergym_send_msg_to_user=msg_content,
-        )
@@ -1,158 +0,0 @@
-import collections
-import re
-from warnings import warn
-
-import yaml
-
-
-def yaml_parser(message):
-    """Parse a yaml message for the retry function."""
-    # saves gpt-3.5 from some yaml parsing errors
-    message = re.sub(r':\s*\n(?=\S|\n)', ': ', message)
-
-    try:
-        value = yaml.safe_load(message)
-        valid = True
-        retry_message = ''
-    except yaml.YAMLError as e:
-        warn(str(e), stacklevel=2)
-        value = {}
-        valid = False
-        retry_message = "Your response is not a valid yaml. Please try again and be careful to the format. Don't add any apology or comment, just the answer."
-    return value, valid, retry_message
-
-
-def _compress_chunks(text, identifier, skip_list, split_regex='\n\n+'):
-    """Compress a string by replacing redundant chunks by identifiers. Chunks are defined by the split_regex."""
-    text_list = re.split(split_regex, text)
-    text_list = [chunk.strip() for chunk in text_list]
-    counter = collections.Counter(text_list)
-    def_dict = {}
-    id = 0
-
-    # Store items that occur more than once in a dictionary
-    for item, count in counter.items():
-        if count > 1 and item not in skip_list and len(item) > 10:
-            def_dict[f'{identifier}-{id}'] = item
-            id += 1
-
-    # Replace redundant items with their identifiers in the text
-    compressed_text = '\n'.join(text_list)
-    for key, value in def_dict.items():
-        compressed_text = compressed_text.replace(value, key)
-
-    return def_dict, compressed_text
-
-
-def compress_string(text):
-    """Compress a string by replacing redundant paragraphs and lines with identifiers."""
-    # Perform paragraph-level compression
-    def_dict, compressed_text = _compress_chunks(
-        text, identifier='§', skip_list=[], split_regex='\n\n+'
-    )
-
-    # Perform line-level compression, skipping any paragraph identifiers
-    line_dict, compressed_text = _compress_chunks(
-        compressed_text, '¶', list(def_dict.keys()), split_regex='\n+'
-    )
-    def_dict.update(line_dict)
-
-    # Create a definitions section
-    def_lines = ['<definitions>']
-    for key, value in def_dict.items():
-        def_lines.append(f'{key}:\n{value}')
-    def_lines.append('</definitions>')
-    definitions = '\n'.join(def_lines)
-
-    return definitions + '\n' + compressed_text
-
-
-def extract_html_tags(text, keys):
-    """Extract the content within HTML tags for a list of keys.
-
-    Parameters
-    ----------
-    text : str
-        The input string containing the HTML tags.
-    keys : list of str
-        The HTML tags to extract the content from.
-
-    Returns:
-    -------
-    dict
-        A dictionary mapping each key to the list of stripped substrings of `text` enclosed by that tag; keys with no match are omitted.
-
-    Notes:
-    -----
-    Matching is case-sensitive: the lowercase normalization below is commented out.
-
-    """
-    content_dict = {}
-    # text = text.lower()
-    # keys = set([k.lower() for k in keys])
-    for key in keys:
-        pattern = f'<{key}>(.*?)</{key}>'
-        matches = re.findall(pattern, text, re.DOTALL)
-        if matches:
-            content_dict[key] = [match.strip() for match in matches]
-    return content_dict
-
-
-class ParseError(Exception):
-    pass
-
-
-def parse_html_tags_raise(text, keys=(), optional_keys=(), merge_multiple=False):
-    """A version of parse_html_tags that raises an exception if the parsing is not successful."""
-    content_dict, valid, retry_message = parse_html_tags(
-        text, keys, optional_keys, merge_multiple=merge_multiple
-    )
-    if not valid:
-        raise ParseError(retry_message)
-    return content_dict
-
-
-def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
-    """Satisfy the parse api, extracts 1 match per key and validates that all keys are present
-
-    Parameters
-    ----------
-    text : str
-        The input string containing the HTML tags.
-    keys : list of str
-        The HTML tags to extract the content from.
-    optional_keys : list of str
-        The HTML tags to extract the content from, but are optional.
-
-    Returns:
-    -------
-    dict
-        A dictionary mapping each key to subset of `text` that match the key.
-    bool
-        Whether the parsing was successful.
-    str
-        A message to be displayed to the agent if the parsing was not successful.
-    """
-    all_keys = tuple(keys) + tuple(optional_keys)
-    content_dict = extract_html_tags(text, all_keys)
-    retry_messages = []
-
-    for key in all_keys:
-        if key not in content_dict:
-            if key not in optional_keys:
-                retry_messages.append(f'Missing the key <{key}> in the answer.')
-        else:
-            val = content_dict[key]
-            content_dict[key] = val[0]
-            if len(val) > 1:
-                if not merge_multiple:
-                    retry_messages.append(
-                        f'Found multiple instances of the key {key}. You should have only one of them.'
-                    )
-                else:
-                    # merge the multiple instances
-                    content_dict[key] = '\n'.join(val)
-
-    valid = len(retry_messages) == 0
-    retry_message = '\n'.join(retry_messages)
-    return content_dict, valid, retry_message
@@ -0,0 +1,21 @@
+# CodeAct-based Agent Framework
+
+This folder implements the [CodeAct idea](https://arxiv.org/abs/2402.13463), which relies on the LLM to autonomously perform actions in a Bash shell. This requires more from the LLM itself: it needs to be capable enough to complete the whole task autonomously instead of getting stuck in an infinite loop.
+
+A minimalistic example can be found at [research/codeact/examples/run_flask_server_with_bash.py](./examples/run_flask_server_with_bash.py):
+
+```bash
+mkdir workspace
+PYTHONPATH=`pwd`:$PYTHONPATH python3 opendevin/main.py -d ./workspace -c CodeActAgent -t "Please write a flask app that returns 'Hello, World\!' at the root URL, then start the app on port 5000. python3 has already been installed for you."
+```
+
+
+Example: prompting `gpt-4-0125-preview` to write a Flask server, install the `flask` library, and start the server.
+
+<img width="951" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/325c3115-a343-4cc5-a92b-f1e5d552a077">
+
+<img width="957" alt="image" src="https://github.com/OpenDevin/OpenDevin/assets/38853559/68ad10c1-744a-4e9d-bb29-0f163d665a0a">
+
+Most things work as expected, except that at the end the model did not follow the instruction to stop the interaction by outputting `<execute> exit </execute>`.
+
+**TODO**: This should be fixable by either (1) including a complete in-context example like [this](https://github.com/xingyaoww/mint-bench/blob/main/mint/tasks/in_context_examples/reasoning/with_tool.txt), OR (2) collecting some interaction data like this and fine-tuning a model (like [this](https://github.com/xingyaoww/code-act), a more complex route).
@@ -1,4 +1,4 @@
-from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
-from openhands.controller.agent import Agent
+from opendevin.agent import Agent
+from .codeact_agent import CodeActAgent

-Agent.register('CodeActAgent', CodeActAgent)
+Agent.register("CodeActAgent", CodeActAgent)
@@ -1,504 +1,119 @@
-import json
-import os
-from collections import deque
+import re
+from typing import List, Mapping

-from litellm import ModelResponse

-import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.message import ImageContent, Message, TextContent
-from openhands.events.action import (
+from opendevin.agent import Agent
+from opendevin.state import State
+from opendevin.action import (
    Action,
-    AgentDelegateAction,
-    AgentFinishAction,
-    BrowseInteractiveAction,
-    BrowseURLAction,
    CmdRunAction,
-    FileEditAction,
-    IPythonRunCellAction,
-    MessageAction,
+    AgentEchoAction,
+    AgentFinishAction,
 )
-from openhands.events.observation import (
-    AgentDelegateObservation,
-    BrowserOutputObservation,
+from opendevin.observation import (
    CmdOutputObservation,
-    FileEditObservation,
-    IPythonRunCellObservation,
-    UserRejectObservation,
+    AgentMessageObservation,
 )
-from openhands.events.observation.error import ErrorObservation
-from openhands.events.observation.observation import Observation
-from openhands.events.serialization.event import truncate_content
-from openhands.llm.llm import LLM
-from openhands.runtime.plugins import (
-    AgentSkillsRequirement,
-    JupyterRequirement,
-    PluginRequirement,
+
+from opendevin.llm.llm import LLM
+
+# System prompt placed first in the conversation by CodeActAgent.step.
+# It instructs the model to wrap bash commands in <execute>...</execute> tags
+# (never in triple backticks) and to run "exit" once the task is complete.
+SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
+You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
+
+DO NOT provide code in ```triple backticks```. Instead, you should execute bash command on behalf of the user by wrapping them with <execute> and </execute>.
+For example:
+
+You can list the files in the current directory by executing the following command:
+<execute>ls</execute>
+
+You can also install packages using pip:
+<execute> pip install numpy </execute>
+
+You can also write a block of code to a file:
+<execute>
+echo "import math
+print(math.pi)" > math.py
+</execute>
+
+When you are done, execute "exit" to close the shell and end the conversation.
+"""
+
+# Feedback text returned (wrapped in an AgentEchoAction by CodeActAgent.step)
+# when the model's reply contains no <execute>...</execute> command.
+INVALID_INPUT_MESSAGE = (
+    "I don't understand your input. \n"
+    "If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
+    "If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
 )
-from openhands.utils.prompt import PromptManager
+
+
+def parse_response(response) -> str:
+    """
+    Returns the text of the first choice in an LLM response, re-closing an
+    unterminated <execute> tag.
+
+    CodeActAgent.step requests completions with "</execute>" as a stop
+    sequence, so a reply that opens the tag may arrive without its closing
+    half; it is appended here so the command can be matched afterwards.
+
+    Parameters:
+    - response: completion result exposing choices[0].message.content
+
+    Returns:
+    - str: the reply text, with "</execute>" appended when it was missing
+    """
+    reply = response.choices[0].message.content
+    has_open_tag = "<execute>" in reply
+    has_close_tag = "</execute>" in reply
+    if has_open_tag and not has_close_tag:
+        return reply + "</execute>"
+    return reply


 class CodeActAgent(Agent):
-    VERSION = '2.2'
-    """
-    The Code Act Agent is a minimalist agent.
-    The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
-
-    ### Overview
-
-    This agent implements the CodeAct idea ([paper](https://arxiv.org/abs/2402.01030), [tweet](https://twitter.com/xingyaow_/status/1754556835703751087)) that consolidates LLM agents’ **act**ions into a unified **code** action space for both *simplicity* and *performance* (see paper for more details).
-
-    The conceptual idea is illustrated below. At each turn, the agent can:
-
-    1. **Converse**: Communicate with humans in natural language to ask for clarification, confirmation, etc.
-    2. **CodeAct**: Choose to perform the task by executing code
-    - Execute any valid Linux `bash` command
-    - Execute any valid `Python` code with [an interactive Python interpreter](https://ipython.org/). This is simulated through `bash` command, see plugin system below for more details.
-
-    ![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3)
-
-    """
-
-    sandbox_plugins: list[PluginRequirement] = [
-        # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
-        # AgentSkillsRequirement provides a lot of Python functions,
-        # and it needs to be initialized before Jupyter for Jupyter to use those functions.
-        AgentSkillsRequirement(),
-        JupyterRequirement(),
-    ]
-
    def __init__(
        self,
        llm: LLM,
-        config: AgentConfig,
    ) -> None:
-        """Initializes a new instance of the CodeActAgent class.
+        """
+        Initializes a new instance of the CodeActAgent class.

        Parameters:
-        - llm (LLM): The llm to be used by this agent
+        - llm (LLM): The llm to be used by this agent; it is forwarded to the
+          Agent base class. The conversation history starts out empty.
        """
-        super().__init__(llm, config)
-        self.reset()
-
-        self.mock_function_calling = False
-        if not self.llm.is_function_calling_active():
-            logger.info(
-                f'Function calling not enabled for model {self.llm.config.model}. '
-                'Mocking function calling via prompting.'
-            )
-            self.mock_function_calling = True
-
-        # Function calling mode
-        self.tools = codeact_function_calling.get_tools(
-            codeact_enable_browsing=self.config.codeact_enable_browsing,
-            codeact_enable_jupyter=self.config.codeact_enable_jupyter,
-            codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
-        )
-        logger.debug(
-            f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}'
-        )
-        self.prompt_manager = PromptManager(
-            microagent_dir=os.path.join(os.path.dirname(__file__), 'micro')
-            if self.config.use_microagents
-            else None,
-            prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
-            disabled_microagents=self.config.disabled_microagents,
-        )
-
-        self.pending_actions: deque[Action] = deque()
-
-    def get_action_message(
-        self,
-        action: Action,
-        pending_tool_call_action_messages: dict[str, Message],
-    ) -> list[Message]:
-        """Converts an action into a message format that can be sent to the LLM.
-
-        This method handles different types of actions and formats them appropriately:
-        1. For tool-based actions (AgentDelegate, CmdRun, IPythonRunCell, FileEdit) and agent-sourced AgentFinish:
-            - In function calling mode: Stores the LLM's response in pending_tool_call_action_messages
-            - In non-function calling mode: Creates a message with the action string
-        2. For MessageActions: Creates a message with the text content and optional image content
-
-        Args:
-            action (Action): The action to convert. Can be one of:
-                - CmdRunAction: For executing bash commands
-                - IPythonRunCellAction: For running IPython code
-                - FileEditAction: For editing files
-                - BrowseInteractiveAction: For browsing the web
-                - AgentFinishAction: For ending the interaction
-                - MessageAction: For sending messages
-            pending_tool_call_action_messages (dict[str, Message]): Dictionary mapping response IDs
-                to their corresponding messages. Used in function calling mode to track tool calls
-                that are waiting for their results.
-
-        Returns:
-            list[Message]: A list containing the formatted message(s) for the action.
-                May be empty if the action is handled as a tool call in function calling mode.
-
-        Note:
-            In function calling mode, tool-based actions are stored in pending_tool_call_action_messages
-            rather than being returned immediately. They will be processed later when all corresponding
-            tool call results are available.
-        """
-        # create a regular message from an event
-        if isinstance(
-            action,
-            (
-                AgentDelegateAction,
-                IPythonRunCellAction,
-                FileEditAction,
-                BrowseInteractiveAction,
-                BrowseURLAction,
-            ),
-        ) or (isinstance(action, CmdRunAction) and action.source == 'agent'):
-            tool_metadata = action.tool_call_metadata
-            assert tool_metadata is not None, (
-                'Tool call metadata should NOT be None when function calling is enabled. Action: '
-                + str(action)
-            )
-
-            llm_response: ModelResponse = tool_metadata.model_response
-            assistant_msg = llm_response.choices[0].message
-
-            # Add the LLM message (assistant) that initiated the tool calls
-            # (overwrites any previous message with the same response_id)
-            logger.debug(
-                f'Tool calls type: {type(assistant_msg.tool_calls)}, value: {assistant_msg.tool_calls}'
-            )
-            pending_tool_call_action_messages[llm_response.id] = Message(
-                role=assistant_msg.role,
-                # tool call content SHOULD BE a string
-                content=[TextContent(text=assistant_msg.content or '')]
-                if assistant_msg.content is not None
-                else [],
-                tool_calls=assistant_msg.tool_calls,
-            )
-            return []
-        elif isinstance(action, AgentFinishAction):
-            role = 'user' if action.source == 'user' else 'assistant'
-
-            # when agent finishes, it has tool_metadata
-            # which has already been executed, and it doesn't have a response
-            # when the user finishes (/exit), we don't have tool_metadata
-            tool_metadata = action.tool_call_metadata
-            if tool_metadata is not None:
-                # take the response message from the tool call
-                assistant_msg = tool_metadata.model_response.choices[0].message
-                content = assistant_msg.content or ''
-
-                # save content if any, to thought
-                if action.thought:
-                    if action.thought != content:
-                        action.thought += '\n' + content
-                else:
-                    action.thought = content
-
-                # remove the tool call metadata
-                action.tool_call_metadata = None
-            return [
-                Message(
-                    role=role,
-                    content=[TextContent(text=action.thought)],
-                )
-            ]
-        elif isinstance(action, MessageAction):
-            role = 'user' if action.source == 'user' else 'assistant'
-            content = [TextContent(text=action.content or '')]
-            if self.llm.vision_is_active() and action.image_urls:
-                content.append(ImageContent(image_urls=action.image_urls))
-            return [
-                Message(
-                    role=role,
-                    content=content,
-                )
-            ]
-        elif isinstance(action, CmdRunAction) and action.source == 'user':
-            content = [
-                TextContent(text=f'User executed the command:\n{action.command}')
-            ]
-            return [
-                Message(
-                    role='user',
-                    content=content,
-                )
-            ]
-        return []
-
-    def get_observation_message(
-        self,
-        obs: Observation,
-        tool_call_id_to_message: dict[str, Message],
-    ) -> list[Message]:
-        """Converts an observation into a message format that can be sent to the LLM.
-
-        This method handles different types of observations and formats them appropriately:
-        - CmdOutputObservation: Formats command execution results with exit codes
-        - IPythonRunCellObservation: Formats IPython cell execution results, replacing base64 images
-        - FileEditObservation: Formats file editing results
-        - AgentDelegateObservation: Formats results from delegated agent tasks
-        - ErrorObservation: Formats error messages from failed actions
-        - UserRejectObservation: Formats user rejection messages
-
-        In function calling mode, observations with tool_call_metadata are stored in
-        tool_call_id_to_message for later processing instead of being returned immediately.
-
-        Args:
-            obs (Observation): The observation to convert
-            tool_call_id_to_message (dict[str, Message]): Dictionary mapping tool call IDs
-                to their corresponding messages (used in function calling mode)
-
-        Returns:
-            list[Message]: A list containing the formatted message(s) for the observation.
-                May be empty if the observation is handled as a tool response in function calling mode.
-
-        Raises:
-            ValueError: If the observation type is unknown
-        """
-        message: Message
-        max_message_chars = self.llm.config.max_message_chars
-        if isinstance(obs, CmdOutputObservation):
-            # if it doesn't have tool call metadata, it was triggered by a user action
-            if obs.tool_call_metadata is None:
-                text = truncate_content(
-                    f'\nObserved result of command executed by user:\n{obs.content}',
-                    max_message_chars,
-                )
-            else:
-                text = truncate_content(
-                    obs.content + obs.interpreter_details, max_message_chars
-                )
-            text += f'\n[Command finished with exit code {obs.exit_code}]'
-            message = Message(role='user', content=[TextContent(text=text)])
-        elif isinstance(obs, IPythonRunCellObservation):
-            text = obs.content
-            # replace base64 images with a placeholder
-            splitted = text.split('\n')
-            for i, line in enumerate(splitted):
-                if '![image](data:image/png;base64,' in line:
-                    splitted[i] = (
-                        '![image](data:image/png;base64, ...) already displayed to user'
-                    )
-            text = '\n'.join(splitted)
-            text = truncate_content(text, max_message_chars)
-            message = Message(role='user', content=[TextContent(text=text)])
-        elif isinstance(obs, FileEditObservation):
-            text = truncate_content(str(obs), max_message_chars)
-            message = Message(role='user', content=[TextContent(text=text)])
-        elif isinstance(obs, BrowserOutputObservation):
-            text = obs.get_agent_obs_text()
-            message = Message(
-                role='user',
-                content=[TextContent(text=text)],
-            )
-        elif isinstance(obs, AgentDelegateObservation):
-            text = truncate_content(
-                obs.outputs['content'] if 'content' in obs.outputs else '',
-                max_message_chars,
-            )
-            message = Message(role='user', content=[TextContent(text=text)])
-        elif isinstance(obs, ErrorObservation):
-            text = truncate_content(obs.content, max_message_chars)
-            text += '\n[Error occurred in processing last action]'
-            message = Message(role='user', content=[TextContent(text=text)])
-        elif isinstance(obs, UserRejectObservation):
-            text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
-            text += '\n[Last action has been rejected by the user]'
-            message = Message(role='user', content=[TextContent(text=text)])
-        else:
-            # If an observation message is not returned, it will cause an error
-            # when the LLM tries to return the next message
-            raise ValueError(f'Unknown observation type: {type(obs)}')
-
-        # Update the message as tool response properly
-        if (tool_call_metadata := obs.tool_call_metadata) is not None:
-            tool_call_id_to_message[tool_call_metadata.tool_call_id] = Message(
-                role='tool',
-                content=message.content,
-                tool_call_id=tool_call_metadata.tool_call_id,
-                name=tool_call_metadata.function_name,
-            )
-            # No need to return the observation message
-            # because it will be added by get_action_message when all the corresponding
-            # tool calls in the SAME request are processed
-            return []
-
-        return [message]
-
-    def reset(self) -> None:
-        """Resets the CodeAct Agent."""
-        super().reset()
+        super().__init__(llm)
+        self.messages: List[Mapping[str, str]] = []

    def step(self, state: State) -> Action:
-        """Performs one step using the CodeAct Agent.
-        This includes gathering info on previous steps and prompting the model to make a command to execute.
+        """
+        Performs one step of the agent: replays new observations to the LLM
+        and turns its reply into the next action.
+
+        On the first call the conversation is seeded with SYSTEM_MESSAGE and
+        the task read from state.plan.main_goal.
+
+        Parameters:
+        - state (State): supplies plan.main_goal and updated_info, an iterable
+          of (action, observation) pairs to append to the conversation.
+
+        Returns:
+        - CmdRunAction(command) - the bash command found between <execute> tags
+        - AgentFinishAction() - when that command is exactly "exit"
+        - AgentEchoAction(content) - INVALID_INPUT_MESSAGE when no command was found
+
+        Raises:
+        - AssertionError: if the main goal is empty or a previous action is
+          neither a CmdRunAction nor an AgentEchoAction
+        - NotImplementedError: if an observation has an unsupported type
+        """
+        if len(self.messages) == 0:
+            assert state.plan.main_goal, "Expecting instruction to be set"
+            self.messages = [
+                {"role": "system", "content": SYSTEM_MESSAGE},
+                {"role": "user", "content": state.plan.main_goal},
+            ]
+        updated_info = state.updated_info
+        if updated_info:
+            for prev_action, obs in updated_info:
+                assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
+                if isinstance(obs, AgentMessageObservation):  # warning message from itself
+                    self.messages.append({"role": "user", "content": obs.content})
+                elif isinstance(obs, CmdOutputObservation):
+                    content = "OBSERVATION:\n" + obs.content
+                    # Single closing bracket: the original emitted "]]", leaving a stray character in the prompt.
+                    content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]"
+                    self.messages.append({"role": "user", "content": content})
+                else:
+                    raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
+        # "</execute>" is a stop sequence, so generation halts right after the first command;
+        # parse_response restores the closing tag that the stop sequence cut off.
+        response = self.llm.completion(
+            messages=self.messages,
+            stop=["</execute>"],
+            temperature=0.0,
+            seed=42,
+        )
+        action_str: str = parse_response(response)
+        self.messages.append({"role": "assistant", "content": action_str})

-        Parameters:
-        - state (State): used to get updated info
+        # DOTALL lets a command span several lines (e.g. a multi-line echo into a file).
+        command = re.search(r"<execute>(.*)</execute>", action_str, re.DOTALL)
+        if command is not None:
+            # a command was found
+            command_group = command.group(1)
+            if command_group.strip() == "exit":
+                return AgentFinishAction()
+            return CmdRunAction(command=command_group)
+        else:
+            # No command in the reply: send an error message back so the model can continue, similar to
+            # https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
+            return AgentEchoAction(content=INVALID_INPUT_MESSAGE)  # warning message to itself

-        Returns:
-        - CmdRunAction(command) - bash command to run
-        - IPythonRunCellAction(code) - IPython code to run
-        - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
-        - MessageAction(content) - Message action to run (e.g. ask for clarification)
-        - AgentFinishAction() - end the interaction
-        """
-        # Continue with pending actions if any
-        if self.pending_actions:
-            return self.pending_actions.popleft()

-        # if we're done, go back
-        latest_user_message = state.get_last_user_message()
-        if latest_user_message and latest_user_message.content.strip() == '/exit':
-            return AgentFinishAction()
+    def search_memory(self, query: str) -> List[str]:
+        """
+        Memory search is not provided by this agent.
+
+        Parameters:
+        - query (str): the search text (unused)
+
+        Raises:
+        - NotImplementedError: always
+        """
+        reason = "Implement this abstract method"
+        raise NotImplementedError(reason)

-        # prepare what we want to send to the LLM
-        messages = self._get_messages(state)
-        params: dict = {
-            'messages': self.llm.format_messages_for_llm(messages),
-        }
-        params['tools'] = self.tools
-        if self.mock_function_calling:
-            params['mock_function_calling'] = True
-        response = self.llm.completion(**params)
-        actions = codeact_function_calling.response_to_actions(response)
-        for action in actions:
-            self.pending_actions.append(action)
-        return self.pending_actions.popleft()
-
-    def _get_messages(self, state: State) -> list[Message]:
-        """Constructs the message history for the LLM conversation.
-
-        This method builds a structured conversation history by processing events from the state
-        and formatting them into messages that the LLM can understand. It handles both regular
-        message flow and function-calling scenarios.
-
-        The method performs the following steps:
-        1. Initializes with system prompt and optional initial user message
-        2. Processes events (Actions and Observations) into messages
-        3. Handles tool calls and their responses in function-calling mode
-        4. Manages message role alternation (user/assistant/tool)
-        5. Applies caching for specific LLM providers (e.g., Anthropic)
-        6. Adds environment reminders for non-function-calling mode
-
-        Args:
-            state (State): The current state object containing conversation history and other metadata
-
-        Returns:
-            list[Message]: A list of formatted messages ready for LLM consumption, including:
-                - System message with prompt
-                - Initial user message (if configured)
-                - Action messages (from both user and assistant)
-                - Observation messages (including tool responses)
-                - Environment reminders (in non-function-calling mode)
-
-        Note:
-            - In function-calling mode, tool calls and their responses are carefully tracked
-              to maintain proper conversation flow
-            - Messages from the same role are combined to prevent consecutive same-role messages
-            - For Anthropic models, specific messages are cached according to their documentation
-        """
-        if not self.prompt_manager:
-            raise Exception('Prompt Manager not instantiated.')
-
-        messages: list[Message] = [
-            Message(
-                role='system',
-                content=[
-                    TextContent(
-                        text=self.prompt_manager.get_system_message(),
-                        cache_prompt=self.llm.is_caching_prompt_active(),
-                    )
-                ],
-            )
-        ]
-        example_message = self.prompt_manager.get_example_user_message()
-        if example_message:
-            messages.append(
-                Message(
-                    role='user',
-                    content=[TextContent(text=example_message)],
-                    cache_prompt=self.llm.is_caching_prompt_active(),
-                )
-            )
-
-        pending_tool_call_action_messages: dict[str, Message] = {}
-        tool_call_id_to_message: dict[str, Message] = {}
-        events = list(state.history)
-        for event in events:
-            # create a regular message from an event
-            if isinstance(event, Action):
-                messages_to_add = self.get_action_message(
-                    action=event,
-                    pending_tool_call_action_messages=pending_tool_call_action_messages,
-                )
-            elif isinstance(event, Observation):
-                messages_to_add = self.get_observation_message(
-                    obs=event,
-                    tool_call_id_to_message=tool_call_id_to_message,
-                )
-            else:
-                raise ValueError(f'Unknown event type: {type(event)}')
-
-            # Check pending tool call action messages and see if they are complete
-            _response_ids_to_remove = []
-            for (
-                response_id,
-                pending_message,
-            ) in pending_tool_call_action_messages.items():
-                assert pending_message.tool_calls is not None, (
-                    'Tool calls should NOT be None when function calling is enabled & the message is considered pending tool call. '
-                    f'Pending message: {pending_message}'
-                )
-                if all(
-                    tool_call.id in tool_call_id_to_message
-                    for tool_call in pending_message.tool_calls
-                ):
-                    # If complete:
-                    # -- 1. Add the message that **initiated** the tool calls
-                    messages_to_add.append(pending_message)
-                    # -- 2. Add the tool calls **results***
-                    for tool_call in pending_message.tool_calls:
-                        messages_to_add.append(tool_call_id_to_message[tool_call.id])
-                        tool_call_id_to_message.pop(tool_call.id)
-                    _response_ids_to_remove.append(response_id)
-            # Cleanup the processed pending tool messages
-            for response_id in _response_ids_to_remove:
-                pending_tool_call_action_messages.pop(response_id)
-
-            for message in messages_to_add:
-                if message:
-                    if message.role == 'user':
-                        self.prompt_manager.enhance_message(message)
-                    # handle error if the message is the SAME role as the previous message
-                    # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'}
-                    # there shouldn't be two consecutive messages from the same role
-                    # NOTE: we shouldn't combine tool messages because each of them has a different tool_call_id
-                    if (
-                        messages
-                        and messages[-1].role == message.role
-                        and message.role != 'tool'
-                    ):
-                        messages[-1].content.extend(message.content)
-                    else:
-                        messages.append(message)
-
-        if self.llm.is_caching_prompt_active():
-            # NOTE: this is only needed for anthropic
-            # following logic here:
-            # https://github.com/anthropics/anthropic-quickstarts/blob/8f734fd08c425c6ec91ddd613af04ff87d70c5a0/computer-use-demo/computer_use_demo/loop.py#L241-L262
-            breakpoints_remaining = 3  # remaining 1 for system/tool
-            for message in reversed(messages):
-                if message.role == 'user' or message.role == 'tool':
-                    if breakpoints_remaining > 0:
-                        message.content[
-                            -1
-                        ].cache_prompt = True  # Last item inside the message content
-                        breakpoints_remaining -= 1
-                    else:
-                        break
-
-        return messages
@@ -1,554 +0,0 @@
-"""This file contains the function calling implementation for different actions.
-
-This is similar to the functionality of `CodeActResponseParser`.
-"""
-
-import json
-
-from browsergym.core.action.highlevel import HighLevelActionSet
-from litellm import (
-    ChatCompletionToolParam,
-    ChatCompletionToolParamFunctionChunk,
-    ModelResponse,
-)
-
-from openhands.core.exceptions import FunctionCallNotExistsError
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import (
-    Action,
-    AgentDelegateAction,
-    AgentFinishAction,
-    BrowseInteractiveAction,
-    BrowseURLAction,
-    CmdRunAction,
-    FileEditAction,
-    IPythonRunCellAction,
-    MessageAction,
-)
-from openhands.events.tool import ToolCallMetadata
-
-# Description sent to the LLM for the `execute_bash` tool. It covers three cases:
-# long-running commands, interactive processes (exit code `-1`), and timeouts.
-_BASH_DESCRIPTION = """Execute a bash command in the terminal.
-* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
-* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
-* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
-"""
-
-# Function-calling schema for `execute_bash`; its only (required) parameter is
-# the `command` string.
-CmdRunTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='execute_bash',
-        description=_BASH_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'command': {
-                    'type': 'string',
-                    'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.',
-                },
-            },
-            'required': ['command'],
-        },
-    ),
-)
-
-# Description sent to the LLM for the `execute_ipython_cell` tool.
-_IPYTHON_DESCRIPTION = """Run a cell of Python code in an IPython environment.
-* The assistant should define variables and import packages before using them.
-* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).
-"""
-
-# Function-calling schema for `execute_ipython_cell`; its only (required)
-# parameter is the `code` string.
-IPythonTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='execute_ipython_cell',
-        description=_IPYTHON_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'code': {
-                    'type': 'string',
-                    'description': 'The Python code to execute. Supports magic commands like %pip.',
-                },
-            },
-            'required': ['code'],
-        },
-    ),
-)
-
-# Description sent to the LLM for the `edit_file` tool, including three worked
-# examples. Each short-file listing is bracketed by a "beginning of the file"
-# and an "end of the file" marker, and the example actions target the same
-# `/path/to/file.py` that the listings show.
-_FILE_EDIT_DESCRIPTION = """Edit a file.
-* The assistant can edit files by specifying the file path and providing a draft of the new file content.
-* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
-* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
-* To append to a file, set both `start` and `end` to `-1`.
-* If the file doesn't exist, a new file will be created with the provided content.
-
-**Example 1: general edit for short files**
-For example, given an existing file `/path/to/file.py` that looks like this:
-(this is the beginning of the file)
-1|class MyClass:
-2|    def __init__(self):
-3|        self.x = 1
-4|        self.y = 2
-5|        self.z = 3
-6|
-7|print(MyClass().z)
-8|print(MyClass().x)
-(this is the end of the file)
-
-The assistant wants to edit the file to look like this:
-(this is the beginning of the file)
-1|class MyClass:
-2|    def __init__(self):
-3|        self.x = 1
-4|        self.y = 2
-5|
-6|print(MyClass().y)
-(this is the end of the file)
-
-The assistant may produce an edit action like this:
-path="/path/to/file.py" start=1 end=-1
-content=```
-class MyClass:
-    def __init__(self):
-        # no changes before
-        self.y = 2
-        # self.z is removed
-
-# MyClass().z is removed
-print(MyClass().y)
-```
-
-**Example 2: append to file for short files**
-For example, given an existing file `/path/to/file.py` that looks like this:
-(this is the beginning of the file)
-1|class MyClass:
-2|    def __init__(self):
-3|        self.x = 1
-4|        self.y = 2
-5|        self.z = 3
-6|
-7|print(MyClass().z)
-8|print(MyClass().x)
-(this is the end of the file)
-
-To append the following lines to the file:
-```python
-print(MyClass().y)
-```
-
-The assistant may produce an edit action like this:
-path="/path/to/file.py" start=-1 end=-1
-content=```
-print(MyClass().y)
-```
-
-**Example 3: edit for long files**
-
-Given an existing file `/path/to/file.py` that looks like this:
-(1000 more lines above)
-1001|class MyClass:
-1002|    def __init__(self):
-1003|        self.x = 1
-1004|        self.y = 2
-1005|        self.z = 3
-1006|
-1007|print(MyClass().z)
-1008|print(MyClass().x)
-(2000 more lines below)
-
-The assistant wants to edit the file to look like this:
-
-(1000 more lines above)
-1001|class MyClass:
-1002|    def __init__(self):
-1003|        self.x = 1
-1004|        self.y = 2
-1005|
-1006|print(MyClass().y)
-(2000 more lines below)
-
-The assistant may produce an edit action like this:
-path="/path/to/file.py" start=1001 end=1008
-content=```
-class MyClass:
-    def __init__(self):
-        # no changes before
-        self.y = 2
-        # self.z is removed
-
-# MyClass().z is removed
-print(MyClass().y)
-```
-"""
-
-# Function-calling schema for the `edit_file` tool.
-# The draft is exposed as `content` so that the property name agrees with the
-# `required` list below and with the `content=` examples in the tool
-# description; the parsed arguments are forwarded as keyword arguments to
-# `FileEditAction`.
-LLMBasedFileEditTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='edit_file',
-        description=_FILE_EDIT_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'path': {
-                    'type': 'string',
-                    'description': 'The absolute path to the file to be edited.',
-                },
-                'content': {
-                    'type': 'string',
-                    'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.',
-                },
-                'start': {
-                    'type': 'integer',
-                    'description': 'The starting line number for the edit (1-indexed, inclusive). Default is 1.',
-                },
-                'end': {
-                    'type': 'integer',
-                    'description': 'The ending line number for the edit (1-indexed, inclusive). Default is -1 (end of file).',
-                },
-            },
-            'required': ['path', 'content'],
-        },
-    ),
-)
-
-# Description sent to the LLM for the `str_replace_editor` tool.
-_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files
-* State is persistent across command calls and discussions with the user
-* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
-* The `create` command cannot be used if the specified `path` already exists as a file
-* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
-* The `undo_edit` command will revert the last edit made to the file at `path`
-
-Notes for using the `str_replace` command:
-* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
-* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
-* The `new_str` parameter should contain the edited lines that should replace the `old_str`
-"""
-
-# Function-calling schema for `str_replace_editor`. Only `command` and `path`
-# are required by the schema; the remaining parameters apply to specific
-# commands, as spelled out in each parameter's description.
-StrReplaceEditorTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='str_replace_editor',
-        description=_STR_REPLACE_EDITOR_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'command': {
-                    'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
-                    'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
-                    'type': 'string',
-                },
-                'path': {
-                    'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
-                    'type': 'string',
-                },
-                'file_text': {
-                    'description': 'Required parameter of `create` command, with the content of the file to be created.',
-                    'type': 'string',
-                },
-                'old_str': {
-                    'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
-                    'type': 'string',
-                },
-                'new_str': {
-                    'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
-                    'type': 'string',
-                },
-                'insert_line': {
-                    'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
-                    'type': 'integer',
-                },
-                'view_range': {
-                    'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
-                    'items': {'type': 'integer'},
-                    'type': 'array',
-                },
-            },
-            'required': ['command', 'path'],
-        },
-    ),
-)
-
-
-# Description sent to the LLM for the `web_read` tool. The tool is referred to
-# by its registered function name (`web_read`) so the model calls a tool that
-# actually exists.
-_WEB_DESCRIPTION = """Read (convert to markdown) content from a webpage. You should prefer using the `web_read` tool over the `browser` tool, but do use the `browser` tool if you need to interact with a webpage (e.g., click a button, fill out a form, etc.).
-
-You may use the `web_read` tool to read content from a webpage, and even search the webpage content using a Google search query (e.g., url=`https://www.google.com/search?q=YOUR_QUERY`).
-"""
-
-# Function-calling schema for `web_read`; its only (required) parameter is the
-# `url` string.
-WebReadTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='web_read',
-        description=_WEB_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'url': {
-                    'type': 'string',
-                    'description': 'The URL of the webpage to read. You can also use a Google search query here (e.g., `https://www.google.com/search?q=YOUR_QUERY`).',
-                }
-            },
-            'required': ['url'],
-        },
-    ),
-)
-
-# BrowserGym action set (see browsergym/core/action/highlevel.py), restricted
-# to the 'bid' and 'nav' subsets.
-_browser_action_space = HighLevelActionSet(
-    subsets=['bid', 'nav'],
-    strict=False,  # less strict on the parsing of the actions
-    multiaction=True,  # allow the agent to take multiple actions at once
-)
-
-
-# Top-level description sent to the LLM for the `browser` tool. It points the
-# model at the "code" parameter for the list of supported functions.
-_BROWSER_DESCRIPTION = """Interact with the browser using Python code. Use it ONLY when you need to interact with a webpage.
-
-See the description of "code" parameter for more details.
-
-Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
-More than 2-3 actions usually leads to failure or unexpected behavior. Example:
-fill('a12', 'example with "quotes"')
-click('a51')
-click('48', button='middle', modifiers=['Shift'])
-"""
-
-# Reference text listing the supported browser functions; it is appended to
-# the `code` parameter description of the `browser` tool.
-# Do not reword: the module checks at import time that every BrowserGym action
-# signature and description appears verbatim in this string.
-_BROWSER_TOOL_DESCRIPTION = """
-The following 15 functions are available. Nothing else is supported.
-
-goto(url: str)
-    Description: Navigate to a url.
-    Examples:
-        goto('http://www.example.com')
-
-go_back()
-    Description: Navigate to the previous page in history.
-    Examples:
-        go_back()
-
-go_forward()
-    Description: Navigate to the next page in history.
-    Examples:
-        go_forward()
-
-noop(wait_ms: float = 1000)
-    Description: Do nothing, and optionally wait for the given time (in milliseconds).
-    You can use this to get the current page content and/or wait for the page to load.
-    Examples:
-        noop()
-
-        noop(500)
-
-scroll(delta_x: float, delta_y: float)
-    Description: Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event.
-    Examples:
-        scroll(0, 200)
-
-        scroll(-50.2, -100.5)
-
-fill(bid: str, value: str)
-    Description: Fill out a form field. It focuses the element and triggers an input event with the entered text. It works for <input>, <textarea> and [contenteditable] elements.
-    Examples:
-        fill('237', 'example value')
-
-        fill('45', 'multi-line\nexample')
-
-        fill('a12', 'example with "quotes"')
-
-select_option(bid: str, options: str | list[str])
-    Description: Select one or multiple options in a <select> element. You can specify option value or label to select. Multiple options can be selected.
-    Examples:
-        select_option('a48', 'blue')
-
-        select_option('c48', ['red', 'green', 'blue'])
-
-click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
-    Description: Click an element.
-    Examples:
-        click('a51')
-
-        click('b22', button='right')
-
-        click('48', button='middle', modifiers=['Shift'])
-
-dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
-    Description: Double click an element.
-    Examples:
-        dblclick('12')
-
-        dblclick('ca42', button='right')
-
-        dblclick('178', button='middle', modifiers=['Shift'])
-
-hover(bid: str)
-    Description: Hover over an element.
-    Examples:
-        hover('b8')
-
-press(bid: str, key_comb: str)
-    Description: Focus the matching element and press a combination of keys. It accepts the logical key names that are emitted in the keyboardEvent.key property of the keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab, Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp, ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can alternatively specify a single character you'd like to produce such as "a" or "#". Following modification shortcuts are also supported: Shift, Control, Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta resolves to Control on Windows and Linux and to Meta on macOS.
-    Examples:
-        press('88', 'Backspace')
-
-        press('a26', 'ControlOrMeta+a')
-
-        press('a61', 'Meta+Shift+t')
-
-focus(bid: str)
-    Description: Focus the matching element.
-    Examples:
-        focus('b455')
-
-clear(bid: str)
-    Description: Clear the input field.
-    Examples:
-        clear('996')
-
-drag_and_drop(from_bid: str, to_bid: str)
-    Description: Perform a drag & drop. Hover the element that will be dragged. Press left mouse button. Move mouse to the element that will receive the drop. Release left mouse button.
-    Examples:
-        drag_and_drop('56', '498')
-
-upload_file(bid: str, file: str | list[str])
-    Description: Click an element and wait for a "filechooser" event, then select one or multiple input files for upload. Relative file paths are resolved relative to the current working directory. An empty list clears the selected files.
-    Examples:
-        upload_file('572', '/home/user/my_receipt.pdf')
-
-        upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
-"""
-
-
-# Import-time guard: each BrowserGym action's signature and description must
-# appear verbatim in the hand-maintained description text.
-for _, action in _browser_action_space.action_set.items():
-    for _expected_text in (action.signature, action.description):
-        assert (
-            _expected_text in _BROWSER_TOOL_DESCRIPTION
-        ), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {_expected_text}'
-
-# Function-calling schema for the `browser` tool. Its only (required) parameter
-# is `code`, whose description carries the full list of supported functions.
-BrowserTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='browser',
-        description=_BROWSER_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'code': {
-                    'type': 'string',
-                    'description': (
-                        'The Python code that interacts with the browser.\n'
-                        + _BROWSER_TOOL_DESCRIPTION
-                    ),
-                }
-            },
-            'required': ['code'],
-        },
-    ),
-)
-
-# Description sent to the LLM for the `finish` tool.
-_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
-
-# Function-calling schema for `finish`; no parameters are declared.
-FinishTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='finish',
-        description=_FINISH_DESCRIPTION,
-    ),
-)
-
-
-def combine_thought(action: Action, thought: str) -> Action:
-    """Attach `thought` to `action` when the action can carry one.
-
-    Actions without a `thought` attribute are returned untouched, and an empty
-    `thought` leaves the current value in place. Otherwise the action's
-    existing thought is overwritten. The same action object is returned.
-    """
-    if hasattr(action, 'thought') and thought:
-        action.thought = thought
-    return action
-
-
-def response_to_actions(response: ModelResponse) -> list[Action]:
-    """Convert a single-choice LLM response into a list of OpenHands actions.
-
-    When the assistant message carries tool calls, each call is mapped to the
-    matching action; any text content of the message becomes the `thought` of
-    the first action only. Without tool calls, the message content is returned
-    as one `MessageAction` that waits for a user response.
-
-    Parameters:
-    - response (ModelResponse): The model response; must contain exactly one choice
-
-    Returns:
-    - list[Action]: At least one action
-
-    Raises:
-    - RuntimeError: If a tool call's arguments are not valid JSON
-    - FunctionCallNotExistsError: If a tool call names an unregistered tool
-    """
-    actions: list[Action] = []
-    assert len(response.choices) == 1, 'Only one choice is supported for now'
-    assistant_msg = response.choices[0].message
-    if assistant_msg.tool_calls:
-        # Check if there's assistant_msg.content. If so, add it to the thought
-        thought = ''
-        if isinstance(assistant_msg.content, str):
-            thought = assistant_msg.content
-        elif isinstance(assistant_msg.content, list):
-            for msg in assistant_msg.content:
-                if msg['type'] == 'text':
-                    thought += msg['text']
-
-        # Process each tool call to OpenHands action
-        for i, tool_call in enumerate(assistant_msg.tool_calls):
-            action: Action
-            try:
-                arguments = json.loads(tool_call.function.arguments)
-            except json.decoder.JSONDecodeError as e:
-                raise RuntimeError(
-                    f'Failed to parse tool call arguments: {tool_call.function.arguments}'
-                ) from e
-            if tool_call.function.name == 'execute_bash':
-                action = CmdRunAction(**arguments)
-            elif tool_call.function.name == 'execute_ipython_cell':
-                action = IPythonRunCellAction(**arguments)
-            elif tool_call.function.name == 'delegate_to_browsing_agent':
-                action = AgentDelegateAction(
-                    agent='BrowsingAgent',
-                    inputs=arguments,
-                )
-            elif tool_call.function.name == 'finish':
-                action = AgentFinishAction()
-            elif tool_call.function.name == 'edit_file':
-                action = FileEditAction(**arguments)
-            elif tool_call.function.name == 'str_replace_editor':
-                # We implement this in agent_skills, which can be used via Jupyter
-                # convert tool_call.function.arguments to kwargs that can be passed to file_editor
-                code = f'print(file_editor(**{arguments}))'
-                logger.debug(
-                    f'TOOL CALL: str_replace_editor -> file_editor with code: {code}'
-                )
-                action = IPythonRunCellAction(code=code, include_extra=False)
-            elif tool_call.function.name == 'browser':
-                action = BrowseInteractiveAction(browser_actions=arguments['code'])
-            elif tool_call.function.name == 'web_read':
-                action = BrowseURLAction(url=arguments['url'])
-            else:
-                raise FunctionCallNotExistsError(
-                    f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
-                )
-
-            # We only add thought to the first action
-            if i == 0:
-                action = combine_thought(action, thought)
-            # Add metadata for tool calling
-            action.tool_call_metadata = ToolCallMetadata(
-                tool_call_id=tool_call.id,
-                function_name=tool_call.function.name,
-                model_response=response,
-                total_calls_in_response=len(assistant_msg.tool_calls),
-            )
-            actions.append(action)
-    else:
-        # The content of an assistant message may be missing (None) or a list of
-        # parts; always hand MessageAction a string so it never carries None.
-        content = assistant_msg.content
-        actions.append(
-            MessageAction(
-                content=str(content) if content else '',
-                wait_for_response=True,
-            )
-        )
-
-    assert len(actions) >= 1
-    return actions
-
-
-def get_tools(
-    codeact_enable_browsing: bool = False,
-    codeact_enable_llm_editor: bool = False,
-    codeact_enable_jupyter: bool = False,
-) -> list[ChatCompletionToolParam]:
-    """Assemble the list of tool schemas for the given feature flags.
-
-    The bash and finish tools are always present. Browsing adds the web-read
-    and browser tools, Jupyter adds the IPython tool, and exactly one editor
-    tool is appended last: the LLM-based editor when enabled, otherwise the
-    string-replace editor.
-    """
-    browsing_tools = [WebReadTool, BrowserTool] if codeact_enable_browsing else []
-    jupyter_tools = [IPythonTool] if codeact_enable_jupyter else []
-    editor_tool = (
-        LLMBasedFileEditTool if codeact_enable_llm_editor else StrReplaceEditorTool
-    )
-    return [CmdRunTool, FinishTool, *browsing_tools, *jupyter_tools, editor_tool]
@@ -1,6 +0,0 @@
-You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
-<IMPORTANT>
-* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
-* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
-* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
-</IMPORTANT>
@@ -1,4 +0,0 @@
-from openhands.agenthub.delegator_agent.agent import DelegatorAgent
-from openhands.controller.agent import Agent
-
-Agent.register('DelegatorAgent', DelegatorAgent)
@@ -1,87 +0,0 @@
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
-from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
-from openhands.events.observation import AgentDelegateObservation, Observation
-from openhands.llm.llm import LLM
-
-
-class DelegatorAgent(Agent):
-    """
-    The Delegator Agent is responsible for delegating tasks to other agents based on the current task.
-
-    It walks a fixed pipeline: a study agent first, then a coder agent, then a
-    verifier agent. If the verifier does not report completion, the task goes
-    back to the coder.
-    """
-
-    VERSION = '1.0'
-
-    # Pipeline stage most recently delegated to: '' (nothing delegated yet),
-    # 'study', 'coder' or 'verifier'.
-    current_delegate: str = ''
-
-    def __init__(self, llm: LLM, config: AgentConfig):
-        """Initialize the Delegator Agent with an LLM
-
-        Parameters:
-        - llm (LLM): The llm to be used by this agent
-        - config (AgentConfig): The agent configuration, passed to the base class
-        """
-        super().__init__(llm, config)
-
-    def step(self, state: State) -> Action:
-        """Delegates the task to the next agent in the pipeline, or finishes once
-        the verifier reports the task as completed.
-
-        Parameters:
-        - state (State): The current state given the previous actions and observations
-
-        Returns:
-        - AgentFinishAction: If the verifier's outputs contain a truthy 'completed' value
-        - AgentDelegateAction: The next agent to delegate the task to
-
-        Raises:
-        - Exception: If the most recent observation in the history is not an
-          AgentDelegateObservation, or if `current_delegate` holds an unknown stage
-        """
-        if self.current_delegate == '':
-            self.current_delegate = 'study'
-            task, _ = state.get_current_user_intent()
-            return AgentDelegateAction(
-                agent='StudyRepoForTaskAgent', inputs={'task': task}
-            )
-
-        # last observation in history should be from the delegate
-        last_observation = None
-        for event in reversed(state.history):
-            if isinstance(event, Observation):
-                last_observation = event
-                break
-
-        if not isinstance(last_observation, AgentDelegateObservation):
-            raise Exception('Last observation is not an AgentDelegateObservation')
-
-        goal, _ = state.get_current_user_intent()
-        if self.current_delegate == 'study':
-            # Hand the study agent's summary to the coder.
-            self.current_delegate = 'coder'
-            return AgentDelegateAction(
-                agent='CoderAgent',
-                inputs={
-                    'task': goal,
-                    'summary': last_observation.outputs['summary'],
-                },
-            )
-        elif self.current_delegate == 'coder':
-            self.current_delegate = 'verifier'
-            return AgentDelegateAction(
-                agent='VerifierAgent',
-                inputs={
-                    'task': goal,
-                },
-            )
-        elif self.current_delegate == 'verifier':
-            if (
-                'completed' in last_observation.outputs
-                and last_observation.outputs['completed']
-            ):
-                return AgentFinishAction()
-            else:
-                # Not completed: send the task back to the coder together with
-                # the 'summary' entry of the verifier's outputs.
-                self.current_delegate = 'coder'
-                return AgentDelegateAction(
-                    agent='CoderAgent',
-                    inputs={
-                        'task': goal,
-                        'summary': last_observation.outputs['summary'],
-                    },
-                )
-        else:
-            raise Exception('Invalid delegate state')
@@ -1,4 +0,0 @@
-from openhands.agenthub.dummy_agent.agent import DummyAgent
-from openhands.controller.agent import Agent
-
-Agent.register('DummyAgent', DummyAgent)
@@ -1,211 +0,0 @@
-from typing import TypedDict, Union
-
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
-from openhands.core.schema import AgentState
-from openhands.events.action import (
-    Action,
-    AddTaskAction,
-    AgentFinishAction,
-    AgentRejectAction,
-    BrowseInteractiveAction,
-    BrowseURLAction,
-    CmdRunAction,
-    FileReadAction,
-    FileWriteAction,
-    MessageAction,
-    ModifyTaskAction,
-)
-from openhands.events.observation import (
-    AgentStateChangedObservation,
-    CmdOutputObservation,
-    FileReadObservation,
-    FileWriteObservation,
-    NullObservation,
-    Observation,
-)
-from openhands.events.serialization.event import event_to_dict
-from openhands.llm.llm import LLM
-
-"""
-FIXME: There are a few problems this surfaced
-* FileWrites seem to add an unintended newline at the end of the file
-* Browser not working
-"""
-
-# One scripted step: the action to emit and the observations it is expected
-# to produce.
-ActionObs = TypedDict(
-    'ActionObs', {'action': Action, 'observations': list[Observation]}
-)
-
-
-class DummyAgent(Agent):
-    """
-    The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
-    without making any LLM calls.
-    """
-
-    VERSION = '1.0'
-
-    def __init__(self, llm: LLM, config: AgentConfig):
-        """Set up the fixed script of actions and their expected observations.
-
-        Parameters:
-        - llm (LLM): Passed to the base class
-        - config (AgentConfig): Passed to the base class
-        """
-        super().__init__(llm, config)
-        self.steps: list[ActionObs] = [
-            {
-                'action': AddTaskAction(
-                    parent='None', goal='check the current directory'
-                ),
-                'observations': [],
-            },
-            {
-                'action': AddTaskAction(parent='0', goal='run ls'),
-                'observations': [],
-            },
-            {
-                'action': ModifyTaskAction(task_id='0', state='in_progress'),
-                'observations': [],
-            },
-            {
-                'action': MessageAction('Time to get started!'),
-                'observations': [],
-            },
-            {
-                'action': CmdRunAction(command='echo "foo"'),
-                'observations': [
-                    CmdOutputObservation(
-                        'foo', command_id=-1, command='echo "foo"', exit_code=0
-                    )
-                ],
-            },
-            {
-                'action': FileWriteAction(
-                    content='echo "Hello, World!"', path='hello.sh'
-                ),
-                'observations': [
-                    FileWriteObservation(
-                        content='echo "Hello, World!"', path='hello.sh'
-                    )
-                ],
-            },
-            {
-                'action': FileReadAction(path='hello.sh'),
-                'observations': [
-                    FileReadObservation('echo "Hello, World!"\n', path='hello.sh')
-                ],
-            },
-            {
-                'action': CmdRunAction(command='bash hello.sh'),
-                'observations': [
-                    CmdOutputObservation(
-                        'bash: hello.sh: No such file or directory',
-                        command_id=-1,
-                        command='bash workspace/hello.sh',
-                        exit_code=127,
-                    )
-                ],
-            },
-            {
-                'action': BrowseURLAction(url='https://google.com'),
-                'observations': [
-                    # BrowserOutputObservation('<html><body>Simulated Google page</body></html>',url='https://google.com',screenshot=''),
-                ],
-            },
-            {
-                'action': BrowseInteractiveAction(
-                    browser_actions='goto("https://google.com")'
-                ),
-                'observations': [
-                    # BrowserOutputObservation('<html><body>Simulated Google page after interaction</body></html>',url='https://google.com',screenshot=''),
-                ],
-            },
-            {
-                'action': AgentRejectAction(),
-                'observations': [NullObservation('')],
-            },
-            {
-                'action': AgentFinishAction(
-                    outputs={}, thought='Task completed', action='finish'
-                ),
-                'observations': [AgentStateChangedObservation('', AgentState.FINISHED)],
-            },
-        ]
-
-    def step(self, state: State) -> Action:
-        """Return the scripted action for the current iteration.
-
-        Task ids in the script are placeholders ('None', '0', '0.x') that are
-        rewritten against the root task of `state`. Browser actions are replaced
-        by a MessageAction saying that browsing is unavailable. For any other
-        action, the tail of `state.history` is compared with the observations
-        expected from the previous step and mismatches are printed as warnings.
-
-        Parameters:
-        - state (State): The current state; `iteration` selects the scripted step
-
-        Returns:
-        - Action: The scripted action, an AgentFinishAction once the script is
-          exhausted, or a MessageAction in place of a browser action
-        """
-        if state.iteration >= len(self.steps):
-            return AgentFinishAction()
-
-        current_step = self.steps[state.iteration]
-        action = current_step['action']
-
-        # If the action is AddTaskAction or ModifyTaskAction, update the parent ID or task_id
-        if isinstance(action, AddTaskAction):
-            if action.parent == 'None':
-                action.parent = ''  # Root task has no parent
-            elif action.parent == '0':
-                action.parent = state.root_task.id
-            elif action.parent.startswith('0.'):
-                action.parent = f'{state.root_task.id}{action.parent[1:]}'
-        elif isinstance(action, ModifyTaskAction):
-            if action.task_id == '0':
-                action.task_id = state.root_task.id
-            elif action.task_id.startswith('0.'):
-                action.task_id = f'{state.root_task.id}{action.task_id[1:]}'
-            # Ensure the task_id doesn't start with a dot
-            if action.task_id.startswith('.'):
-                action.task_id = action.task_id[1:]
-        elif isinstance(action, (BrowseURLAction, BrowseInteractiveAction)):
-            try:
-                return self.simulate_browser_action(action)
-            except (
-                Exception
-            ):  # This could be a specific exception for browser unavailability
-                return self.handle_browser_unavailable(action)
-
-        if state.iteration > 0:
-            prev_step = self.steps[state.iteration - 1]
-
-            if 'observations' in prev_step and prev_step['observations']:
-                expected_observations = prev_step['observations']
-                # Compare against the most recent events only.
-                hist_events = state.history[-len(expected_observations) :]
-
-                if len(hist_events) < len(expected_observations):
-                    print(
-                        f'Warning: Expected {len(expected_observations)} observations, but got {len(hist_events)}'
-                    )
-
-                for i in range(min(len(expected_observations), len(hist_events))):
-                    hist_obs = event_to_dict(hist_events[i])
-                    expected_obs = event_to_dict(expected_observations[i])
-
-                    # Remove dynamic fields for comparison
-                    for obs in [hist_obs, expected_obs]:
-                        obs.pop('id', None)
-                        obs.pop('timestamp', None)
-                        obs.pop('cause', None)
-                        obs.pop('source', None)
-                        if 'extras' in obs:
-                            obs['extras'].pop('command_id', None)
-
-                    if hist_obs != expected_obs:
-                        print(
-                            f'Warning: Observation mismatch. Expected {expected_obs}, got {hist_obs}'
-                        )
-
-        return action
-
-    def simulate_browser_action(
-        self, action: Union[BrowseURLAction, BrowseInteractiveAction]
-    ) -> Action:
-        """Stand-in for running a browser action; currently always rejects it."""
-        # Instead of simulating, we'll reject the browser action
-        return self.handle_browser_unavailable(action)
-
-    def handle_browser_unavailable(
-        self, action: Union[BrowseURLAction, BrowseInteractiveAction]
-    ) -> Action:
-        """Build a MessageAction explaining that the browser action was not run.
-
-        The message names the URL or the interactive browser actions that were
-        requested.
-        """
-        # Create a message action to inform that browsing is not available
-        message = 'Browser actions are not available in the DummyAgent environment.'
-        if isinstance(action, BrowseURLAction):
-            message += f' Unable to browse URL: {action.url}'
-        elif isinstance(action, BrowseInteractiveAction):
-            message += (
-                f' Unable to perform interactive browsing: {action.browser_actions}'
-            )
-        return MessageAction(content=message)
@@ -1,82 +0,0 @@
-from jinja2 import BaseLoader, Environment
-
-from openhands.agenthub.micro.instructions import instructions
-from openhands.agenthub.micro.registry import all_microagents
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
-from openhands.core.message import ImageContent, Message, TextContent
-from openhands.core.utils import json
-from openhands.events.action import Action
-from openhands.events.event import Event
-from openhands.events.serialization.action import action_from_dict
-from openhands.events.serialization.event import event_to_memory
-from openhands.llm.llm import LLM
-
-
-def parse_response(orig_response: str) -> Action:
-    # attempt to load the JSON dict from the response
-    action_dict = json.loads(orig_response)
-
-    # load the action from the dict
-    return action_from_dict(action_dict)
-
-
-def to_json(obj, **kwargs):
-    """Serialize an object to str format"""
-    return json.dumps(obj, **kwargs)
-
-
-class MicroAgent(Agent):
-    VERSION = '1.0'
-    prompt = ''
-    agent_definition: dict = {}
-
-    def history_to_json(self, history: list[Event], max_events: int = 20, **kwargs):
-        """
-        Serialize and simplify history to str format
-        """
-        processed_history = []
-        event_count = 0
-
-        for event in reversed(history):
-            if event_count >= max_events:
-                break
-            processed_history.append(
-                event_to_memory(event, self.llm.config.max_message_chars)
-            )
-            event_count += 1
-
-        # history is in reverse order, let's fix it
-        processed_history.reverse()
-
-        return json.dumps(processed_history, **kwargs)
-
-    def __init__(self, llm: LLM, config: AgentConfig):
-        super().__init__(llm, config)
-        if 'name' not in self.agent_definition:
-            raise ValueError('Agent definition must contain a name')
-        self.prompt_template = Environment(loader=BaseLoader).from_string(self.prompt)
-        self.delegates = all_microagents.copy()
-        del self.delegates[self.agent_definition['name']]
-
-    def step(self, state: State) -> Action:
-        last_user_message, last_image_urls = state.get_current_user_intent()
-        prompt = self.prompt_template.render(
-            state=state,
-            instructions=instructions,
-            to_json=to_json,
-            history_to_json=self.history_to_json,
-            delegates=self.delegates,
-            latest_user_message=last_user_message,
-        )
-        content = [TextContent(text=prompt)]
-        if self.llm.vision_is_active() and last_image_urls:
-            content.append(ImageContent(image_urls=last_image_urls))
-        message = Message(role='user', content=content)
-        resp = self.llm.completion(
-            messages=self.llm.format_messages_for_llm(message),
-        )
-        action_resp = resp['choices'][0]['message']['content']
-        action = parse_response(action_resp)
-        return action
@@ -1,6 +0,0 @@
-name: CoderAgent
-description: Given a particular task, and a detailed description of the codebase, accomplishes the task
-inputs:
-  task: string
-  summary: string
-outputs: {}
@@ -1,6 +0,0 @@
-name: CommitWriterAgent
-description: "Write a git commit message for files in the git staging area"
-inputs: {}
-outputs:
-  answer: string
-  reason: string
@@ -1,22 +0,0 @@
-import os
-
-instructions: dict = {}
-
-base_dir = os.path.dirname(os.path.abspath(__file__)) + '/_instructions'
-for root, dirs, files in os.walk(base_dir):
-    if len(files) == 0:
-        continue
-    if root == base_dir:
-        obj = instructions
-    else:
-        rel_base = os.path.relpath(root, base_dir)
-        keys = rel_base.split('/')
-        obj = instructions
-        for key in keys:
-            if key not in obj:
-                obj[key] = {}
-            obj = obj[key]
-    for file in files:
-        without_ext = os.path.splitext(file)[0]
-        with open(os.path.join(root, file), 'r') as f:
-            obj[without_ext] = f.read()
@@ -1,8 +0,0 @@
-name: ManagerAgent
-description: Delegates tasks to microagents based on their area of expertise
-generates: Action
-inputs:
-  task: string
-outputs:
-  summary: string # if finished
-  reason: string # if rejected
@@ -1,24 +0,0 @@
-name: MathAgent
-description: "Solves simple and complex math problems using python"
-container: python:3.12.3-bookworm
-inputs:
-  task: string
-outputs:
-  answer: string
-examples:
-  - inputs:
-      task: "What is 2 + 2?"
-    outputs:
-      answer: "4"
-  - inputs:
-      task: "What is the area of a circle with radius 7.324 inches?"
-    output:
-      answer: "168.518 square inches"
-  - inputs:
-      task: "What day of the week is 2099-01-01?"
-    outputs:
-      answer: "Saturday"
-  - inputs:
-      task: "What is the integral of sin(x^2) evaluated from -1 to 1?"
-    outputs:
-      answer: "0.603848"
@@ -1,5 +0,0 @@
-name: PostgresAgent
-description: Writes and maintains PostgreSQL migrations
-inputs:
-  task: string
-outputs: {}
@@ -1,27 +0,0 @@
-import os
-
-import yaml
-
-all_microagents = {}
-
-# Get the list of directories and sort them to preserve determinism
-dirs = sorted(os.listdir(os.path.dirname(__file__)))
-
-for dir in dirs:
-    base = os.path.dirname(__file__) + '/' + dir
-    if os.path.isfile(base):
-        continue
-    if dir.startswith('_'):
-        continue
-    promptFile = base + '/prompt.md'
-    agentFile = base + '/agent.yaml'
-    if not os.path.isfile(promptFile) or not os.path.isfile(agentFile):
-        raise Exception(f'Missing prompt or agent file in {base}. Please create them.')
-    with open(promptFile, 'r') as f:
-        prompt = f.read()
-    with open(agentFile, 'r') as f:
-        agent = yaml.safe_load(f)
-    if 'name' not in agent:
-        raise Exception(f'Missing name in {agentFile}')
-    agent['prompt'] = prompt
-    all_microagents[agent['name']] = agent
@@ -1,5 +0,0 @@
-name: RepoExplorerAgent
-description: Generates a detailed summary of an existing codebase
-inputs: {}
-outputs:
-  summary: string
@@ -1,6 +0,0 @@
-name: StudyRepoForTaskAgent
-description: Given a particular task, finds and describes all relevant parts of the codebase
-inputs:
-  task: string
-outputs:
-  summary: string
@@ -1,6 +0,0 @@
-name: TypoFixerAgent
-description: Fixes typos in files in the current working directory
-inputs:
-  task: string
-outputs:
-  summary: string
@@ -1,7 +0,0 @@
-name: VerifierAgent
-description: Given a particular task, verifies that the task has been completed
-inputs:
-  task: string
-outputs:
-  completed: boolean
-  summary: string
@@ -0,0 +1,2 @@
+.envrc
+workspace
@@ -0,0 +1,8 @@
+# LLM control loop
+This is currently a standalone utility. It will need to be integrated into OpenDevin's backend.
+
+## Usage
+```bash
+# Run this in project root
+./agenthub/monologue_agent/build-and-run.sh "write a bash script that prints 'hello world'"
+```
@@ -0,0 +1,9 @@
+# TODO
+There's a lot of low-hanging fruit for this agent:
+
+* Strip `<script>`, `<style>`, and other non-text tags from the HTML before sending it to the LLM
+* Keep track of the working directory when the agent uses `cd`
+* Improve memory condensing--condense earlier memories more aggressively
+* Limit the time that `run` can wait (in case agent runs an interactive command and it's hanging)
+* Figure out how to run background processes, e.g. `node server.js` to start a server
+
@@ -0,0 +1,4 @@
+from opendevin.agent import Agent
+from .agent import MonologueAgent
+
+Agent.register("MonologueAgent", MonologueAgent)
@@ -0,0 +1,172 @@
+from typing import List
+from opendevin.agent import Agent
+from opendevin.state import State
+from opendevin.llm.llm import LLM
+
+from opendevin.action import (
+    Action,
+    NullAction,
+    CmdRunAction,
+    FileWriteAction,
+    FileReadAction,
+    AgentRecallAction,
+    BrowseURLAction,
+    AgentThinkAction,
+)
+
+from opendevin.observation import (
+    Observation,
+    NullObservation,
+    CmdOutputObservation,
+    FileReadObservation,
+    AgentRecallObservation,
+    BrowserOutputObservation,
+)
+
+import agenthub.monologue_agent.utils.prompts as prompts
+from agenthub.monologue_agent.utils.monologue import Monologue
+from agenthub.monologue_agent.utils.memory import LongTermMemory
+
+MAX_MONOLOGUE_LENGTH = 20000
+MAX_OUTPUT_LENGTH = 5000
+
+INITIAL_THOUGHTS = [
+    "I exist!",
+    "Hmm...looks like I can type in a command line prompt",
+    "Looks like I have a web browser too!",
+    "Here's what I want to do: $TASK",
+    "How am I going to get there though?",
+    "It seems like I have some kind of short term memory.",
+    "Each of my thoughts seems to be stored in a JSON array.",
+    "It seems whatever I say next will be added as an object to the list.",
+    "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
+    "Fortunately I have long term memory!",
+    "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
+    "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
+    "Let's try it out!",
+    "RECALL what it is I want to do",
+    "Here's what I want to do: $TASK",
+    "How am I going to get there though?",
+    "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
+    'RUN echo "hello world"',
+    "hello world",
+    "Cool! I bet I can write files too using the write action.",
+    "WRITE echo \"console.log('hello world')\" > test.js",
+    "",
+    "I just created test.js. I'll try and run it now.",
+    "RUN node test.js",
+    "hello world",
+    "It works!",
+    "I'm going to try reading it now using the read action.",
+    "READ test.js",
+    "console.log('hello world')",
+    "Nice! I can read files too!",
+    "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
+    "Let's try that...",
+    "BROWSE google.com",
+    '<form><input type="text"></input><button type="submit"></button></form>',
+    "I can browse the web too!",
+    "And once I have completed my task, I can use the finish action to stop working.",
+    "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
+    "Very cool. Now to accomplish my task.",
+    "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
+    "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
+    "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
+    "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
+]
+
+
+class MonologueAgent(Agent):
+    """
+    Agent that records every action/observation as an event in a `Monologue`
+    and a `LongTermMemory`, and asks the LLM for its next action on each step.
+    """
+    # Flipped to True on the instance once `_initialize` has seeded the monologue.
+    _initialized = False
+
+    def __init__(self, llm: LLM):
+        """Store the LLM via the base class and create empty monologue/memory stores."""
+        super().__init__(llm)
+        self.monologue = Monologue()
+        self.memory = LongTermMemory()
+
+    def _add_event(self, event: dict):
+        """
+        Record one event dict in both the monologue and the long-term memory.
+
+        The dict is modified in place before being stored:
+        - `event["extras"]["screenshot"]` is deleted when present;
+        - `event["args"]["output"]` longer than MAX_OUTPUT_LENGTH is cut to
+          that length and suffixed with "...".
+
+        After storing, the monologue is condensed through the LLM whenever its
+        total length exceeds MAX_MONOLOGUE_LENGTH.
+        """
+        if "extras" in event and "screenshot" in event["extras"]:
+            del event["extras"]["screenshot"]
+        if 'args' in event and 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
+            event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
+
+        self.monologue.add_event(event)
+        self.memory.add_event(event)
+        if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
+            self.monologue.condense(self.llm)
+
+    def _initialize(self, task):
+        """
+        Seed the monologue and memory from INITIAL_THOUGHTS; runs at most once.
+
+        Every `$TASK` placeholder in a thought is replaced with `task`. Thoughts
+        starting with RUN/WRITE/READ/RECALL/BROWSE become the matching action;
+        for RUN/READ/RECALL/BROWSE the *next* thought is recorded as that
+        action's observation. Any other thought becomes an AgentThinkAction.
+
+        Raises:
+            ValueError: if `task` is None or the empty string.
+        """
+        if self._initialized:
+            return
+
+        if task is None or task == "":
+            raise ValueError("Instruction must be provided")
+        # Start from fresh stores (replaces the ones created in __init__).
+        self.monologue = Monologue()
+        self.memory = LongTermMemory()
+
+        # Non-empty when the previous thought was an action whose output is
+        # carried by the current thought.
+        output_type = ""
+        for thought in INITIAL_THOUGHTS:
+            thought = thought.replace("$TASK", task)
+            if output_type != "":
+                # This thought is the output of the previous action.
+                observation: Observation = NullObservation(content="")
+                if output_type == "run":
+                    observation = CmdOutputObservation(content=thought, command_id=0, command="")
+                elif output_type == "read":
+                    observation = FileReadObservation(content=thought, path="")
+                elif output_type == "recall":
+                    observation = AgentRecallObservation(content=thought, memories=[])
+                elif output_type == "browse":
+                    observation = BrowserOutputObservation(content=thought, url="", screenshot="")
+                self._add_event(observation.to_dict())
+                output_type = ""
+            else:
+                action: Action = NullAction()
+                if thought.startswith("RUN"):
+                    command = thought.split("RUN ")[1]
+                    action = CmdRunAction(command)
+                    output_type = "run"
+                elif thought.startswith("WRITE"):
+                    # Thought has the form `WRITE <content> > <path>`.
+                    # NOTE(review): output_type is not set here, so the "" entry that
+                    # follows the WRITE thought in INITIAL_THOUGHTS is recorded as an
+                    # AgentThinkAction with an empty thought -- confirm this is intended.
+                    parts = thought.split("WRITE ")[1].split(" > ")
+                    path = parts[1]
+                    content = parts[0]
+                    action = FileWriteAction(path=path, content=content)
+                elif thought.startswith("READ"):
+                    path = thought.split("READ ")[1]
+                    action = FileReadAction(path=path)
+                    output_type = "read"
+                elif thought.startswith("RECALL"):
+                    query = thought.split("RECALL ")[1]
+                    action = AgentRecallAction(query=query)
+                    output_type = "recall"
+                elif thought.startswith("BROWSE"):
+                    url = thought.split("BROWSE ")[1]
+                    action = BrowseURLAction(url=url)
+                    output_type = "browse"
+                else:
+                    action = AgentThinkAction(thought=thought)
+                self._add_event(action.to_dict())
+        self._initialized = True
+
+    def step(self, state: State) -> Action:
+        """
+        Produce the next action for the task in `state.plan.main_goal`.
+
+        Records every (action, observation) pair from `state.updated_info`,
+        clears that list, builds the action-request prompt from the current
+        monologue and background commands, sends it to the LLM as a single
+        user message, and parses the reply into an Action.
+        """
+        self._initialize(state.plan.main_goal)
+        for prev_action, obs in state.updated_info:
+            self._add_event(prev_action.to_dict())
+            self._add_event(obs.to_dict())
+
+        # The pairs above are now stored; reset so they are not recorded twice.
+        state.updated_info = []
+
+        prompt = prompts.get_request_action_prompt(
+            state.plan.main_goal,
+            self.monologue.get_thoughts(),
+            state.background_commands_obs,
+        )
+        messages = [{"content": prompt,"role": "user"}]
+        resp = self.llm.completion(messages=messages)
+        action_resp = resp['choices'][0]['message']['content']
+        action = prompts.parse_action_response(action_resp)
+        self.latest_action = action
+        return action
+
+    def search_memory(self, query: str) -> List[str]:
+        """Return the long-term memory's search results for `query`."""
+        return self.memory.search(query)
+
@@ -0,0 +1,14 @@
+import json
+from json_repair import repair_json
+
+def my_encoder(obj):
+    """
+    `default` hook for `json.dumps`: serialize objects that expose `to_dict()`.
+
+    Parameters:
+    - obj: a value the standard JSON encoder could not serialize
+
+    Returns:
+    - the result of `obj.to_dict()`
+
+    Raises:
+    - TypeError: if `obj` has no `to_dict` attribute. Falling through and
+      returning None would make json.dumps silently emit `null` for any
+      unsupported object instead of reporting the problem.
+    """
+    if hasattr(obj, "to_dict"):
+        return obj.to_dict()
+    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
+
+def dumps(obj, **kwargs):
+    """Serialize `obj` to a JSON string, using `my_encoder` for values json cannot encode natively."""
+    encoded = json.dumps(obj, default=my_encoder, **kwargs)
+    return encoded
+
+def loads(s, **kwargs):
+    """Parse JSON text `s` after first passing it through `repair_json`."""
+    return json.loads(repair_json(s), **kwargs)
+
@@ -0,0 +1,79 @@
+import chromadb
+from llama_index.core import Document
+from llama_index.core.retrievers import VectorIndexRetriever
+from llama_index.core import VectorStoreIndex
+from llama_index.vector_stores.chroma import ChromaVectorStore
+
+from opendevin import config
+from . import json
+
+# Choose the embedding backend once, at import time, from the
+# LLM_EMBEDDING_MODEL config value. The result is the module-level
+# `embed_model`. Each backend is imported only inside the branch that uses it.
+embedding_strategy = config.get("LLM_EMBEDDING_MODEL")
+
+# TODO: More embeddings: https://docs.llamaindex.ai/en/stable/examples/embeddings/OpenAI/
+# There's probably a more programmatic way to do this.
+if embedding_strategy == "llama2":
+    # Ollama backend; LLM_BASE_URL must be configured (get_or_error).
+    from llama_index.embeddings.ollama import OllamaEmbedding
+    embed_model = OllamaEmbedding(
+        model_name="llama2",
+        base_url=config.get_or_error("LLM_BASE_URL"),
+        ollama_additional_kwargs={"mirostat": 0},
+    )
+elif embedding_strategy == "openai":
+    from llama_index.embeddings.openai import OpenAIEmbedding
+    embed_model = OpenAIEmbedding(
+        model="text-embedding-ada-002"
+    )
+elif embedding_strategy == "azureopenai":
+    # Azure backend; reads LLM_DEPLOYMENT_NAME, LLM_API_KEY, LLM_BASE_URL and
+    # LLM_API_VERSION through get_or_error.
+    from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding  # Need to instruct to set these env variables in documentation
+    embed_model = AzureOpenAIEmbedding(
+        model="text-embedding-ada-002",
+        deployment_name=config.get_or_error("LLM_DEPLOYMENT_NAME"),
+        api_key=config.get_or_error("LLM_API_KEY"),
+        azure_endpoint=config.get_or_error("LLM_BASE_URL"),
+        api_version=config.get_or_error("LLM_API_VERSION"),
+    )
+else:
+    # Fallback for any other (or unset) value: HuggingFace embedding model.
+    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+    embed_model = HuggingFaceEmbedding(
+        model_name="BAAI/bge-small-en-v1.5"
+    )
+
+
+class LongTermMemory:
+    """Searchable store of agent events: a vector index over a Chroma collection named "memories"."""
+
+    def __init__(self):
+        client = chromadb.Client()
+        self.collection = client.get_or_create_collection(name="memories")
+        store = ChromaVectorStore(chroma_collection=self.collection)
+        self.index = VectorStoreIndex.from_vector_store(store, embed_model=embed_model)
+        # Running counter used as both the document id and the "idx" metadata.
+        self.thought_idx = 0
+
+    def add_event(self, event):
+        """
+        Index one event dict as a document.
+
+        The document text is the JSON form of `event`; its metadata records
+        whether the event is an action or an observation and which one.
+        """
+        if "action" in event:
+            kind, label = "action", event["action"]
+        elif "observation" in event:
+            kind, label = "observation", event["observation"]
+        else:
+            kind, label = "", ""
+        metadata = {
+            "type": kind,
+            "id": label,
+            "idx": self.thought_idx,
+        }
+        doc = Document(
+            text=json.dumps(event),
+            doc_id=str(self.thought_idx),
+            extra_info=metadata,
+        )
+        self.thought_idx += 1
+        self.index.insert(doc)
+
+    def search(self, query, k=10):
+        """Return the text of up to `k` indexed documents retrieved for `query`."""
+        hits = VectorIndexRetriever(
+            index=self.index,
+            similarity_top_k=k,
+        ).retrieve(query)
+        texts = []
+        for hit in hits:
+            texts.append(hit.get_text())
+        return texts
+
+
@@ -0,0 +1,40 @@
+import traceback
+
+import agenthub.monologue_agent.utils.json as json
+import agenthub.monologue_agent.utils.prompts as prompts
+
+class Monologue:
+    """Ordered list of event dicts that makes up the agent's short-term memory."""
+
+    def __init__(self):
+        self.thoughts = []
+
+    def add_event(self, t: dict):
+        """Append one event; raises ValueError when `t` is not a dict."""
+        if isinstance(t, dict):
+            self.thoughts.append(t)
+            return
+        raise ValueError("Event must be a dictionary")
+
+    def get_thoughts(self):
+        """Return the stored list of events (the live list, not a copy)."""
+        return self.thoughts
+
+    def get_total_length(self):
+        """Sum the JSON-serialized length of every event, skipping ones that fail to serialize."""
+        size = 0
+        for thought in self.thoughts:
+            try:
+                serialized = json.dumps(thought)
+            except TypeError as e:
+                print(f"Error serializing thought: {e}")
+                continue
+            size += len(serialized)
+        return size
+
+    def condense(self, llm):
+        """
+        Replace the stored events with an LLM-written summary of them.
+
+        Any failure is printed with its traceback and re-raised as RuntimeError.
+        """
+        try:
+            request = [{"content": prompts.get_summarize_monologue_prompt(self.thoughts),"role": "user"}]
+            reply = llm.completion(messages=request)
+            raw_summary = reply['choices'][0]['message']['content']
+            self.thoughts = prompts.parse_summary_response(strip_markdown(raw_summary))
+        except Exception as e:
+            traceback.print_exc()
+            raise RuntimeError(f"Error condensing thoughts: {e}")
+
+def strip_markdown(markdown_json):
+    """Remove Markdown code-fence markers from an LLM reply and trim surrounding whitespace."""
+    without_opening = markdown_json.replace('```json\n', '')
+    without_fences = without_opening.replace('```', '')
+    return without_fences.strip()
@@ -0,0 +1,139 @@
+from typing import List
+
+from . import json
+
+from opendevin.action import (
+    action_from_dict,
+    Action,
+)
+from opendevin.observation import (
+    CmdOutputObservation,
+)
+
+ACTION_PROMPT = """
+You're a thoughtful robot. Your main task is this:
+
+%(task)s
+
+Don't expand the scope of your task--just complete it as written.
+
+This is your internal monologue, in JSON format:
+
+%(monologue)s
+
+
+Your most recent thought is at the bottom of that monologue. Continue your train of thought.
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
+* `action`, which is one of the actions below
+* `args`, which is a map of key-value pairs, specifying the arguments for that action
+
+Here are the possible actions:
+* `read` - reads the content of a file. Arguments:
+  * `path` - the path of the file to read
+* `write` - writes the content to a file. Arguments:
+  * `path` - the path of the file to write
+  * `content` - the content to write to the file
+* `run` - runs a command. Arguments:
+  * `command` - the command to run
+  * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
+* `kill` - kills a background command
+  * `id` - the ID of the background command to kill
+* `browse` - opens a web page. Arguments:
+  * `url` - the URL to open
+* `recall` - recalls a past memory. Arguments:
+  * `query` - the query to search for
+* `think` - make a plan, set a goal, or record your thoughts. Arguments:
+  * `thought` - the thought to record
+* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
+
+%(background_commands)s
+
+You MUST take time to think in between read, write, run, browse, and recall actions.
+You should never act twice in a row without thinking. But if your last several
+actions are all "think" actions, you should consider taking a different action.
+
+Notes:
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
+* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
+
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.
+
+%(hint)s
+"""
+
+MONOLOGUE_SUMMARY_PROMPT = """
+Below is the internal monologue of an automated LLM agent. Each
+thought is an item in a JSON array. The thoughts may be memories,
+actions taken by the agent, or outputs from those actions.
+Please return a new, smaller JSON array, which summarizes the
+internal monologue. You can summarize individual thoughts, and
+you can condense related thoughts together with a description
+of their content.
+
+%(monologue)s
+
+Make the summaries as pithy and informative as possible.
+Be specific about what happened and what was learned. The summary
+will be used as keywords for searching for the original memory.
+Be sure to preserve any key words or important information.
+
+Your response must be in JSON format. It must be an object with the
+key `new_monologue`, which is a JSON array containing the summarized monologue.
+Each entry in the array must have an `action` key, and an `args` key.
+The action key may be `summarize`, and `args.summary` should contain the summary.
+You can also use the same action and args from the source monologue.
+"""
+
+
+def get_summarize_monologue_prompt(thoughts):
+    """Fill MONOLOGUE_SUMMARY_PROMPT with `thoughts`, serialized under the `old_monologue` key."""
+    serialized = json.dumps({'old_monologue': thoughts}, indent=2)
+    return MONOLOGUE_SUMMARY_PROMPT % {'monologue': serialized}
+
+def get_request_action_prompt(
+        task: str,
+        thoughts: List[dict],
+        background_commands_obs: List[CmdOutputObservation] = [],
+):
+    """
+    Build the prompt that asks the LLM for its next thought or action.
+
+    Parameters:
+    - task: the main task, inserted verbatim into the prompt
+    - thoughts: the monologue so far (event dicts); may be empty
+    - background_commands_obs: observations of commands still running in the
+      background; each contributes its `command_id` and `command`. The default
+      list is only read, never mutated.
+
+    Returns:
+    - ACTION_PROMPT filled in with the task, the JSON-serialized monologue,
+      the background-command listing, and a hint derived from the latest thought
+    """
+    hint = ''
+    if len(thoughts) > 0:
+        # Only the most recent event influences the hint.
+        latest_thought = thoughts[-1]
+        if "action" in latest_thought:
+            if latest_thought["action"] == 'think':
+                if latest_thought["args"]['thought'].startswith("OK so my task is"):
+                    hint = "You're just getting started! What should you do first?"
+                else:
+                    hint = "You've been thinking a lot lately. Maybe it's time to take action?"
+            elif latest_thought["action"] == 'error':
+                hint = "Looks like that last command failed. Maybe you need to fix it, or try something else."
+
+    bg_commands_message = ""
+    if len(background_commands_obs) > 0:
+        bg_commands_message = "The following commands are running in the background:"
+        for command_obs in background_commands_obs:
+            bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
+        bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
+    # An unguarded `thoughts[-1]` lookup used to sit here; its result was never
+    # used and it raised IndexError for an empty monologue, so it was removed.
+
+    return ACTION_PROMPT % {
+        'task': task,
+        'monologue': json.dumps(thoughts, indent=2),
+        'background_commands': bg_commands_message,
+        'hint': hint,
+    }
+
+def parse_action_response(response: str) -> Action:
+    """
+    Turn an LLM reply into an Action.
+
+    Everything outside the first "{" and the last "}" is discarded before the
+    remainder is parsed as JSON and handed to `action_from_dict`.
+    """
+    first_brace = response.find("{")
+    last_brace = response.rfind("}")
+    payload = json.loads(response[first_brace:last_brace + 1])
+    if 'content' in payload:
+        # The LLM sometimes uses `content` as a key; move its value to `contents`.
+        payload['contents'] = payload.pop('content')
+    return action_from_dict(payload)
+
+def parse_summary_response(response: str) -> List[dict]:
+    """Parse the summarization reply as JSON and return its `new_monologue` entry."""
+    return json.loads(response)['new_monologue']
@@ -1,4 +1,4 @@
-from openhands.agenthub.planner_agent.agent import PlannerAgent
-from openhands.controller.agent import Agent
+from opendevin.agent import Agent
+from .agent import PlannerAgent

-Agent.register('PlannerAgent', PlannerAgent)
+Agent.register("PlannerAgent", PlannerAgent)
@@ -1,53 +1,26 @@
-from openhands.agenthub.planner_agent.prompt import get_prompt_and_images
-from openhands.agenthub.planner_agent.response_parser import PlannerResponseParser
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
-from openhands.core.message import ImageContent, Message, TextContent
-from openhands.events.action import Action, AgentFinishAction
-from openhands.llm.llm import LLM
+from typing import List
+from .prompt import get_prompt, parse_response

+from opendevin.agent import Agent
+from opendevin.action import AgentFinishAction
+from opendevin.llm.llm import LLM
+from opendevin.state import State
+from opendevin.action import Action

 class PlannerAgent(Agent):
-    VERSION = '1.0'
-    """
-    The planner agent utilizes a special prompting strategy to create long term plans for solving problems.
-    The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
-    """
-    response_parser = PlannerResponseParser()
-
-    def __init__(self, llm: LLM, config: AgentConfig):
-        """Initialize the Planner Agent with an LLM
-
-        Parameters:
-        - llm (LLM): The llm to be used by this agent
-        """
-        super().__init__(llm, config)
+    def __init__(self, llm: LLM):
+        super().__init__(llm)

    def step(self, state: State) -> Action:
-        """Checks to see if current step is completed, returns AgentFinishAction if True.
-        Otherwise, creates a plan prompt and sends to model for inference, returning the result as the next action.
-
-        Parameters:
-        - state (State): The current state given the previous actions and observations
-
-        Returns:
-        - AgentFinishAction: If the last state was 'completed', 'verified', or 'abandoned'
-        - Action: The next action to take based on llm response
-        """
-        if state.root_task.state in [
-            'completed',
-            'verified',
-            'abandoned',
-        ]:
+        if state.plan.task.state in ['completed', 'verified', 'abandoned']:
            return AgentFinishAction()
+        prompt = get_prompt(state.plan, state.history)
+        messages = [{"content": prompt, "role": "user"}]
+        resp = self.llm.completion(messages=messages)
+        action_resp = resp['choices'][0]['message']['content']
+        action = parse_response(action_resp)
+        return action
+
+    def search_memory(self, query: str) -> List[str]:
+        return []

-        prompt, image_urls = get_prompt_and_images(
-            state, self.llm.config.max_message_chars
-        )
-        content = [TextContent(text=prompt)]
-        if self.llm.vision_is_active() and image_urls:
-            content.append(ImageContent(image_urls=image_urls))
-        message = Message(role='user', content=content)
-        resp = self.llm.completion(messages=self.llm.format_messages_for_llm(message))
-        return self.response_parser.parse(resp)
@@ -1,15 +1,45 @@
-from openhands.controller.state.state import State
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.schema import ActionType
-from openhands.core.utils import json
-from openhands.events.action import (
-    Action,
-    NullAction,
-)
-from openhands.events.serialization.action import action_from_dict
-from openhands.events.serialization.event import event_to_memory
+import json
+from typing import List, Tuple, Dict, Type

-HISTORY_SIZE = 20
+from opendevin.controller.agent_controller import print_with_color
+from opendevin.plan import Plan
+from opendevin.action import Action, action_from_dict
+from opendevin.observation import Observation
+
+from opendevin.action import (
+    NullAction,
+    CmdRunAction,
+    CmdKillAction,
+    BrowseURLAction,
+    FileReadAction,
+    FileWriteAction,
+    AgentRecallAction,
+    AgentThinkAction,
+    AgentFinishAction,
+    AgentSummarizeAction,
+    AddTaskAction,
+    ModifyTaskAction,
+)
+
+from opendevin.observation import (
+    NullObservation,
+)
+
+ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
+    "run": CmdRunAction,
+    "kill": CmdKillAction,
+    "browse": BrowseURLAction,
+    "read": FileReadAction,
+    "write": FileWriteAction,
+    "recall": AgentRecallAction,
+    "think": AgentThinkAction,
+    "summarize": AgentSummarizeAction,
+    "finish": AgentFinishAction,
+    "add_task": AddTaskAction,
+    "modify_task": ModifyTaskAction,
+}
+
+HISTORY_SIZE = 10

 prompt = """
 # Task
@@ -74,118 +104,100 @@ It must be an object, and it must contain two fields:
  * `content` - the content to write to the file
 * `run` - runs a command on the command line in a Linux shell. Arguments:
  * `command` - the command to run
+  * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
+* `kill` - kills a background command
+  * `id` - the ID of the background command to kill
 * `browse` - opens a web page. Arguments:
  * `url` - the URL to open
-* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
-  * `content` - the message to record
-  * `wait_for_response` - set to `true` to wait for the user to respond before proceeding
+* `think` - make a plan, set a goal, or record your thoughts. Arguments:
+  * `thought` - the thought to record
 * `add_task` - add a task to your plan. Arguments:
-  * `parent` - the ID of the parent task (leave empty if it should go at the top level)
+  * `parent` - the ID of the parent task
  * `goal` - the goal of the task
  * `subtasks` - a list of subtasks, each of which is a map with a `goal` key.
 * `modify_task` - close a task. Arguments:
-  * `task_id` - the ID of the task to close
+  * `id` - the ID of the task to close
  * `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now.
 * `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.

-You MUST take time to think in between read, write, run, and browse actions--do this with the `message` action.
+You MUST take time to think in between read, write, run, browse, and recall actions.
 You should never act twice in a row without thinking. But if your last several
-actions are all `message` actions, you should consider taking a different action.
+actions are all `think` actions, you should consider taking a different action.

 What is your next thought or action? Again, you must reply with JSON, and only with JSON.

 %(hint)s
 """

-
-def get_hint(latest_action_id: str) -> str:
-    """Returns action type hint based on given action_id"""
-    hints = {
-        '': "You haven't taken any actions yet. Start by using `ls` to check out what files you're working with.",
-        ActionType.RUN: 'You should think about the command you just ran, what output it gave, and how that affects your plan.',
-        ActionType.READ: 'You should think about the file you just read, what you learned from it, and how that affects your plan.',
-        ActionType.WRITE: 'You just changed a file. You should think about how it affects your plan.',
-        ActionType.BROWSE: 'You should think about the page you just visited, and what you learned from it.',
-        ActionType.MESSAGE: "Look at your last thought in the history above. What does it suggest? Don't think anymore--take action.",
-        ActionType.ADD_TASK: 'You should think about the next action to take.',
-        ActionType.MODIFY_TASK: 'You should think about the next action to take.',
-        ActionType.SUMMARIZE: '',
-        ActionType.FINISH: '',
-    }
-    return hints.get(latest_action_id, '')
-
-
-def get_prompt_and_images(
-    state: State, max_message_chars: int
-) -> tuple[str, list[str] | None]:
-    """Gets the prompt for the planner agent.
-
-    Formatted with the most recent action-observation pairs, current task, and hint based on last action
-
-    Parameters:
-    - state (State): The state of the current agent
-
-    Returns:
-    - str: The formatted string prompt with historical values
-    """
-    # the plan
-    plan_str = json.dumps(state.root_task.to_dict(), indent=2)
-
-    # the history
+def get_prompt(plan: Plan, history: List[Tuple[Action, Observation]]):
+    """Fills in the planner prompt template for the next LLM call.
+
+    Also prints the selected hint via print_with_color before returning.
+
+    Parameters:
+    - plan (Plan): The current plan; its task tree, main goal and current task are rendered into the prompt
+    - history (List[Tuple[Action, Observation]]): Action/observation pairs; only the last HISTORY_SIZE pairs are used
+
+    Returns:
+    - str: The prompt with task, plan, history, plan status and hint substituted in
+    """
+    plan_str = json.dumps(plan.task.to_dict(), indent=2)
+    sub_history = history[-HISTORY_SIZE:]
    history_dicts = []
    latest_action: Action = NullAction()
-
-    # retrieve the latest HISTORY_SIZE events
-    for event_count, event in enumerate(reversed(state.history)):
-        if event_count >= HISTORY_SIZE:
-            break
-        if latest_action == NullAction() and isinstance(event, Action):
-            latest_action = event
-        history_dicts.append(event_to_memory(event, max_message_chars))
-
-    # history_dicts is in reverse order, lets fix it
-    history_dicts.reverse()
-
-    # and get it as a JSON string
+    for action, observation in sub_history:
+        if not isinstance(action, NullAction):
+            history_dicts.append(action.to_dict())
+            latest_action = action
+        if not isinstance(observation, NullObservation):
+            observation_dict = observation.to_dict()
+            if "extras" in observation_dict and "screenshot" in observation_dict["extras"]:
+                del observation_dict["extras"]["screenshot"]
+            history_dicts.append(observation_dict)
    history_str = json.dumps(history_dicts, indent=2)

-    # the plan status
-    current_task = state.root_task.get_current_task()
+    hint = ""
+    current_task = plan.get_current_task()
    if current_task is not None:
        plan_status = f"You're currently working on this task:\n{current_task.goal}."
        if len(current_task.subtasks) == 0:
            plan_status += "\nIf it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW."
    else:
        plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."
+        hint = plan_status

-    # the hint, based on the last action
-    hint = get_hint(event_to_memory(latest_action, max_message_chars).get('action', ''))
-    logger.debug('HINT:\n' + hint, extra={'msg_type': 'DETAIL'})
+    latest_action_id = latest_action.to_dict()['action']

-    # the last relevant user message (the task)
-    message, image_urls = state.get_current_user_intent()
+    if current_task is not None:
+        # One nudge per kind of most-recent action; an action type that has
+        # no entry here keeps whatever hint was already set.
+        hint_by_action = {
+            "": "You haven't taken any actions yet. Start by using `ls` to check out what files you're working with.",
+            "run": "You should think about the command you just ran, what output it gave, and how that affects your plan.",
+            "read": "You should think about the file you just read, what you learned from it, and how that affects your plan.",
+            "write": "You just changed a file. You should think about how it affects your plan.",
+            "browse": "You should think about the page you just visited, and what you learned from it.",
+            "think": "Look at your last thought in the history above. What does it suggest? Don't think anymore--take action.",
+            "recall": "You should think about the information you just recalled, and how it should affect your plan.",
+            "add_task": "You should think about the next action to take.",
+            "modify_task": "You should think about the next action to take.",
+            "summarize": "",
+            "finish": "",
+        }
+        hint = hint_by_action.get(latest_action_id, hint)

-    # finally, fill in the prompt
+    print_with_color("HINT:\n" + hint, "INFO")
    return prompt % {
-        'task': message,
+        'task': plan.main_goal,
        'plan': plan_str,
        'history': history_str,
        'hint': hint,
        'plan_status': plan_status,
-    }, image_urls
-
+    }

 def parse_response(response: str) -> Action:
-    """Parses the model output to find a valid action to take
-    Parameters:
-    - response (str): A response from the model that potentially contains an Action.
-
-    Returns:
-    - Action: A valid next action to perform from model output
-    """
+    # Keep only the text from the first "{" to the last "}" so that anything
+    # the model wrote around the JSON object is dropped before decoding.
+    json_start = response.find("{")
+    json_end = response.rfind("}") + 1
+    response = response[json_start:json_end]
    action_dict = json.loads(response)
    if 'contents' in action_dict:
        # The LLM gets confused here. Might as well be robust
        action_dict['content'] = action_dict.pop('contents')
    action = action_from_dict(action_dict)
    return action
+
@@ -1,37 +0,0 @@
-from openhands.controller.action_parser import ResponseParser
-from openhands.core.utils import json
-from openhands.events.action import (
-    Action,
-)
-from openhands.events.serialization.action import action_from_dict
-
-
-class PlannerResponseParser(ResponseParser):
-    def __init__(self):
-        super().__init__()
-
-    def parse(self, response: str) -> Action:
-        action_str = self.parse_response(response)
-        return self.parse_action(action_str)
-
-    def parse_response(self, response) -> str:
-        # get the next action from the response
-        return response['choices'][0]['message']['content']
-
-    def parse_action(self, action_str: str) -> Action:
-        """Parses a string to find an action within it
-
-        Parameters:
-        - response (str): The string to be parsed
-
-        Returns:
-        - Action: The action that was found in the response string
-        """
-        # attempt to load the JSON dict from the response
-        action_dict = json.loads(action_str)
-
-        if 'content' in action_dict:
-            # The LLM gets confused here. Might as well be robust
-            action_dict['contents'] = action_dict.pop('content')
-
-        return action_from_dict(action_dict)
@@ -0,0 +1,4 @@
+# This is a template. Run `cp config.toml.template config.toml` to use it.
+
+LLM_API_KEY="<YOUR OPENAI API KEY>"
+WORKSPACE_DIR="./workspace"
@@ -1,5 +0,0 @@
-from openhands.controller.agent_controller import AgentController
-
-__all__ = [
-    'AgentController',
-]
@@ -1,77 +0,0 @@
-from abc import ABC, abstractmethod
-
-from openhands.events.action import Action
-
-
-class ActionParseError(Exception):
-    """Exception raised when the response from the LLM cannot be parsed into an action."""
-
-    def __init__(self, error: str):
-        self.error = error
-
-    def __str__(self):
-        return self.error
-
-
-class ResponseParser(ABC):
-    """This abstract base class is a general interface for an response parser dedicated to
-    parsing the action from the response from the LLM.
-    """
-
-    def __init__(
-        self,
-    ):
-        # Need pay attention to the item order in self.action_parsers
-        self.action_parsers = []
-
-    @abstractmethod
-    def parse(self, response: str) -> Action:
-        """Parses the action from the response from the LLM.
-
-        Parameters:
-        - response (str): The response from the LLM.
-
-        Returns:
-        - action (Action): The action parsed from the response.
-        """
-        pass
-
-    @abstractmethod
-    def parse_response(self, response) -> str:
-        """Parses the action from the response from the LLM.
-
-        Parameters:
-        - response (str): The response from the LLM.
-
-        Returns:
-        - action_str (str): The action str parsed from the response.
-        """
-        pass
-
-    @abstractmethod
-    def parse_action(self, action_str: str) -> Action:
-        """Parses the action from the response from the LLM.
-
-        Parameters:
-        - action_str (str): The response from the LLM.
-
-        Returns:
-        - action (Action): The action parsed from the response.
-        """
-        pass
-
-
-class ActionParser(ABC):
-    """This abstract base class is a general interface for an action parser dedicated to
-    parsing the action from the action str from the LLM.
-    """
-
-    @abstractmethod
-    def check_condition(self, action_str: str) -> bool:
-        """Check if the action string can be parsed by this parser."""
-        pass
-
-    @abstractmethod
-    def parse(self, action_str: str) -> Action:
-        """Parses the action from the action string from the LLM response."""
-        pass
@@ -1,111 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Type
-
-if TYPE_CHECKING:
-    from openhands.controller.state.state import State
-    from openhands.core.config import AgentConfig
-    from openhands.events.action import Action
-from openhands.core.exceptions import (
-    AgentAlreadyRegisteredError,
-    AgentNotRegisteredError,
-)
-from openhands.llm.llm import LLM
-from openhands.runtime.plugins import PluginRequirement
-from openhands.utils.prompt import PromptManager
-
-
-class Agent(ABC):
-    DEPRECATED = False
-    """
-    This abstract base class is an general interface for an agent dedicated to
-    executing a specific instruction and allowing human interaction with the
-    agent during execution.
-    It tracks the execution status and maintains a history of interactions.
-    """
-
-    _registry: dict[str, Type['Agent']] = {}
-    sandbox_plugins: list[PluginRequirement] = []
-
-    def __init__(
-        self,
-        llm: LLM,
-        config: 'AgentConfig',
-    ):
-        self.llm = llm
-        self.config = config
-        self._complete = False
-        self.prompt_manager: PromptManager | None = None
-
-    @property
-    def complete(self) -> bool:
-        """Indicates whether the current instruction execution is complete.
-
-        Returns:
-        - complete (bool): True if execution is complete; False otherwise.
-        """
-        return self._complete
-
-    @abstractmethod
-    def step(self, state: 'State') -> 'Action':
-        """Starts the execution of the assigned instruction. This method should
-        be implemented by subclasses to define the specific execution logic.
-        """
-        pass
-
-    def reset(self) -> None:
-        """Resets the agent's execution status and clears the history. This method can be used
-        to prepare the agent for restarting the instruction or cleaning up before destruction.
-
-        """
-        # TODO clear history
-        self._complete = False
-
-        if self.llm:
-            self.llm.reset()
-
-    @property
-    def name(self):
-        return self.__class__.__name__
-
-    @classmethod
-    def register(cls, name: str, agent_cls: Type['Agent']):
-        """Registers an agent class in the registry.
-
-        Parameters:
-        - name (str): The name to register the class under.
-        - agent_cls (Type['Agent']): The class to register.
-
-        Raises:
-        - AgentAlreadyRegisteredError: If name already registered
-        """
-        if name in cls._registry:
-            raise AgentAlreadyRegisteredError(name)
-        cls._registry[name] = agent_cls
-
-    @classmethod
-    def get_cls(cls, name: str) -> Type['Agent']:
-        """Retrieves an agent class from the registry.
-
-        Parameters:
-        - name (str): The name of the class to retrieve
-
-        Returns:
-        - agent_cls (Type['Agent']): The class registered under the specified name.
-
-        Raises:
-        - AgentNotRegisteredError: If name not registered
-        """
-        if name not in cls._registry:
-            raise AgentNotRegisteredError(name)
-        return cls._registry[name]
-
-    @classmethod
-    def list_agents(cls) -> list[str]:
-        """Retrieves the list of all agent names from the registry.
-
-        Raises:
-        - AgentNotRegisteredError: If no agent is registered
-        """
-        if not bool(cls._registry):
-            raise AgentNotRegisteredError()
-        return list(cls._registry.keys())
@@ -1,955 +0,0 @@
-import asyncio
-import copy
-import os
-import traceback
-from typing import Callable, ClassVar, Type
-
-import litellm
-from litellm.exceptions import BadRequestError, ContextWindowExceededError
-
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State, TrafficControlState
-from openhands.controller.stuck import StuckDetector
-from openhands.core.config import AgentConfig, LLMConfig
-from openhands.core.exceptions import (
-    FunctionCallNotExistsError,
-    FunctionCallValidationError,
-    LLMMalformedActionError,
-    LLMNoActionError,
-    LLMResponseError,
-)
-from openhands.core.logger import LOG_ALL_EVENTS
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.schema import AgentState
-from openhands.events import EventSource, EventStream, EventStreamSubscriber
-from openhands.events.action import (
-    Action,
-    ActionConfirmationStatus,
-    AddTaskAction,
-    AgentDelegateAction,
-    AgentFinishAction,
-    AgentRejectAction,
-    ChangeAgentStateAction,
-    CmdRunAction,
-    IPythonRunCellAction,
-    MessageAction,
-    ModifyTaskAction,
-    NullAction,
-)
-from openhands.events.event import Event
-from openhands.events.observation import (
-    AgentDelegateObservation,
-    AgentStateChangedObservation,
-    ErrorObservation,
-    NullObservation,
-    Observation,
-)
-from openhands.events.serialization.event import truncate_content
-from openhands.llm.llm import LLM
-from openhands.utils.shutdown_listener import should_continue
-
-# note: RESUME is only available on web GUI
-TRAFFIC_CONTROL_REMINDER = (
-    "Please click on resume button if you'd like to continue, or start a new task."
-)
-
-
-class AgentController:
-    id: str
-    agent: Agent
-    max_iterations: int
-    event_stream: EventStream
-    state: State
-    confirmation_mode: bool
-    agent_to_llm_config: dict[str, LLMConfig]
-    agent_configs: dict[str, AgentConfig]
-    agent_task: asyncio.Future | None = None
-    parent: 'AgentController | None' = None
-    delegate: 'AgentController | None' = None
-    _pending_action: Action | None = None
-    _closed: bool = False
-    filter_out: ClassVar[tuple[type[Event], ...]] = (
-        NullAction,
-        NullObservation,
-        ChangeAgentStateAction,
-        AgentStateChangedObservation,
-    )
-
-    def __init__(
-        self,
-        agent: Agent,
-        event_stream: EventStream,
-        max_iterations: int,
-        max_budget_per_task: float | None = None,
-        agent_to_llm_config: dict[str, LLMConfig] | None = None,
-        agent_configs: dict[str, AgentConfig] | None = None,
-        sid: str = 'default',
-        confirmation_mode: bool = False,
-        initial_state: State | None = None,
-        is_delegate: bool = False,
-        headless_mode: bool = True,
-        status_callback: Callable | None = None,
-    ):
-        """Initializes a new instance of the AgentController class.
-
-        Args:
-            agent: The agent instance to control.
-            event_stream: The event stream to publish events to.
-            max_iterations: The maximum number of iterations the agent can run.
-            max_budget_per_task: The maximum budget (in USD) allowed per task, beyond which the agent will stop.
-            agent_to_llm_config: A dictionary mapping agent names to LLM configurations in the case that
-                we delegate to a different agent.
-            agent_configs: A dictionary mapping agent names to agent configurations in the case that
-                we delegate to a different agent.
-            sid: The session ID of the agent.
-            confirmation_mode: Whether to enable confirmation mode for agent actions.
-            initial_state: The initial state of the controller.
-            is_delegate: Whether this controller is a delegate.
-            headless_mode: Whether the agent is run in headless mode.
-            status_callback: Optional callback function to handle status updates.
-        """
-        self._step_lock = asyncio.Lock()
-        self.id = sid
-        self.agent = agent
-        self.headless_mode = headless_mode
-
-        # subscribe to the event stream
-        self.event_stream = event_stream
-        self.event_stream.subscribe(
-            EventStreamSubscriber.AGENT_CONTROLLER, self.on_event, self.id
-        )
-
-        # state from the previous session, state from a parent agent, or a fresh state
-        self.set_initial_state(
-            state=initial_state,
-            max_iterations=max_iterations,
-            confirmation_mode=confirmation_mode,
-        )
-        self.max_budget_per_task = max_budget_per_task
-        self.agent_to_llm_config = agent_to_llm_config if agent_to_llm_config else {}
-        self.agent_configs = agent_configs if agent_configs else {}
-        self._initial_max_iterations = max_iterations
-        self._initial_max_budget_per_task = max_budget_per_task
-
-        # stuck helper
-        self._stuck_detector = StuckDetector(self.state)
-        self.status_callback = status_callback
-
-    async def close(self) -> None:
-        """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream.
-
-        Note that it's fairly important that this closes properly, otherwise the state is incomplete.
-        """
-        await self.set_agent_state_to(AgentState.STOPPED)
-
-        # we made history, now is the time to rewrite it!
-        # the final state.history will be used by external scripts like evals, tests, etc.
-        # history will need to be complete WITH delegates events
-        # like the regular agent history, it does not include:
-        # - 'hidden' events, events with hidden=True
-        # - backend events (the default 'filtered out' types, types in self.filter_out)
-        start_id = self.state.start_id if self.state.start_id >= 0 else 0
-        end_id = (
-            self.state.end_id
-            if self.state.end_id >= 0
-            else self.event_stream.get_latest_event_id()
-        )
-        self.state.history = list(
-            self.event_stream.get_events(
-                start_id=start_id,
-                end_id=end_id,
-                reverse=False,
-                filter_out_type=self.filter_out,
-                filter_hidden=True,
-            )
-        )
-
-        # unsubscribe from the event stream
-        self.event_stream.unsubscribe(EventStreamSubscriber.AGENT_CONTROLLER, self.id)
-        self._closed = True
-
-    def log(self, level: str, message: str, extra: dict | None = None) -> None:
-        """Logs a message to the agent controller's logger.
-
-        Args:
-            level (str): The logging level to use (e.g., 'info', 'debug', 'error').
-            message (str): The message to log.
-            extra (dict | None, optional): Additional fields to include in the log. Defaults to None.
-        """
-        message = f'[Agent Controller {self.id}] {message}'
-        getattr(logger, level)(message, extra=extra, stacklevel=2)
-
-    def update_state_before_step(self):
-        self.state.iteration += 1
-        self.state.local_iteration += 1
-
-    async def update_state_after_step(self):
-        # update metrics especially for cost. Use deepcopy to avoid it being modified by agent._reset()
-        self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
-
-    async def _react_to_exception(
-        self,
-        e: Exception,
-    ):
-        await self.set_agent_state_to(AgentState.ERROR)
-        if self.status_callback is not None:
-            err_id = ''
-            if isinstance(e, litellm.AuthenticationError):
-                err_id = 'STATUS$ERROR_LLM_AUTHENTICATION'
-            self.status_callback('error', err_id, str(e))
-
-    async def start_step_loop(self):
-        """The main loop for the agent's step-by-step execution."""
-        self.log('info', 'Starting step loop...')
-        while True:
-            if not self._is_awaiting_observation() and not should_continue():
-                break
-            if self._closed:
-                break
-            try:
-                await self._step()
-            except asyncio.CancelledError:
-                self.log('debug', 'AgentController task was cancelled')
-                break
-            except Exception as e:
-                traceback.print_exc()
-                self.log('error', f'Error while running the agent: {e}')
-                await self._react_to_exception(e)
-
-            await asyncio.sleep(0.1)
-
-    async def on_event(self, event: Event) -> None:
-        """Callback from the event stream. Notifies the controller of incoming events.
-
-        Args:
-            event (Event): The incoming event to process.
-        """
-        if hasattr(event, 'hidden') and event.hidden:
-            return
-
-        # if the event is not filtered out, add it to the history
-        if not any(isinstance(event, filter_type) for filter_type in self.filter_out):
-            self.state.history.append(event)
-
-        if isinstance(event, Action):
-            await self._handle_action(event)
-        elif isinstance(event, Observation):
-            await self._handle_observation(event)
-
-    async def _handle_action(self, action: Action) -> None:
-        """Handles actions from the event stream.
-
-        Args:
-            action (Action): The action to handle.
-        """
-        if isinstance(action, ChangeAgentStateAction):
-            await self.set_agent_state_to(action.agent_state)  # type: ignore
-        elif isinstance(action, MessageAction):
-            await self._handle_message_action(action)
-        elif isinstance(action, AgentDelegateAction):
-            await self.start_delegate(action)
-        elif isinstance(action, AddTaskAction):
-            self.state.root_task.add_subtask(
-                action.parent, action.goal, action.subtasks
-            )
-        elif isinstance(action, ModifyTaskAction):
-            self.state.root_task.set_subtask_state(action.task_id, action.state)
-        elif isinstance(action, AgentFinishAction):
-            self.state.outputs = action.outputs
-            self.state.metrics.merge(self.state.local_metrics)
-            await self.set_agent_state_to(AgentState.FINISHED)
-        elif isinstance(action, AgentRejectAction):
-            self.state.outputs = action.outputs
-            self.state.metrics.merge(self.state.local_metrics)
-            await self.set_agent_state_to(AgentState.REJECTED)
-
-    async def _handle_observation(self, observation: Observation) -> None:
-        """Handles observation from the event stream.
-
-        Args:
-            observation (observation): The observation to handle.
-        """
-        observation_to_print = copy.deepcopy(observation)
-        if len(observation_to_print.content) > self.agent.llm.config.max_message_chars:
-            observation_to_print.content = truncate_content(
-                observation_to_print.content, self.agent.llm.config.max_message_chars
-            )
-        # Use info level if LOG_ALL_EVENTS is set
-        log_level = 'info' if os.getenv('LOG_ALL_EVENTS') in ('true', '1') else 'debug'
-        self.log(
-            log_level, str(observation_to_print), extra={'msg_type': 'OBSERVATION'}
-        )
-
-        if observation.llm_metrics is not None:
-            self.agent.llm.metrics.merge(observation.llm_metrics)
-
-        if self._pending_action and self._pending_action.id == observation.cause:
-            if self.state.agent_state == AgentState.AWAITING_USER_CONFIRMATION:
-                return
-            self._pending_action = None
-            if self.state.agent_state == AgentState.USER_CONFIRMED:
-                await self.set_agent_state_to(AgentState.RUNNING)
-            if self.state.agent_state == AgentState.USER_REJECTED:
-                await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
-            return
-        elif isinstance(observation, ErrorObservation):
-            if self.state.agent_state == AgentState.ERROR:
-                self.state.metrics.merge(self.state.local_metrics)
-
-    async def _handle_message_action(self, action: MessageAction) -> None:
-        """Handles message actions from the event stream.
-
-        Args:
-            action (MessageAction): The message action to handle.
-        """
-        if action.source == EventSource.USER:
-            # Use info level if LOG_ALL_EVENTS is set
-            log_level = (
-                'info' if os.getenv('LOG_ALL_EVENTS') in ('true', '1') else 'debug'
-            )
-            self.log(
-                log_level,
-                str(action),
-                extra={'msg_type': 'ACTION', 'event_source': EventSource.USER},
-            )
-            # Extend max iterations when the user sends a message (only in non-headless mode)
-            if self._initial_max_iterations is not None and not self.headless_mode:
-                self.state.max_iterations = (
-                    self.state.iteration + self._initial_max_iterations
-                )
-                if (
-                    self.state.traffic_control_state == TrafficControlState.THROTTLING
-                    or self.state.traffic_control_state == TrafficControlState.PAUSED
-                ):
-                    self.state.traffic_control_state = TrafficControlState.NORMAL
-                self.log(
-                    'debug',
-                    f'Extended max iterations to {self.state.max_iterations} after user message',
-                )
-            if self.get_agent_state() != AgentState.RUNNING:
-                await self.set_agent_state_to(AgentState.RUNNING)
-        elif action.source == EventSource.AGENT and action.wait_for_response:
-            await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
-
-    def _reset(self) -> None:
-        """Resets the agent controller"""
-
-        self._pending_action = None
-        self.agent.reset()
-
-    async def set_agent_state_to(self, new_state: AgentState) -> None:
-        """Updates the agent's state and handles side effects. Can emit events to the event stream.
-
-        Args:
-            new_state (AgentState): The new state to set for the agent.
-        """
-        self.log(
-            'info',
-            f'Setting agent({self.agent.name}) state from {self.state.agent_state} to {new_state}',
-        )
-
-        if new_state == self.state.agent_state:
-            return
-
-        if new_state in (AgentState.STOPPED, AgentState.ERROR):
-            self._reset()
-        elif (
-            new_state == AgentState.RUNNING
-            and self.state.agent_state == AgentState.PAUSED
-            # TODO: do we really need both THROTTLING and PAUSED states, or can we clean up one of them completely?
-            and self.state.traffic_control_state == TrafficControlState.THROTTLING
-        ):
-            # user intends to interrupt traffic control and let the task resume temporarily
-            self.state.traffic_control_state = TrafficControlState.PAUSED
-            # User has chosen to deliberately continue - lets double the max iterations
-            if (
-                self.state.iteration is not None
-                and self.state.max_iterations is not None
-                and self._initial_max_iterations is not None
-                and not self.headless_mode
-            ):
-                if self.state.iteration >= self.state.max_iterations:
-                    self.state.max_iterations += self._initial_max_iterations
-
-            if (
-                self.state.metrics.accumulated_cost is not None
-                and self.max_budget_per_task is not None
-                and self._initial_max_budget_per_task is not None
-            ):
-                if self.state.metrics.accumulated_cost >= self.max_budget_per_task:
-                    self.max_budget_per_task += self._initial_max_budget_per_task
-        elif self._pending_action is not None and (
-            new_state in (AgentState.USER_CONFIRMED, AgentState.USER_REJECTED)
-        ):
-            if hasattr(self._pending_action, 'thought'):
-                self._pending_action.thought = ''  # type: ignore[union-attr]
-            if new_state == AgentState.USER_CONFIRMED:
-                confirmation_state = ActionConfirmationStatus.CONFIRMED
-            else:
-                confirmation_state = ActionConfirmationStatus.REJECTED
-            self._pending_action.confirmation_state = confirmation_state  # type: ignore[attr-defined]
-            self._pending_action._id = None  # type: ignore[attr-defined]
-            self.event_stream.add_event(self._pending_action, EventSource.AGENT)
-
-        self.state.agent_state = new_state
-        self.event_stream.add_event(
-            AgentStateChangedObservation('', self.state.agent_state),
-            EventSource.ENVIRONMENT,
-        )
-
-        if new_state == AgentState.INIT and self.state.resume_state:
-            await self.set_agent_state_to(self.state.resume_state)
-            self.state.resume_state = None
-
-    def get_agent_state(self) -> AgentState:
-        """Returns the current state of the agent.
-
-        Returns:
-            AgentState: The current state of the agent.
-        """
-        return self.state.agent_state
-
-    async def start_delegate(self, action: AgentDelegateAction) -> None:
-        """Start a delegate agent to handle a subtask.
-
-        OpenHands is a multi-agentic system. A `task` is a conversation between
-        OpenHands (the whole system) and the user, which might involve one or more inputs
-        from the user. It starts with an initial input (typically a task statement) from
-        the user, and ends with either an `AgentFinishAction` initiated by the agent, a
-        stop initiated by the user, or an error.
-
-        A `subtask` is a conversation between an agent and the user, or another agent. If a `task`
-        is conducted by a single agent, then it's also a `subtask`. Otherwise, a `task` consists of
-        multiple `subtasks`, each executed by one agent.
-
-        Args:
-            action (AgentDelegateAction): The action containing information about the delegate agent to start.
-        """
-        agent_cls: Type[Agent] = Agent.get_cls(action.agent)
-        agent_config = self.agent_configs.get(action.agent, self.agent.config)
-        llm_config = self.agent_to_llm_config.get(action.agent, self.agent.llm.config)
-        llm = LLM(config=llm_config)
-        delegate_agent = agent_cls(llm=llm, config=agent_config)
-        state = State(
-            inputs=action.inputs or {},
-            local_iteration=0,
-            iteration=self.state.iteration,
-            max_iterations=self.state.max_iterations,
-            delegate_level=self.state.delegate_level + 1,
-            # global metrics should be shared between parent and child
-            metrics=self.state.metrics,
-            # start on top of the stream
-            start_id=self.event_stream.get_latest_event_id() + 1,
-        )
-        self.log(
-            'debug',
-            f'start delegate, creating agent {delegate_agent.name} using LLM {llm}',
-        )
-
-        self.event_stream.unsubscribe(EventStreamSubscriber.AGENT_CONTROLLER, self.id)
-        self.delegate = AgentController(
-            sid=self.id + '-delegate',
-            agent=delegate_agent,
-            event_stream=self.event_stream,
-            max_iterations=self.state.max_iterations,
-            max_budget_per_task=self.max_budget_per_task,
-            agent_to_llm_config=self.agent_to_llm_config,
-            agent_configs=self.agent_configs,
-            initial_state=state,
-            is_delegate=True,
-            headless_mode=self.headless_mode,
-        )
-        await self.delegate.set_agent_state_to(AgentState.RUNNING)
-
-    async def _step(self) -> None:
-        """Executes a single step of the parent or delegate agent. Detects stuck agents and limits on the number of iterations and the task budget."""
-        if self.get_agent_state() != AgentState.RUNNING:
-            await asyncio.sleep(1)
-            return
-
-        if self._pending_action:
-            await asyncio.sleep(1)
-            return
-
-        if self.delegate is not None:
-            assert self.delegate != self
-            if self.delegate.get_agent_state() == AgentState.PAUSED:
-                # no need to check too often
-                await asyncio.sleep(1)
-            else:
-                await self._delegate_step()
-            return
-
-        self.log(
-            'info',
-            f'LEVEL {self.state.delegate_level} LOCAL STEP {self.state.local_iteration} GLOBAL STEP {self.state.iteration}',
-            extra={'msg_type': 'STEP'},
-        )
-
-        # check if agent hit the resources limit
-        stop_step = False
-        if self.state.iteration >= self.state.max_iterations:
-            stop_step = await self._handle_traffic_control(
-                'iteration', self.state.iteration, self.state.max_iterations
-            )
-        if self.max_budget_per_task is not None:
-            current_cost = self.state.metrics.accumulated_cost
-            if current_cost > self.max_budget_per_task:
-                stop_step = await self._handle_traffic_control(
-                    'budget', current_cost, self.max_budget_per_task
-                )
-        if stop_step:
-            return
-
-        if self._is_stuck():
-            await self._react_to_exception(RuntimeError('Agent got stuck in a loop'))
-            return
-
-        self.update_state_before_step()
-        action: Action = NullAction()
-        try:
-            action = self.agent.step(self.state)
-            if action is None:
-                raise LLMNoActionError('No action was returned')
-        except (
-            LLMMalformedActionError,
-            LLMNoActionError,
-            LLMResponseError,
-            FunctionCallValidationError,
-            FunctionCallNotExistsError,
-        ) as e:
-            self.event_stream.add_event(
-                ErrorObservation(
-                    content=str(e),
-                ),
-                EventSource.AGENT,
-            )
-            return
-        except (ContextWindowExceededError, BadRequestError) as e:
-            # FIXME: this is a hack until a litellm fix is confirmed
-            # Check if this is a nested context window error
-            error_str = str(e).lower()
-            if (
-                'contextwindowexceedederror' in error_str
-                or 'prompt is too long' in error_str
-                or isinstance(e, ContextWindowExceededError)
-            ):
-                # When context window is exceeded, keep roughly half of agent interactions
-                self.state.history = self._apply_conversation_window(self.state.history)
-
-                # Save the ID of the first event in our truncated history for future reloading
-                if self.state.history:
-                    self.state.start_id = self.state.history[0].id
-                # Don't add error event - let the agent retry with reduced context
-                return
-            raise
-
-        if action.runnable:
-            if self.state.confirmation_mode and (
-                type(action) is CmdRunAction or type(action) is IPythonRunCellAction
-            ):
-                action.confirmation_state = (
-                    ActionConfirmationStatus.AWAITING_CONFIRMATION
-                )
-            self._pending_action = action
-
-        if not isinstance(action, NullAction):
-            if (
-                hasattr(action, 'confirmation_state')
-                and action.confirmation_state
-                == ActionConfirmationStatus.AWAITING_CONFIRMATION
-            ):
-                await self.set_agent_state_to(AgentState.AWAITING_USER_CONFIRMATION)
-            self.event_stream.add_event(action, EventSource.AGENT)
-
-        await self.update_state_after_step()
-
-        log_level = 'info' if LOG_ALL_EVENTS else 'debug'
-        self.log(log_level, str(action), extra={'msg_type': 'ACTION'})
-
-    async def _delegate_step(self) -> None:
-        """Executes a single step of the delegate agent."""
-        await self.delegate._step()  # type: ignore[union-attr]
-        assert self.delegate is not None
-        delegate_state = self.delegate.get_agent_state()
-        self.log('debug', f'Delegate state: {delegate_state}')
-        if delegate_state == AgentState.ERROR:
-            # update iteration that shall be shared across agents
-            self.state.iteration = self.delegate.state.iteration
-
-            # emit AgentDelegateObservation to mark delegate termination due to error
-            delegate_outputs = (
-                self.delegate.state.outputs if self.delegate.state else {}
-            )
-            content = (
-                f'{self.delegate.agent.name} encountered an error during execution.'
-            )
-            obs = AgentDelegateObservation(outputs=delegate_outputs, content=content)
-            self.event_stream.add_event(obs, EventSource.AGENT)
-
-            # close the delegate upon error
-            await self.delegate.close()
-
-            # resubscribe parent when delegate is finished
-            self.event_stream.subscribe(
-                EventStreamSubscriber.AGENT_CONTROLLER, self.on_event, self.id
-            )
-            self.delegate = None
-            self.delegateAction = None
-
-        elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
-            self.log('debug', 'Delegate agent has finished execution')
-            # retrieve delegate result
-            outputs = self.delegate.state.outputs if self.delegate.state else {}
-
-            # update iteration that shall be shared across agents
-            self.state.iteration = self.delegate.state.iteration
-
-            # close delegate controller: we must close the delegate controller before adding new events
-            await self.delegate.close()
-
-            # resubscribe parent when delegate is finished
-            self.event_stream.subscribe(
-                EventStreamSubscriber.AGENT_CONTROLLER, self.on_event, self.id
-            )
-
-            # update delegate result observation
-            # TODO: replace this with AI-generated summary (#2395)
-            formatted_output = ', '.join(
-                f'{key}: {value}' for key, value in outputs.items()
-            )
-            content = (
-                f'{self.delegate.agent.name} finishes task with {formatted_output}'
-            )
-            obs = AgentDelegateObservation(outputs=outputs, content=content)
-
-            # clean up delegate status
-            self.delegate = None
-            self.delegateAction = None
-            self.event_stream.add_event(obs, EventSource.AGENT)
-        return
-
-    async def _handle_traffic_control(
-        self, limit_type: str, current_value: float, max_value: float
-    ) -> bool:
-        """Handles agent state after hitting the traffic control limit.
-
-        Args:
-            limit_type (str): The type of limit that was hit.
-            current_value (float): The current value of the limit.
-            max_value (float): The maximum value of the limit.
-        """
-        stop_step = False
-        if self.state.traffic_control_state == TrafficControlState.PAUSED:
-            self.log(
-                'debug', 'Hitting traffic control, temporarily resume upon user request'
-            )
-            self.state.traffic_control_state = TrafficControlState.NORMAL
-        else:
-            self.state.traffic_control_state = TrafficControlState.THROTTLING
-            # Format values as integers for iterations, keep decimals for budget
-            if limit_type == 'iteration':
-                current_str = str(int(current_value))
-                max_str = str(int(max_value))
-            else:
-                current_str = f'{current_value:.2f}'
-                max_str = f'{max_value:.2f}'
-
-            if self.headless_mode:
-                e = RuntimeError(
-                    f'Agent reached maximum {limit_type} in headless mode. '
-                    f'Current {limit_type}: {current_str}, max {limit_type}: {max_str}'
-                )
-                await self._react_to_exception(e)
-            else:
-                e = RuntimeError(
-                    f'Agent reached maximum {limit_type}. '
-                    f'Current {limit_type}: {current_str}, max {limit_type}: {max_str}. '
-                )
-                # FIXME: this isn't really an exception--we should have a different path
-                await self._react_to_exception(e)
-            stop_step = True
-        return stop_step
-
-    def get_state(self) -> State:
-        """Returns the current running state object.
-
-        Returns:
-            State: The current state object.
-        """
-        return self.state
-
-    def set_initial_state(
-        self,
-        state: State | None,
-        max_iterations: int,
-        confirmation_mode: bool = False,
-    ) -> None:
-        """Sets the initial state for the agent, either from the previous session, or from a parent agent, or by creating a new one.
-
-        Args:
-            state: The state to initialize with, or None to create a new state.
-            max_iterations: The maximum number of iterations allowed for the task.
-            confirmation_mode: Whether to enable confirmation mode.
-        """
-        # state can come from:
-        # - the previous session, in which case it has history
-        # - from a parent agent, in which case it has no history
-        # - None / a new state
-        if state is None:
-            self.state = State(
-                inputs={},
-                max_iterations=max_iterations,
-                confirmation_mode=confirmation_mode,
-            )
-        else:
-            self.state = state
-
-            if self.state.start_id <= -1:
-                self.state.start_id = 0
-
-            self.log(
-                'debug',
-                f'AgentController {self.id} initializing history from event {self.state.start_id}',
-            )
-
-            self._init_history()
-
-    def _init_history(self) -> None:
-        """Initializes the agent's history from the event stream.
-
-        The history is a list of events that:
-        - Excludes events of types listed in self.filter_out
-        - Excludes events with hidden=True attribute
-        - For delegate events (between AgentDelegateAction and AgentDelegateObservation):
-            - Excludes all events between the action and observation
-            - Includes the delegate action and observation themselves
-
-        The history is loaded in two parts if truncation_id is set:
-        1. First user message from start_id onwards
-        2. Rest of history from truncation_id to the end
-
-        Otherwise loads normally from start_id.
-        """
-        # define range of events to fetch
-        # delegates start with a start_id and initially won't find any events
-        # otherwise we're restoring a previous session
-        start_id = self.state.start_id if self.state.start_id >= 0 else 0
-        end_id = (
-            self.state.end_id
-            if self.state.end_id >= 0
-            else self.event_stream.get_latest_event_id()
-        )
-
-        # sanity check
-        if start_id > end_id + 1:
-            self.log(
-                'warning',
-                f'start_id {start_id} is greater than end_id + 1 ({end_id + 1}). History will be empty.',
-            )
-            self.state.history = []
-            return
-
-        events: list[Event] = []
-
-        # If we have a truncation point, get first user message and then rest of history
-        if hasattr(self.state, 'truncation_id') and self.state.truncation_id > 0:
-            # Find first user message from stream
-            first_user_msg = next(
-                (
-                    e
-                    for e in self.event_stream.get_events(
-                        start_id=start_id,
-                        end_id=end_id,
-                        reverse=False,
-                        filter_out_type=self.filter_out,
-                        filter_hidden=True,
-                    )
-                    if isinstance(e, MessageAction) and e.source == EventSource.USER
-                ),
-                None,
-            )
-            if first_user_msg:
-                events.append(first_user_msg)
-
-            # the rest of the events are from the truncation point
-            start_id = self.state.truncation_id
-
-        # Get rest of history
-        events_to_add = list(
-            self.event_stream.get_events(
-                start_id=start_id,
-                end_id=end_id,
-                reverse=False,
-                filter_out_type=self.filter_out,
-                filter_hidden=True,
-            )
-        )
-        events.extend(events_to_add)
-
-        # Find all delegate action/observation pairs
-        delegate_ranges: list[tuple[int, int]] = []
-        delegate_action_ids: list[int] = []  # stack of unmatched delegate action IDs
-
-        for event in events:
-            if isinstance(event, AgentDelegateAction):
-                delegate_action_ids.append(event.id)
-                # Note: we can get agent=event.agent and task=event.inputs.get('task','')
-                # if we need to track these in the future
-
-            elif isinstance(event, AgentDelegateObservation):
-                # Match with most recent unmatched delegate action
-                if not delegate_action_ids:
-                    self.log(
-                        'warning',
-                        f'Found AgentDelegateObservation without matching action at id={event.id}',
-                    )
-                    continue
-
-                action_id = delegate_action_ids.pop()
-                delegate_ranges.append((action_id, event.id))
-
-        # Filter out events between delegate action/observation pairs
-        if delegate_ranges:
-            filtered_events: list[Event] = []
-            current_idx = 0
-
-            for start_id, end_id in sorted(delegate_ranges):
-                # Add events before delegate range
-                filtered_events.extend(
-                    event for event in events[current_idx:] if event.id < start_id
-                )
-
-                # Add delegate action and observation
-                filtered_events.extend(
-                    event for event in events if event.id in (start_id, end_id)
-                )
-
-                # Update index to after delegate range
-                current_idx = next(
-                    (i for i, e in enumerate(events) if e.id > end_id), len(events)
-                )
-
-            # Add any remaining events after last delegate range
-            filtered_events.extend(events[current_idx:])
-
-            self.state.history = filtered_events
-        else:
-            self.state.history = events
-
-        # make sure history is in sync
-        self.state.start_id = start_id
-
-    def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
-        """Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
-        and ensuring the first user message is always included.
-
-        The algorithm:
-        1. Cut history in half
-        2. Check first event in new history:
-           - If Observation: find and include its Action
-           - If MessageAction: ensure its related Action-Observation pair isn't split
-        3. Always include the first user message
-
-        Args:
-            events: List of events to filter
-
-        Returns:
-            Filtered list of events keeping newest half while preserving pairs
-        """
-        if not events:
-            return events
-
-        # Find first user message - we'll need to ensure it's included
-        first_user_msg = next(
-            (
-                e
-                for e in events
-                if isinstance(e, MessageAction) and e.source == EventSource.USER
-            ),
-            None,
-        )
-
-        # cut in half
-        mid_point = max(1, len(events) // 2)
-        kept_events = events[mid_point:]
-
-        # Handle first event in truncated history
-        if kept_events:
-            i = 0
-            while i < len(kept_events):
-                first_event = kept_events[i]
-                if isinstance(first_event, Observation) and first_event.cause:
-                    # Find its action and include it
-                    matching_action = next(
-                        (
-                            e
-                            for e in reversed(events[:mid_point])
-                            if isinstance(e, Action) and e.id == first_event.cause
-                        ),
-                        None,
-                    )
-                    if matching_action:
-                        kept_events = [matching_action] + kept_events
-                    else:
-                        self.log(
-                            'warning',
-                            f'Found Observation without matching Action at id={first_event.id}',
-                        )
-                        # drop this observation
-                        kept_events = kept_events[1:]
-                    break
-
-                elif isinstance(first_event, MessageAction) or (
-                    isinstance(first_event, Action)
-                    and first_event.source == EventSource.USER
-                ):
-                    # if it's a message action or a user action, keep it and continue to find the next event
-                    i += 1
-                    continue
-
-                else:
-                    # if it's an action with source == EventSource.AGENT, we're good
-                    break
-
-        # Save where to continue from in next reload
-        if kept_events:
-            self.state.truncation_id = kept_events[0].id
-
-        # Ensure first user message is included
-        if first_user_msg and first_user_msg not in kept_events:
-            kept_events = [first_user_msg] + kept_events
-
-        # start_id points to first user message
-        if first_user_msg:
-            self.state.start_id = first_user_msg.id
-
-        return kept_events
-
-    def _is_stuck(self) -> bool:
-        """Checks if the agent or its delegate is stuck in a loop.
-
-        Returns:
-            bool: True if the agent is stuck, False otherwise.
-        """
-        # check if delegate stuck
-        if self.delegate and self.delegate._is_stuck():
-            return True
-
-        return self._stuck_detector.is_stuck(self.headless_mode)
-
-    def __repr__(self):
-        return (
-            f'AgentController(id={self.id}, agent={self.agent!r}, '
-            f'event_stream={self.event_stream!r}, '
-            f'state={self.state!r}, agent_task={self.agent_task!r}, '
-            f'delegate={self.delegate!r}, _pending_action={self._pending_action!r})'
-        )
-
-    def _is_awaiting_observation(self):
-        events = self.event_stream.get_events(reverse=True)
-        for event in events:
-            if isinstance(event, AgentStateChangedObservation):
-                result = event.agent_state == AgentState.RUNNING
-                return result
-        return False
@@ -1,171 +0,0 @@
-import base64
-import pickle
-from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any
-
-from openhands.controller.state.task import RootTask
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.schema import AgentState
-from openhands.events.action import (
-    MessageAction,
-)
-from openhands.events.action.agent import AgentFinishAction
-from openhands.events.event import Event, EventSource
-from openhands.llm.metrics import Metrics
-from openhands.storage.files import FileStore
-
-
-class TrafficControlState(str, Enum):
-    # default state, no rate limiting
-    NORMAL = 'normal'
-
-    # task paused due to traffic control
-    THROTTLING = 'throttling'
-
-    # traffic control is temporarily paused
-    PAUSED = 'paused'
-
-
-RESUMABLE_STATES = [
-    AgentState.RUNNING,
-    AgentState.PAUSED,
-    AgentState.AWAITING_USER_INPUT,
-    AgentState.FINISHED,
-]
-
-
-@dataclass
-class State:
-    """
-    Represents the running state of an agent in the OpenHands system, saving data of its operation and memory.
-
-    - Multi-agent/delegate state:
-      - store the task (conversation between the agent and the user)
-      - the subtask (conversation between an agent and the user or another agent)
-      - global and local iterations
-      - delegate levels for multi-agent interactions
-      - almost stuck state
-
-    - Running state of an agent:
-      - current agent state (e.g., LOADING, RUNNING, PAUSED)
-      - traffic control state for rate limiting
-      - confirmation mode
-      - the last error encountered
-
-    - Data for saving and restoring the agent:
-      - save to and restore from a session
-      - serialize with pickle and base64
-
-    - Save / restore data about message history
-      - start and end IDs for events in agent's history
-      - summaries and delegate summaries
-
-    - Metrics:
-      - global metrics for the current task
-      - local metrics for the current subtask
-
-    - Extra data:
-      - additional task-specific data
-    """
-
-    root_task: RootTask = field(default_factory=RootTask)
-    # global iteration for the current task
-    iteration: int = 0
-    # local iteration for the current subtask
-    local_iteration: int = 0
-    # max number of iterations for the current task
-    max_iterations: int = 100
-    confirmation_mode: bool = False
-    history: list[Event] = field(default_factory=list)
-    inputs: dict = field(default_factory=dict)
-    outputs: dict = field(default_factory=dict)
-    agent_state: AgentState = AgentState.LOADING
-    resume_state: AgentState | None = None
-    traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
-    # global metrics for the current task
-    metrics: Metrics = field(default_factory=Metrics)
-    # local metrics for the current subtask
-    local_metrics: Metrics = field(default_factory=Metrics)
-    # root agent has level 0, and every delegate increases the level by one
-    delegate_level: int = 0
-    # start_id and end_id track the range of events in history
-    start_id: int = -1
-    end_id: int = -1
-    # truncation_id tracks where to load history after context window truncation
-    truncation_id: int = -1
-
-    delegates: dict[tuple[int, int], tuple[str, str]] = field(default_factory=dict)
-    # NOTE: This will never be used by the controller, but it can be used by different
-    # evaluation tasks to store extra data needed to track the progress/state of the task.
-    extra_data: dict[str, Any] = field(default_factory=dict)
-    last_error: str = ''
-
-    def save_to_session(self, sid: str, file_store: FileStore):
-        pickled = pickle.dumps(self)
-        logger.debug(f'Saving state to session {sid}:{self.agent_state}')
-        encoded = base64.b64encode(pickled).decode('utf-8')
-        try:
-            file_store.write(f'sessions/{sid}/agent_state.pkl', encoded)
-        except Exception as e:
-            logger.error(f'Failed to save state to session: {e}')
-            raise e
-
-    @staticmethod
-    def restore_from_session(sid: str, file_store: FileStore) -> 'State':
-        try:
-            encoded = file_store.read(f'sessions/{sid}/agent_state.pkl')
-            pickled = base64.b64decode(encoded)
-            state = pickle.loads(pickled)
-        except Exception as e:
-            logger.debug(f'Could not restore state from session: {e}')
-            raise e
-
-        # update state
-        if state.agent_state in RESUMABLE_STATES:
-            state.resume_state = state.agent_state
-        else:
-            state.resume_state = None
-
-        # first state after restore
-        state.agent_state = AgentState.LOADING
-        return state
-
-    def __getstate__(self):
-        # don't pickle history, it will be restored from the event stream
-        state = self.__dict__.copy()
-        state['history'] = []
-        return state
-
-    def __setstate__(self, state):
-        self.__dict__.update(state)
-
-        # make sure we always have the attribute history
-        if not hasattr(self, 'history'):
-            self.history = []
-
-    def get_current_user_intent(self) -> tuple[str | None, list[str] | None]:
-        """Returns the latest user message and image(if provided) that appears after a FinishAction, or the first (the task) if nothing was finished yet."""
-        last_user_message = None
-        last_user_message_image_urls: list[str] | None = []
-        for event in reversed(self.history):
-            if isinstance(event, MessageAction) and event.source == 'user':
-                last_user_message = event.content
-                last_user_message_image_urls = event.image_urls
-            elif isinstance(event, AgentFinishAction):
-                if last_user_message is not None:
-                    return last_user_message, None
-
-        return last_user_message, last_user_message_image_urls
-
-    def get_last_agent_message(self) -> MessageAction | None:
-        for event in reversed(self.history):
-            if isinstance(event, MessageAction) and event.source == EventSource.AGENT:
-                return event
-        return None
-
-    def get_last_user_message(self) -> MessageAction | None:
-        for event in reversed(self.history):
-            if isinstance(event, MessageAction) and event.source == EventSource.USER:
-                return event
-        return None
@@ -1,335 +0,0 @@
-from openhands.controller.state.state import State
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action.action import Action
-from openhands.events.action.commands import IPythonRunCellAction
-from openhands.events.action.empty import NullAction
-from openhands.events.action.message import MessageAction
-from openhands.events.event import Event, EventSource
-from openhands.events.observation.commands import (
-    CmdOutputObservation,
-    IPythonRunCellObservation,
-)
-from openhands.events.observation.empty import NullObservation
-from openhands.events.observation.error import ErrorObservation
-from openhands.events.observation.observation import Observation
-
-
-class StuckDetector:
-    """Detects whether an agent keeps repeating itself without making progress.
-
-    The detector inspects the event history of a State and checks four
-    scenarios: identical action/observation pairs, one action that keeps
-    failing with the same error, an agent monologue, and an alternating
-    two-step action/observation pattern.
-    """
-
-    # IPython syntax-error messages that scenario 2 treats as a repeated failure
-    SYNTAX_ERROR_MESSAGES = [
-        'SyntaxError: unterminated string literal (detected at line',
-        'SyntaxError: invalid syntax. Perhaps you forgot a comma?',
-        'SyntaxError: incomplete input',
-    ]
-
-    def __init__(self, state: State):
-        """Keep a reference to the state whose history will be inspected."""
-        self.state = state
-
-    def is_stuck(self, headless_mode: bool = True):
-        """Checks if the agent is stuck in a loop.
-
-        Args:
-            headless_mode: Matches AgentController's headless_mode.
-                          If True: Consider all history (automated/testing)
-                          If False: Consider only history after last user message (interactive)
-
-        Returns:
-            bool: True if the agent is stuck in a loop, False otherwise.
-        """
-        if not headless_mode:
-            # In interactive mode, only look at history after the last user message
-            last_user_msg_idx = -1
-            for i, event in enumerate(reversed(self.state.history)):
-                if (
-                    isinstance(event, MessageAction)
-                    and event.source == EventSource.USER
-                ):
-                    last_user_msg_idx = len(self.state.history) - i - 1
-                    break
-
-            # with no user message at all the index stays -1 and the whole history is used
-            history_to_check = self.state.history[last_user_msg_idx + 1 :]
-        else:
-            # In headless mode, look at all history
-            history_to_check = self.state.history
-
-        # Filter out user messages and null events
-        filtered_history = [
-            event
-            for event in history_to_check
-            if not (
-                # Filter works in both modes:
-                # - In headless: actively filters out user messages from full history
-                # - In non-headless: no-op since we already sliced after last user message
-                (isinstance(event, MessageAction) and event.source == EventSource.USER)
-                # there might be some NullAction or NullObservation in the history at least for now
-                or isinstance(event, (NullAction, NullObservation))
-            )
-        ]
-
-        # it takes 3 events minimum to detect a loop, otherwise nothing to do here
-        if len(filtered_history) < 3:
-            return False
-
-        # the first few scenarios detect 3 or 4 repeated steps
-        # prepare the last 4 actions and observations, to check them out
-        last_actions: list[Event] = []
-        last_observations: list[Event] = []
-
-        # retrieve the last four actions and observations starting from the end of history, wherever they are
-        # (both lists are therefore ordered newest first)
-        for event in reversed(filtered_history):
-            if isinstance(event, Action) and len(last_actions) < 4:
-                last_actions.append(event)
-            elif isinstance(event, Observation) and len(last_observations) < 4:
-                last_observations.append(event)
-
-            if len(last_actions) == 4 and len(last_observations) == 4:
-                break
-
-        # scenario 1: same action, same observation
-        if self._is_stuck_repeating_action_observation(last_actions, last_observations):
-            return True
-
-        # scenario 2: same action, errors
-        if self._is_stuck_repeating_action_error(last_actions, last_observations):
-            return True
-
-        # scenario 3: monologue
-        if self._is_stuck_monologue(filtered_history):
-            return True
-
-        # scenario 4: action, observation pattern on the last six steps
-        if len(filtered_history) < 6:
-            return False
-        if self._is_stuck_action_observation_pattern(filtered_history):
-            return True
-
-        return False
-
-    def _is_stuck_repeating_action_observation(self, last_actions, last_observations):
-        """Scenario 1: the same action produced the same observation four times.
-
-        Args:
-            last_actions: Up to four most recent actions, newest first.
-            last_observations: Up to four most recent observations, newest first.
-
-        Returns:
-            True only when exactly four actions and four observations are given
-            and all of each compare equal via ``_eq_no_pid``.
-        """
-        # it takes 4 actions and 4 observations to detect this loop
-        if len(last_actions) == 4 and len(last_observations) == 4:
-            actions_equal = all(
-                self._eq_no_pid(last_actions[0], action) for action in last_actions
-            )
-            observations_equal = all(
-                self._eq_no_pid(last_observations[0], observation)
-                for observation in last_observations
-            )
-
-            if actions_equal and observations_equal:
-                logger.warning('Action, Observation loop detected')
-                return True
-
-        return False
-
-    def _is_stuck_repeating_action_error(self, last_actions, last_observations):
-        """Scenario 2: the same action was repeated three times and kept failing.
-
-        A failure is either an ErrorObservation, or an IPythonRunCellObservation
-        that ends with one of SYNTAX_ERROR_MESSAGES in a consistent way.
-
-        Args:
-            last_actions: Most recent actions, newest first.
-            last_observations: Most recent observations, newest first.
-
-        Returns:
-            True if the three newest actions are equal and the three newest
-            observations are all matching errors, False otherwise.
-        """
-        # only the three newest entries of each list are inspected below, so
-        # three of each are enough to run the check
-        if len(last_actions) < 3 or len(last_observations) < 3:
-            return False
-
-        # are the last three actions the "same"?
-        if all(self._eq_no_pid(last_actions[0], action) for action in last_actions[:3]):
-            # and the last three observations are all errors?
-            if all(isinstance(obs, ErrorObservation) for obs in last_observations[:3]):
-                logger.warning('Action, ErrorObservation loop detected')
-                return True
-            # or, are the last three observations all IPythonRunCellObservation with SyntaxError?
-            elif all(
-                isinstance(obs, IPythonRunCellObservation)
-                for obs in last_observations[:3]
-            ):
-                warning = 'Action, IPythonRunCellObservation loop detected'
-                for error_message in self.SYNTAX_ERROR_MESSAGES:
-                    # the unterminated-string message carries a line number, so it
-                    # is matched with the line-error check instead
-                    if error_message.startswith(
-                        'SyntaxError: unterminated string literal (detected at line'
-                    ):
-                        if self._check_for_consistent_line_error(
-                            last_observations[:3], error_message
-                        ):
-                            logger.warning(warning)
-                            return True
-                    elif error_message in (
-                        'SyntaxError: invalid syntax. Perhaps you forgot a comma?',
-                        'SyntaxError: incomplete input',
-                    ) and self._check_for_consistent_invalid_syntax(
-                        last_observations[:3], error_message
-                    ):
-                        logger.warning(warning)
-                        return True
-        return False
-
-    def _check_for_consistent_invalid_syntax(self, observations, error_message):
-        """Check that three observations report the same invalid-syntax error.
-
-        Args:
-            observations: Objects exposing a string ``content`` attribute.
-            error_message: The syntax-error text to look for.
-
-        Returns:
-            True if every observation has at least 6 lines, starts with
-            'Cell In[1], line', shares the same first line, ends with the two
-            '[Jupyter ...' footer lines, and has an identical third-to-last
-            line containing ``error_message``; False otherwise.
-        """
-        first_lines = []
-        valid_observations = []
-
-        for obs in observations:
-            content = obs.content
-            lines = content.strip().split('\n')
-
-            if len(lines) < 6:  # 6 because a real syntax error has at least 6 lines
-                return False
-
-            line1 = lines[0].strip()
-            if not line1.startswith('Cell In[1], line'):
-                return False
-
-            first_lines.append(line1)  # Store the first line of each observation
-
-            # Check last three lines
-            if (
-                lines[-1].startswith('[Jupyter Python interpreter:')
-                and lines[-2].startswith('[Jupyter current working directory:')
-                and error_message in lines[-3]
-            ):
-                valid_observations.append(obs)
-
-        # Check if:
-        # 1. All first lines are identical
-        # 2. We have exactly 3 valid observations
-        # 3. The error message line is identical in all valid observations
-        return (
-            len(set(first_lines)) == 1
-            and len(valid_observations) == 3
-            and len(
-                set(
-                    # third-to-last line, i.e. the one holding the error message
-                    obs.content.strip().split('\n')[:-2][-1]
-                    for obs in valid_observations
-                )
-            )
-            == 1
-        )
-
-    def _check_for_consistent_line_error(self, observations, error_message):
-        """Check that three observations end with the same error line.
-
-        Args:
-            observations: Objects exposing a string ``content`` attribute.
-            error_message: The error text expected in the third-to-last line.
-
-        Returns:
-            True if all three observations end with the two '[Jupyter ...'
-            footer lines and their third-to-last lines contain
-            ``error_message`` and are identical; False otherwise.
-        """
-        error_lines = []
-
-        for obs in observations:
-            content = obs.content
-            lines = content.strip().split('\n')
-
-            if len(lines) < 3:
-                return False
-
-            last_lines = lines[-3:]
-
-            # Check if the last two lines are our own
-            if not (
-                last_lines[-2].startswith('[Jupyter current working directory:')
-                and last_lines[-1].startswith('[Jupyter Python interpreter:')
-            ):
-                return False
-
-            # Check for the error message in the 3rd-to-last line
-            if error_message in last_lines[-3]:
-                error_lines.append(last_lines[-3])
-
-        # Check if we found the error message in all 3 observations
-        # and the 3rd-to-last line is identical across all occurrences
-        return len(error_lines) == 3 and len(set(error_lines)) == 1
-
-    def _is_stuck_monologue(self, filtered_history):
-        """Scenario 3: the agent sent the same message three times in a row.
-
-        Args:
-            filtered_history: Events to inspect, oldest first.
-
-        Returns:
-            True if the last three agent MessageActions are equal and no
-            Observation occurs between the first and the last of them.
-        """
-        # remember the position of each agent message so the gap can be inspected
-        agent_message_actions = [
-            (i, event)
-            for i, event in enumerate(filtered_history)
-            if isinstance(event, MessageAction) and event.source == EventSource.AGENT
-        ]
-
-        # last three message actions will do for this check
-        if len(agent_message_actions) >= 3:
-            last_agent_message_actions = agent_message_actions[-3:]
-
-            if all(
-                (last_agent_message_actions[0][1] == action[1])
-                for action in last_agent_message_actions
-            ):
-                # check if there are any observations between the repeated MessageActions
-                # then it's not yet a loop, maybe it can recover
-                start_index = last_agent_message_actions[0][0]
-                end_index = last_agent_message_actions[-1][0]
-
-                has_observation_between = False
-                for event in filtered_history[start_index + 1 : end_index]:
-                    if isinstance(event, Observation):
-                        has_observation_between = True
-                        break
-
-                if not has_observation_between:
-                    logger.warning('Repeated MessageAction with source=AGENT detected')
-                    return True
-        return False
-
-    def _is_stuck_action_observation_pattern(self, filtered_history):
-        """Scenario 4: two (action, observation) steps alternate over six steps.
-
-        Args:
-            filtered_history: Events to inspect, oldest first.
-
-        Returns:
-            True if six actions and six observations are available and every
-            other one of each compares equal via ``_eq_no_pid``.
-        """
-        last_six_actions: list[Event] = []
-        last_six_observations: list[Event] = []
-
-        # the end of history is most interesting
-        for event in reversed(filtered_history):
-            if isinstance(event, Action) and len(last_six_actions) < 6:
-                last_six_actions.append(event)
-            elif isinstance(event, Observation) and len(last_six_observations) < 6:
-                last_six_observations.append(event)
-
-            if len(last_six_actions) == 6 and len(last_six_observations) == 6:
-                break
-
-        # this pattern is every other step, like:
-        # (action_1, obs_1), (action_2, obs_2), (action_1, obs_1), (action_2, obs_2),...
-        if len(last_six_actions) == 6 and len(last_six_observations) == 6:
-            actions_equal = (
-                # action_0 == action_2 == action_4
-                self._eq_no_pid(last_six_actions[0], last_six_actions[2])
-                and self._eq_no_pid(last_six_actions[0], last_six_actions[4])
-                # action_1 == action_3 == action_5
-                and self._eq_no_pid(last_six_actions[1], last_six_actions[3])
-                and self._eq_no_pid(last_six_actions[1], last_six_actions[5])
-            )
-            observations_equal = (
-                # obs_0 == obs_2 == obs_4
-                self._eq_no_pid(last_six_observations[0], last_six_observations[2])
-                and self._eq_no_pid(last_six_observations[0], last_six_observations[4])
-                # obs_1 == obs_3 == obs_5
-                and self._eq_no_pid(last_six_observations[1], last_six_observations[3])
-                and self._eq_no_pid(last_six_observations[1], last_six_observations[5])
-            )
-
-            if actions_equal and observations_equal:
-                logger.warning('Action, Observation pattern detected')
-                return True
-        return False
-
-    def _eq_no_pid(self, obj1, obj2):
-        """Compare two events for loop detection, ignoring volatile details.
-
-        Two IPythonRunCellActions that both call 'edit_file_by_replace(' are
-        equal when their code has more than two lines and the first three
-        lines match. Two CmdOutputObservations are equal when command and
-        exit code match. Everything else uses the regular ``==`` comparison.
-        """
-        if isinstance(obj1, IPythonRunCellAction) and isinstance(
-            obj2, IPythonRunCellAction
-        ):
-            # for loop detection on edit actions, ignore the thought, compare some code
-            # the code should have at least 3 lines, to avoid simple one-liners
-            if (
-                'edit_file_by_replace(' in obj1.code
-                and 'edit_file_by_replace(' in obj2.code
-            ):
-                return (
-                    len(obj1.code.split('\n')) > 2
-                    and obj1.code.split('\n')[:3] == obj2.code.split('\n')[:3]
-                )
-            else:
-                # default comparison
-                return obj1 == obj2
-        elif isinstance(obj1, CmdOutputObservation) and isinstance(
-            obj2, CmdOutputObservation
-        ):
-            # for loop detection, ignore command_id, which is the pid
-            return obj1.command == obj2.command and obj1.exit_code == obj2.exit_code
-        else:
-            # this is the default comparison
-            return obj1 == obj2
@@ -1,231 +0,0 @@
-import asyncio
-import logging
-import sys
-from typing import Type
-from uuid import uuid4
-
-from termcolor import colored
-
-import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
-from openhands import __version__
-from openhands.controller import AgentController
-from openhands.controller.agent import Agent
-from openhands.core.config import (
-    AppConfig,
-    get_parser,
-    load_app_config,
-)
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.loop import run_agent_until_done
-from openhands.core.schema import AgentState
-from openhands.events import EventSource, EventStream, EventStreamSubscriber
-from openhands.events.action import (
-    Action,
-    ActionConfirmationStatus,
-    ChangeAgentStateAction,
-    CmdRunAction,
-    FileEditAction,
-    MessageAction,
-)
-from openhands.events.event import Event
-from openhands.events.observation import (
-    AgentStateChangedObservation,
-    CmdOutputObservation,
-    FileEditObservation,
-    NullObservation,
-)
-from openhands.llm.llm import LLM
-from openhands.runtime import get_runtime_cls
-from openhands.runtime.base import Runtime
-from openhands.security import SecurityAnalyzer, options
-from openhands.storage import get_file_store
-
-
-def display_message(message: str):
-    """Print an agent message in yellow, prefixed with a robot emoji."""
-    text = '🤖 ' + message + '\n'
-    print(colored(text, 'yellow'))
-
-
-def display_command(command: str):
-    """Print a command in green after an uncolored '❯' prompt marker."""
-    highlighted = colored(command + '\n', 'green')
-    print('❯ ' + highlighted)
-
-
-def display_confirmation(confirmation_state: ActionConfirmationStatus):
-    """Print the confirmation state with an icon and color matching its value.
-
-    CONFIRMED is shown green, REJECTED red, and any other state yellow.
-    """
-    if confirmation_state == ActionConfirmationStatus.CONFIRMED:
-        icon, color = '✅ ', 'green'
-    elif confirmation_state == ActionConfirmationStatus.REJECTED:
-        icon, color = '❌ ', 'red'
-    else:
-        icon, color = '⏳ ', 'yellow'
-    print(colored(icon + confirmation_state + '\n', color))
-
-
-def display_command_output(output: str):
-    """Print command output line by line in blue, followed by a blank gap.
-
-    Lines starting with '[Python Interpreter' or 'openhands@' are skipped.
-    """
-    # TODO: clean this up once we clean up terminal output
-    skipped_prefixes = ('[Python Interpreter', 'openhands@')
-    for line in output.split('\n'):
-        if not line.startswith(skipped_prefixes):
-            print(colored(line, 'blue'))
-    print('\n')
-
-
-def display_file_edit(event: FileEditAction | FileEditObservation):
-    """Print the string form of a file-edit action or observation in green."""
-    rendered = str(event)
-    print(colored(rendered, 'green'))
-
-
-def display_event(event: Event, config: AppConfig):
-    """Render an event on the terminal using the matching display helper(s).
-
-    The checks are independent, so one event can trigger several helpers
-    (for example an action's thought followed by its command).
-
-    Args:
-        event: The event to show.
-        config: App config; its security.confirmation_mode flag decides
-            whether an event's confirmation state is shown.
-    """
-    if isinstance(event, Action) and hasattr(event, 'thought'):
-        display_message(event.thought)
-    if isinstance(event, MessageAction) and event.source == EventSource.AGENT:
-        display_message(event.content)
-    if isinstance(event, CmdRunAction):
-        display_command(event.command)
-    if isinstance(event, CmdOutputObservation):
-        display_command_output(event.content)
-    if isinstance(event, FileEditAction):
-        display_file_edit(event)
-    if isinstance(event, FileEditObservation):
-        display_file_edit(event)
-    # only events carrying a confirmation state are shown, and only in confirmation mode
-    if hasattr(event, 'confirmation_state') and config.security.confirmation_mode:
-        display_confirmation(event.confirmation_state)
-
-
-async def main():
-    """Runs the agent in CLI mode.
-
-    Parses the command line, builds the agent, event stream, runtime and
-    controller from the loaded config, subscribes an event handler that prints
-    events and prompts the user, and then runs the agent until it reaches the
-    STOPPED or ERROR state.
-    """
-
-    parser = get_parser()
-    # Add the version argument
-    parser.add_argument(
-        '-v',
-        '--version',
-        action='version',
-        version=f'{__version__}',
-        help='Show the version number and exit',
-        default=None,
-    )
-    args = parser.parse_args()
-
-    if args.version:
-        print(f'OpenHands version: {__version__}')
-        return
-
-    logger.setLevel(logging.WARNING)
-    config = load_app_config(config_file=args.config_file)
-    sid = 'cli'
-
-    agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
-    agent_config = config.get_agent_config(config.default_agent)
-    llm_config = config.get_llm_config_from_agent(config.default_agent)
-    agent = agent_cls(
-        llm=LLM(config=llm_config),
-        config=agent_config,
-    )
-
-    file_store = get_file_store(config.file_store, config.file_store_path)
-    event_stream = EventStream(sid, file_store)
-
-    runtime_cls = get_runtime_cls(config.runtime)
-    runtime: Runtime = runtime_cls(  # noqa: F841
-        config=config,
-        event_stream=event_stream,
-        sid=sid,
-        plugins=agent_cls.sandbox_plugins,
-        headless_mode=True,
-    )
-
-    if config.security.security_analyzer:
-        # instantiate the configured analyzer on the event stream
-        options.SecurityAnalyzers.get(
-            config.security.security_analyzer, SecurityAnalyzer
-        )(event_stream)
-
-    controller = AgentController(
-        agent=agent,
-        max_iterations=config.max_iterations,
-        max_budget_per_task=config.max_budget_per_task,
-        agent_to_llm_config=config.get_agent_to_llm_config_map(),
-        event_stream=event_stream,
-        confirmation_mode=config.security.confirmation_mode,
-    )
-
-    async def prompt_for_next_task():
-        """Ask the user for the next task and publish it on the event stream."""
-        # Run input() in a thread pool to avoid blocking the event loop
-        loop = asyncio.get_event_loop()
-        # Keep asking until something was typed, so that a blank line is never
-        # forwarded to the agent as a user message
-        next_message = ''
-        while not next_message.strip():
-            next_message = await loop.run_in_executor(
-                None, lambda: input('How can I help? >> ')
-            )
-        if next_message == 'exit':
-            event_stream.add_event(
-                ChangeAgentStateAction(AgentState.STOPPED), EventSource.ENVIRONMENT
-            )
-            return
-        action = MessageAction(content=next_message)
-        event_stream.add_event(action, EventSource.USER)
-
-    async def prompt_for_user_confirmation():
-        """Ask the user to confirm an action; only 'y' (any case) confirms."""
-        loop = asyncio.get_event_loop()
-        user_confirmation = await loop.run_in_executor(
-            None, lambda: input('Confirm action (possible security risk)? (y/n) >> ')
-        )
-        return user_confirmation.lower() == 'y'
-
-    async def on_event(event: Event):
-        """Print the event and prompt the user when the agent is waiting."""
-        display_event(event, config)
-        if isinstance(event, AgentStateChangedObservation):
-            if event.agent_state in [
-                AgentState.AWAITING_USER_INPUT,
-                AgentState.FINISHED,
-            ]:
-                await prompt_for_next_task()
-        if (
-            isinstance(event, NullObservation)
-            and controller.state.agent_state == AgentState.AWAITING_USER_CONFIRMATION
-        ):
-            user_confirmed = await prompt_for_user_confirmation()
-            if user_confirmed:
-                event_stream.add_event(
-                    ChangeAgentStateAction(AgentState.USER_CONFIRMED), EventSource.USER
-                )
-            else:
-                event_stream.add_event(
-                    ChangeAgentStateAction(AgentState.USER_REJECTED), EventSource.USER
-                )
-
-    event_stream.subscribe(EventStreamSubscriber.MAIN, on_event, str(uuid4()))
-
-    await runtime.connect()
-
-    # ask for the first task in the background while the agent loop starts
-    asyncio.create_task(prompt_for_next_task())
-
-    await run_agent_until_done(
-        controller, runtime, [AgentState.STOPPED, AgentState.ERROR]
-    )
-
-
-if __name__ == '__main__':
-    # Script entry point: run main() on a dedicated event loop and always
-    # clean up the loop's remaining tasks afterwards.
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    try:
-        loop.run_until_complete(main())
-    except KeyboardInterrupt:
-        # Ctrl+C is reported without a failing exit code
-        print('Received keyboard interrupt, shutting down...')
-    except ConnectionRefusedError as e:
-        print(f'Connection refused: {e}')
-        sys.exit(1)
-    except Exception as e:
-        print(f'An error occurred: {e}')
-        sys.exit(1)
-    finally:
-        # runs on every path above, including the sys.exit(1) ones
-        try:
-            # Cancel all running tasks
-            pending = asyncio.all_tasks(loop)
-            for task in pending:
-                task.cancel()
-            # Wait for all cancelled tasks to finish; return_exceptions=True keeps
-            # their CancelledError results from propagating out of gather
-            loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
-            loop.close()
-        except Exception as e:
-            print(f'Error during cleanup: {e}')
-            sys.exit(1)
@@ -1,37 +0,0 @@
-from openhands.core.config.agent_config import AgentConfig
-from openhands.core.config.app_config import AppConfig
-from openhands.core.config.config_utils import (
-    OH_DEFAULT_AGENT,
-    OH_MAX_ITERATIONS,
-    get_field_info,
-)
-from openhands.core.config.llm_config import LLMConfig
-from openhands.core.config.sandbox_config import SandboxConfig
-from openhands.core.config.security_config import SecurityConfig
-from openhands.core.config.utils import (
-    finalize_config,
-    get_llm_config_arg,
-    get_parser,
-    load_app_config,
-    load_from_env,
-    load_from_toml,
-    parse_arguments,
-)
-
-# Names re-exported by this package; all of them are imported from its
-# submodules at the top of this file.
-__all__ = [
-    'OH_DEFAULT_AGENT',
-    'OH_MAX_ITERATIONS',
-    'AgentConfig',
-    'AppConfig',
-    'LLMConfig',
-    'SandboxConfig',
-    'SecurityConfig',
-    'load_app_config',
-    'load_from_env',
-    'load_from_toml',
-    'finalize_config',
-    'get_llm_config_arg',
-    'get_field_info',
-    'get_parser',
-    'parse_arguments',
-]
@@ -1,38 +0,0 @@
-from dataclasses import dataclass, fields
-
-from openhands.core.config.config_utils import get_field_info
-
-
-@dataclass
-class AgentConfig:
-    """Configuration for the agent.
-
-    Attributes:
-        codeact_enable_browsing: Whether browsing delegate is enabled in the action space. Default is True. Only works with function calling.
-        codeact_enable_llm_editor: Whether LLM editor is enabled in the action space. Default is False. Only works with function calling.
-        codeact_enable_jupyter: Whether Jupyter is enabled in the action space. Default is True.
-        micro_agent_name: The name of the micro agent to use for this agent. Default is None.
-        memory_enabled: Whether long-term memory (embeddings) is enabled. Default is False.
-        memory_max_threads: The maximum number of threads indexing at the same time for embeddings. Default is 3.
-        llm_config: The name of the llm config to use. If specified, this will override global llm config. Default is None.
-        use_microagents: Whether to use microagents at all. Default is True.
-        disabled_microagents: A list of microagents to disable. Default is None.
-    """
-
-    codeact_enable_browsing: bool = True
-    codeact_enable_llm_editor: bool = False
-    codeact_enable_jupyter: bool = True
-    micro_agent_name: str | None = None
-    memory_enabled: bool = False
-    memory_max_threads: int = 3
-    llm_config: str | None = None
-    use_microagents: bool = True
-    disabled_microagents: list[str] | None = None
-
-    def defaults_to_dict(self) -> dict:
-        """Describe every field for the frontend.
-
-        Returns:
-            A dict mapping each field name to the info produced by
-            ``get_field_info`` (type name, optional flag and default value).
-        """
-        return {spec.name: get_field_info(spec) for spec in fields(self)}
@@ -1,156 +0,0 @@
-from dataclasses import dataclass, field, fields, is_dataclass
-from typing import ClassVar
-
-from openhands.core import logger
-from openhands.core.config.agent_config import AgentConfig
-from openhands.core.config.config_utils import (
-    OH_DEFAULT_AGENT,
-    OH_MAX_ITERATIONS,
-    get_field_info,
-)
-from openhands.core.config.llm_config import LLMConfig
-from openhands.core.config.sandbox_config import SandboxConfig
-from openhands.core.config.security_config import SecurityConfig
-
-
-@dataclass
-class AppConfig:
-    """Configuration for the app.
-
-    Attributes:
-        llms: Dictionary mapping LLM names to their configurations.
-            The default configuration is stored under the 'llm' key.
-        agents: Dictionary mapping agent names to their configurations.
-            The default configuration is stored under the 'agent' key.
-        default_agent: Name of the default agent to use.
-        sandbox: Sandbox configuration settings.
-        security: Security configuration settings.
-        runtime: Runtime environment identifier.
-        file_store: Type of file store to use.
-        file_store_path: Path to the file store.
-        trajectories_path: Folder path to store trajectories.
-        workspace_base: Base path for the workspace. Defaults to `./workspace` as absolute path.
-        workspace_mount_path: Path to mount the workspace. Defaults to `workspace_base`.
-        workspace_mount_path_in_sandbox: Path to mount the workspace in sandbox. Defaults to `/workspace`.
-        workspace_mount_rewrite: Path to rewrite the workspace mount path.
-        cache_dir: Path to cache directory. Defaults to `/tmp/cache`.
-        run_as_openhands: Whether to run as openhands.
-        max_iterations: Maximum number of iterations allowed.
-        max_budget_per_task: Maximum budget per task, agent stops if exceeded.
-        e2b_api_key: E2B API key.
-        disable_color: Whether to disable terminal colors. For terminals that don't support color.
-        debug: Whether to enable debugging mode.
-        file_uploads_max_file_size_mb: Maximum file upload size in MB. `0` means unlimited.
-        file_uploads_restrict_file_types: Whether to restrict upload file types.
-        file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all.
-    """
-
-    llms: dict[str, LLMConfig] = field(default_factory=dict)
-    agents: dict = field(default_factory=dict)
-    default_agent: str = OH_DEFAULT_AGENT
-    sandbox: SandboxConfig = field(default_factory=SandboxConfig)
-    security: SecurityConfig = field(default_factory=SecurityConfig)
-    runtime: str = 'eventstream'
-    file_store: str = 'memory'
-    file_store_path: str = '/tmp/file_store'
-    trajectories_path: str | None = None
-    workspace_base: str | None = None
-    workspace_mount_path: str | None = None
-    workspace_mount_path_in_sandbox: str = '/workspace'
-    workspace_mount_rewrite: str | None = None
-    cache_dir: str = '/tmp/cache'
-    run_as_openhands: bool = True
-    max_iterations: int = OH_MAX_ITERATIONS
-    max_budget_per_task: float | None = None
-    e2b_api_key: str = ''
-    modal_api_token_id: str = ''
-    modal_api_token_secret: str = ''
-    disable_color: bool = False
-    jwt_secret: str = ''
-    settings_store_class: str = (
-        'openhands.storage.file_settings_store.FileSettingsStore'
-    )
-    debug: bool = False
-    file_uploads_max_file_size_mb: int = 0
-    file_uploads_restrict_file_types: bool = False
-    file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
-    runloop_api_key: str | None = None
-
-    # class-level snapshot of the field schema, refreshed in __post_init__
-    defaults_dict: ClassVar[dict] = {}
-
-    def get_llm_config(self, name='llm') -> LLMConfig:
-        """Return the LLM config stored under ``name``, falling back to the default.
-
-        'llm' is the name for default config (for backward compatibility prior to 0.8).
-        A warning is logged when a non-default name is requested but missing.
-        The default config is created and stored on first use if absent.
-        """
-        if name in self.llms:
-            return self.llms[name]
-        if name is not None and name != 'llm':
-            logger.openhands_logger.warning(
-                f'llm config group {name} not found, using default config'
-            )
-        if 'llm' not in self.llms:
-            self.llms['llm'] = LLMConfig()
-        return self.llms['llm']
-
-    def set_llm_config(self, value: LLMConfig, name='llm') -> None:
-        """Store ``value`` as the LLM config named ``name``."""
-        self.llms[name] = value
-
-    def get_agent_config(self, name='agent') -> AgentConfig:
-        """Return the agent config stored under ``name``, falling back to the default.
-
-        'agent' is the name for default config (for backward compatibility prior to 0.8).
-        The default config is created and stored on first use if absent.
-        """
-        if name in self.agents:
-            return self.agents[name]
-        if 'agent' not in self.agents:
-            self.agents['agent'] = AgentConfig()
-        return self.agents['agent']
-
-    def set_agent_config(self, value: AgentConfig, name='agent') -> None:
-        """Store ``value`` as the agent config named ``name``."""
-        self.agents[name] = value
-
-    def get_agent_to_llm_config_map(self) -> dict[str, LLMConfig]:
-        """Get a map of agent names to llm configs."""
-        return {name: self.get_llm_config_from_agent(name) for name in self.agents}
-
-    def get_llm_config_from_agent(self, name='agent') -> LLMConfig:
-        """Return the LLM config referenced by the agent config named ``name``."""
-        agent_config: AgentConfig = self.get_agent_config(name)
-        llm_config_name = agent_config.llm_config
-        return self.get_llm_config(llm_config_name)
-
-    def get_agent_configs(self) -> dict[str, AgentConfig]:
-        """Return the dictionary of all agent configs (not a copy)."""
-        return self.agents
-
-    def __post_init__(self):
-        """Refresh the class-level ``defaults_dict`` after the instance is initialized."""
-        AppConfig.defaults_dict = self.defaults_to_dict()
-
-    def defaults_to_dict(self) -> dict:
-        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
-        result = {}
-        for f in fields(self):
-            field_value = getattr(self, f.name)
-
-            # dataclasses compute their defaults themselves
-            if is_dataclass(type(field_value)):
-                result[f.name] = field_value.defaults_to_dict()
-            else:
-                result[f.name] = get_field_info(f)
-        return result
-
-    def __str__(self):
-        """Return ``AppConfig(name=value, ...)`` with secret fields masked.
-
-        Secret fields are shown as '******' when set and as None when empty.
-        """
-        attr_str = []
-        for f in fields(self):
-            attr_name = f.name
-            attr_value = getattr(self, f.name)
-
-            if attr_name in [
-                'e2b_api_key',
-                'github_token',
-                'jwt_secret',
-                'modal_api_token_id',
-                'modal_api_token_secret',
-                'runloop_api_key',
-            ]:
-                attr_value = '******' if attr_value else None
-
-            attr_str.append(f'{attr_name}={repr(attr_value)}')
-
-        # close the parenthesis opened after the class name
-        return f"AppConfig({', '.join(attr_str)})"
-
-    def __repr__(self):
-        """Use the masked string form so secrets never leak through repr()."""
-        return self.__str__()
@@ -1,39 +0,0 @@
-from types import UnionType
-from typing import get_args, get_origin
-
-# Name of the agent class used when none is specified
-OH_DEFAULT_AGENT = 'CodeActAgent'
-# Default upper bound on the number of iterations an agent may run
-OH_MAX_ITERATIONS = 500
-
-
-def get_field_info(f):
-    """Extract information about a dataclass field: type, optional, and default.
-
-    Args:
-        f: The field (a `dataclasses.Field`) to extract information from.
-
-    Returns: A dict with the field's lowercased type name ('type'), whether None
-        is an accepted value ('optional'), and its default value ('default').
-        For fields declared with `default_factory`, the default is the value
-        produced by calling the factory. A field with neither a default nor a
-        factory keeps the `dataclasses.MISSING` sentinel as its default.
-    """
-    # local imports: these names are not in the module-level import block
-    from dataclasses import MISSING
-    from typing import ForwardRef, Union
-
-    field_type = f.type
-    optional = False
-
-    # for types like `str | None` or `Optional[str]`, find the non-None type;
-    # the field is optional only when None is actually one of the union members
-    # this is useful for the frontend to know if a field is optional
-    # and to show the correct type in the UI
-    if get_origin(field_type) in (UnionType, Union):
-        types = get_args(field_type)
-        non_none_arg = next((t for t in types if t is not type(None)), None)
-        if non_none_arg is not None:
-            field_type = non_none_arg
-            optional = type(None) in types
-
-    # type name in a pretty format
-    if isinstance(field_type, ForwardRef):
-        # string annotations such as Optional['LLMConfig'] carry the name here
-        type_name = field_type.__forward_arg__
-    elif hasattr(field_type, '__name__'):
-        type_name = field_type.__name__
-    else:
-        type_name = str(field_type)
-
-    # fields using default_factory expose MISSING as `default`; build the real value
-    default = f.default
-    if default is MISSING and f.default_factory is not MISSING:
-        default = f.default_factory()
-
-    # return a schema with the useful info for frontend
-    return {'type': type_name.lower(), 'optional': optional, 'default': default}
@@ -1,143 +0,0 @@
-import os
-from dataclasses import dataclass, fields
-from typing import Optional
-
-from openhands.core.config.config_utils import get_field_info
-from openhands.core.logger import LOG_DIR
-
-# LLMConfig field names whose values are masked by LLMConfig.__str__ and LLMConfig.to_safe_dict
-LLM_SENSITIVE_FIELDS = ['api_key', 'aws_access_key_id', 'aws_secret_access_key']
-
-
-@dataclass
-class LLMConfig:
-    """Configuration for the LLM model.
-
-    Attributes:
-        model: The model to use.
-        api_key: The API key to use.
-        base_url: The base URL for the API. This is necessary for local LLMs. It is also used for Azure embeddings.
-        api_version: The version of the API.
-        embedding_model: The embedding model to use.
-        embedding_base_url: The base URL for the embedding API.
-        embedding_deployment_name: The name of the deployment for the embedding API. This is used for Azure OpenAI.
-        aws_access_key_id: The AWS access key ID.
-        aws_secret_access_key: The AWS secret access key.
-        aws_region_name: The AWS region name.
-        openrouter_site_url: The site URL exported to the OR_SITE_URL environment variable on instantiation.
-        openrouter_app_name: The app name exported to the OR_APP_NAME environment variable on instantiation.
-        num_retries: The number of retries to attempt.
-        retry_multiplier: The multiplier for the exponential backoff.
-        retry_min_wait: The minimum time to wait between retries, in seconds. This is exponential backoff minimum. For models with very low limits, this can be set to 15-20.
-        retry_max_wait: The maximum time to wait between retries, in seconds. This is exponential backoff maximum.
-        timeout: The timeout for the API.
-        max_message_chars: The approximate max number of characters in the content of an event included in the prompt to the LLM. Larger observations are truncated.
-        temperature: The temperature for the API.
-        top_p: The top p for the API.
-        custom_llm_provider: The custom LLM provider to use. This is undocumented in openhands, and normally not used. It is documented on the litellm side.
-        max_input_tokens: The maximum number of input tokens. Note that this is currently unused, and the value at runtime is actually the total tokens in OpenAI (e.g. 128,000 tokens for GPT-4).
-        max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
-        input_cost_per_token: The cost per input token. This will be available in logs for the user to check.
-        output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
-        ollama_base_url: The base URL for the OLLAMA API.
-        drop_params: Drop any unmapped (unsupported) params without causing an exception.
-        modify_params: Modify params allows litellm to do transformations like adding a default message, when a message is empty.
-        disable_vision: If the model is vision capable, this option allows disabling image processing (useful for cost reduction).
-        caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
-        log_completions: Whether to log LLM completions to the state.
-        log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
-        draft_editor: A more efficient LLM to use for file editing. Introduced in [PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985).
-        custom_tokenizer: A custom tokenizer to use for token counting.
-    """
-
-    model: str = 'claude-3-5-sonnet-20241022'
-    api_key: str | None = None
-    base_url: str | None = None
-    api_version: str | None = None
-    embedding_model: str = 'local'
-    embedding_base_url: str | None = None
-    embedding_deployment_name: str | None = None
-    aws_access_key_id: str | None = None
-    aws_secret_access_key: str | None = None
-    aws_region_name: str | None = None
-    openrouter_site_url: str = 'https://docs.all-hands.dev/'
-    openrouter_app_name: str = 'OpenHands'
-    num_retries: int = 8
-    retry_multiplier: float = 2
-    retry_min_wait: int = 15
-    retry_max_wait: int = 120
-    timeout: int | None = None
-    max_message_chars: int = 30_000  # maximum number of characters in an observation's content when sent to the llm
-    temperature: float = 0.0
-    top_p: float = 1.0
-    custom_llm_provider: str | None = None
-    max_input_tokens: int | None = None
-    max_output_tokens: int | None = None
-    input_cost_per_token: float | None = None
-    output_cost_per_token: float | None = None
-    ollama_base_url: str | None = None
-    # This setting can be sent in each call to litellm
-    drop_params: bool = True
-    # Note: this setting is actually global, unlike drop_params
-    modify_params: bool = True
-    disable_vision: bool | None = None
-    caching_prompt: bool = True
-    log_completions: bool = False
-    log_completions_folder: str = os.path.join(LOG_DIR, 'completions')
-    draft_editor: Optional['LLMConfig'] = None
-    custom_tokenizer: str | None = None
-
-    def defaults_to_dict(self) -> dict:
-        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
-        result = {}
-        for f in fields(self):
-            result[f.name] = get_field_info(f)
-        return result
-
-    def __post_init__(self):
-        """
-        Post-initialization hook to assign OpenRouter-related variables to environment variables.
-        This ensures that these values are accessible to litellm at runtime.
-
-        Note that this mutates the process-wide `os.environ` every time an instance is created;
-        empty values are skipped and leave the environment untouched.
-        """
-
-        # Assign OpenRouter-specific variables to environment variables
-        if self.openrouter_site_url:
-            os.environ['OR_SITE_URL'] = self.openrouter_site_url
-        if self.openrouter_app_name:
-            os.environ['OR_APP_NAME'] = self.openrouter_app_name
-
-    def __str__(self):
-        """Render as `LLMConfig(name=value, ...)`, masking the fields in LLM_SENSITIVE_FIELDS."""
-        attr_str = []
-        for f in fields(self):
-            attr_name = f.name
-            attr_value = getattr(self, f.name)
-
-            # set secrets are shown as '******', empty ones as None
-            if attr_name in LLM_SENSITIVE_FIELDS:
-                attr_value = '******' if attr_value else None
-
-            attr_str.append(f'{attr_name}={repr(attr_value)}')
-
-        return f"LLMConfig({', '.join(attr_str)})"
-
-    def __repr__(self):
-        """Same masked text as `__str__`."""
-        return self.__str__()
-
-    def to_safe_dict(self):
-        """Return a dict with the sensitive fields replaced with ******."""
-        # shallow copy, so the instance's own attributes are not modified
-        ret = self.__dict__.copy()
-        for k, v in ret.items():
-            if k in LLM_SENSITIVE_FIELDS:
-                ret[k] = '******' if v else None
-            elif isinstance(v, LLMConfig):
-                # nested config (draft_editor) is masked recursively
-                ret[k] = v.to_safe_dict()
-        return ret
-
-    @classmethod
-    def from_dict(cls, llm_config_dict: dict) -> 'LLMConfig':
-        """Create an LLMConfig object from a dictionary.
-
-        This function is used to create an LLMConfig object from a dictionary,
-        with the exception of the 'draft_editor' key, which is a nested LLMConfig object.
-
-        Args:
-            llm_config_dict: Mapping of field names to values. Values that are dicts
-                are ignored, except for 'draft_editor', which is turned into an LLMConfig.
-
-        Returns:
-            The new instance. Unknown keys are passed to the constructor unchanged.
-        """
-        # drop nested dicts (e.g. named sub-groups) so they are not passed as fields
-        args = {k: v for k, v in llm_config_dict.items() if not isinstance(v, dict)}
-        if 'draft_editor' in llm_config_dict:
-            draft_editor_config = LLMConfig(**llm_config_dict['draft_editor'])
-            args['draft_editor'] = draft_editor_config
-        return cls(**args)
@@ -1,78 +0,0 @@
-import os
-from dataclasses import dataclass, field, fields
-
-from openhands.core.config.config_utils import get_field_info
-
-
-@dataclass
-class SandboxConfig:
-    """Configuration for the sandbox.
-
-    Attributes:
-        remote_runtime_api_url: The hostname for the Remote Runtime API.
-        local_runtime_url: The default hostname for the local runtime. You may want to change to http://host.docker.internal for DIND environments
-        base_container_image: The base container image from which to build the runtime image.
-        runtime_container_image: The runtime container image to use.
-        user_id: The user ID for the sandbox.
-        timeout: The timeout for the default sandbox action execution.
-        remote_runtime_init_timeout: The timeout for the remote runtime to start.
-        enable_auto_lint: Whether to enable auto-lint.
-        use_host_network: Whether to use the host network.
-        initialize_plugins: Whether to initialize plugins.
-        force_rebuild_runtime: Whether to force rebuild the runtime image.
-        runtime_extra_deps: The extra dependencies to install in the runtime image (typically used for evaluation).
-            This will be rendered into the end of the Dockerfile that builds the runtime image.
-            It can contain any valid shell commands (e.g., pip install numpy).
-            The path to the interpreter is available as $OH_INTERPRETER_PATH,
-            which can be used to install dependencies for the OH-specific Python interpreter.
-        runtime_startup_env_vars: The environment variables to set at the launch of the runtime.
-            This is a dictionary of key-value pairs.
-            This is useful for setting environment variables that are needed by the runtime.
-            For example, for specifying the base url of website for browsergym evaluation.
-        browsergym_eval_env: The BrowserGym environment to use for evaluation.
-            Default is None for general purpose browsing. Check evaluation/miniwob and evaluation/webarena for examples.
-        platform: The platform on which the image should be built. Default is None.
-
-    The `api_key` field is treated as a secret: it is masked by `__str__` and `__repr__`.
-    """
-
-    remote_runtime_api_url: str = 'http://localhost:8000'
-    local_runtime_url: str = 'http://localhost'
-    keep_runtime_alive: bool = False
-    rm_all_containers: bool = False
-    api_key: str | None = None
-    base_container_image: str = 'nikolaik/python-nodejs:python3.12-nodejs22'  # default to nikolaik/python-nodejs:python3.12-nodejs22 for eventstream runtime
-    runtime_container_image: str | None = None
-    user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
-    timeout: int = 120
-    remote_runtime_init_timeout: int = 180
-    enable_auto_lint: bool = (
-        False  # once enabled, OpenHands would lint files after editing
-    )
-    use_host_network: bool = False
-    runtime_extra_build_args: list[str] | None = None
-    initialize_plugins: bool = True
-    force_rebuild_runtime: bool = False
-    runtime_extra_deps: str | None = None
-    runtime_startup_env_vars: dict[str, str] = field(default_factory=dict)
-    browsergym_eval_env: str | None = None
-    platform: str | None = None
-    close_delay: int = 15
-
-    def defaults_to_dict(self) -> dict:
-        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
-        # named `result` so the builtin `dict` is not shadowed
-        result = {}
-        for f in fields(self):
-            result[f.name] = get_field_info(f)
-        return result
-
-    def __str__(self):
-        """Render as `SandboxConfig(name=value, ...)` with `api_key` masked.
-
-        A set key is shown as '******' and an empty one as None, matching how the
-        other config classes print their secrets, so the text is safe to log.
-        """
-        attr_str = []
-        for f in fields(self):
-            attr_name = f.name
-            attr_value = getattr(self, f.name)
-
-            # never print the secret itself
-            if attr_name in ['api_key']:
-                attr_value = '******' if attr_value else None
-
-            attr_str.append(f'{attr_name}={repr(attr_value)}')
-
-        return f"SandboxConfig({', '.join(attr_str)})"
-
-    def __repr__(self):
-        """Same masked text as `__str__`."""
-        return self.__str__()
@@ -1,40 +0,0 @@
-from dataclasses import dataclass, fields
-
-from openhands.core.config.config_utils import get_field_info
-
-
-@dataclass
-class SecurityConfig:
-    """Settings for the security-related features.
-
-    Attributes:
-        confirmation_mode: Whether to enable confirmation mode.
-        security_analyzer: The security analyzer to use.
-    """
-
-    confirmation_mode: bool = False
-    security_analyzer: str | None = None
-
-    def defaults_to_dict(self) -> dict:
-        """Describe every field for the frontend: type hint, default, and whether it is optional."""
-        return {spec.name: get_field_info(spec) for spec in fields(self)}
-
-    def __str__(self):
-        """Render as `SecurityConfig(name=value, ...)` using the repr of each value."""
-        rendered = ', '.join(
-            f'{spec.name}={getattr(self, spec.name)!r}' for spec in fields(self)
-        )
-        return f'SecurityConfig({rendered})'
-
-    @classmethod
-    def from_dict(cls, security_config_dict: dict) -> 'SecurityConfig':
-        """Build an instance by passing the dict entries as keyword arguments."""
-        return cls(**security_config_dict)
-
-    def __repr__(self):
-        """Same text as `__str__`."""
-        return self.__str__()
@@ -1,429 +0,0 @@
-import argparse
-import os
-import pathlib
-import platform
-from dataclasses import is_dataclass
-from types import UnionType
-from typing import Any, MutableMapping, get_args, get_origin
-from uuid import uuid4
-
-import toml
-from dotenv import load_dotenv
-
-from openhands.core import logger
-from openhands.core.config.agent_config import AgentConfig
-from openhands.core.config.app_config import AppConfig
-from openhands.core.config.config_utils import (
-    OH_DEFAULT_AGENT,
-    OH_MAX_ITERATIONS,
-)
-from openhands.core.config.llm_config import LLMConfig
-from openhands.core.config.sandbox_config import SandboxConfig
-from openhands.core.config.security_config import SecurityConfig
-from openhands.storage import get_file_store
-from openhands.storage.files import FileStore
-
-# Path under which the JWT secret is read from and written to the file store
-JWT_SECRET = '.jwt_secret'
-# Runs at import time
-load_dotenv()
-
-
-def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, str]):
-    """Reads the env-style vars and sets config attributes based on env vars or a config.toml dict.
-    Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED, SANDBOX_TIMEOUT and others.
-
-    Values are cast to the type annotated on the matching field. Empty values are
-    skipped, and values that cannot be cast are logged as errors instead of raising.
-
-    Args:
-        cfg: The AppConfig object to set attributes on.
-        env_or_toml_dict: The environment variables or a config.toml dict.
-    """
-
-    def get_optional_type(union_type: UnionType) -> Any:
-        """Returns the non-None type from a Union."""
-        types = get_args(union_type)
-        return next((t for t in types if t is not type(None)), None)
-
-    # helper function to set attributes based on env vars
-    def set_attr_from_env(sub_config: Any, prefix=''):
-        """Set attributes of a config dataclass based on environment variables."""
-        for field_name, field_type in sub_config.__annotations__.items():
-            # compute the expected env var name from the prefix and field name
-            # e.g. LLM_BASE_URL
-            env_var_name = (prefix + field_name).upper()
-
-            if is_dataclass(field_type):
-                # nested dataclass
-                # its variables are prefixed with the field name only (e.g. SANDBOX_),
-                # the prefix of the enclosing config is not carried over
-                nested_sub_config = getattr(sub_config, field_name)
-                set_attr_from_env(nested_sub_config, prefix=field_name + '_')
-            elif env_var_name in env_or_toml_dict:
-                # convert the env var to the correct type and set it
-                value = env_or_toml_dict[env_var_name]
-
-                # skip empty config values (fall back to default)
-                if not value:
-                    continue
-
-                try:
-                    # if it's an optional type, get the non-None type
-                    if get_origin(field_type) is UnionType:
-                        field_type = get_optional_type(field_type)
-
-                    # Attempt to cast the env var to type hinted in the dataclass
-                    if field_type is bool:
-                        # only 'true' and '1' (case-insensitive) count as True
-                        cast_value = str(value).lower() in ['true', '1']
-                    else:
-                        cast_value = field_type(value)
-                    setattr(sub_config, field_name, cast_value)
-                except (ValueError, TypeError):
-                    # the attribute keeps its previous value when the cast fails
-                    logger.openhands_logger.error(
-                        f'Error setting env var {env_var_name}={value}: check that the value is of the right type'
-                    )
-
-    # Start processing from the root of the config object
-    set_attr_from_env(cfg)
-
-    # load default LLM config from env
-    default_llm_config = cfg.get_llm_config()
-    set_attr_from_env(default_llm_config, 'LLM_')
-    # load default agent config from env
-    default_agent_config = cfg.get_agent_config()
-    set_attr_from_env(default_agent_config, 'AGENT_')
-
-
-def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
-    """Load the config from the toml file. Supports both styles of config vars.
-
-    A missing file is ignored silently. A file that cannot be parsed, or sections
-    whose keys do not match the config classes, are reported as warnings and the
-    affected values are not applied.
-
-    Args:
-        cfg: The AppConfig object to update attributes of.
-        toml_file: The path to the toml file. Defaults to 'config.toml'.
-    """
-    # try to read the config.toml file into the config object
-    try:
-        with open(toml_file, 'r', encoding='utf-8') as toml_contents:
-            toml_config = toml.load(toml_contents)
-    except FileNotFoundError:
-        return
-    except toml.TomlDecodeError as e:
-        logger.openhands_logger.warning(
-            f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
-            exc_info=False,
-        )
-        return
-
-    # if core is not in the toml, treat the file as an old-style (env-style) toml
-    if 'core' not in toml_config:
-        # re-use the env loader to set the config from env-style vars
-        load_from_env(cfg, toml_config)
-        return
-
-    core_config = toml_config['core']
-
-    # load llm configs and agent configs
-    for key, value in toml_config.items():
-        if isinstance(value, dict):
-            try:
-                if key is not None and key.lower() == 'agent':
-                    logger.openhands_logger.debug(
-                        'Attempt to load default agent config from config toml'
-                    )
-                    # scalar entries form the default agent config,
-                    # nested tables are named agent configs
-                    non_dict_fields = {
-                        k: v for k, v in value.items() if not isinstance(v, dict)
-                    }
-                    agent_config = AgentConfig(**non_dict_fields)
-                    cfg.set_agent_config(agent_config, 'agent')
-                    for nested_key, nested_value in value.items():
-                        if isinstance(nested_value, dict):
-                            logger.openhands_logger.debug(
-                                f'Attempt to load group {nested_key} from config toml as agent config'
-                            )
-                            agent_config = AgentConfig(**nested_value)
-                            cfg.set_agent_config(agent_config, nested_key)
-                elif key is not None and key.lower() == 'llm':
-                    logger.openhands_logger.debug(
-                        'Attempt to load default LLM config from config toml'
-                    )
-                    llm_config = LLMConfig.from_dict(value)
-                    cfg.set_llm_config(llm_config, 'llm')
-                    for nested_key, nested_value in value.items():
-                        if isinstance(nested_value, dict):
-                            logger.openhands_logger.debug(
-                                f'Attempt to load group {nested_key} from config toml as llm config'
-                            )
-                            llm_config = LLMConfig.from_dict(nested_value)
-                            cfg.set_llm_config(llm_config, nested_key)
-                elif key is not None and key.lower() == 'security':
-                    logger.openhands_logger.debug(
-                        'Attempt to load security config from config toml'
-                    )
-                    security_config = SecurityConfig.from_dict(value)
-                    cfg.security = security_config
-                elif not key.startswith('sandbox') and key.lower() != 'core':
-                    # [sandbox] and [core] are handled after this loop
-                    logger.openhands_logger.warning(
-                        f'Unknown key in {toml_file}: "{key}"'
-                    )
-            except (TypeError, KeyError) as e:
-                logger.openhands_logger.warning(
-                    f'Cannot parse config from toml, toml values have not been applied.\n Error: {e}',
-                    exc_info=False,
-                )
-        else:
-            # top-level scalars are not expected in a new-style toml
-            # (the closing quote was missing from this message)
-            logger.openhands_logger.warning(f'Unknown key in {toml_file}: "{key}"')
-
-    try:
-        # set sandbox config from the toml file
-        sandbox_config = cfg.sandbox
-
-        # migrate old sandbox configs from [core] section to sandbox config
-        keys_to_migrate = [key for key in core_config if key.startswith('sandbox_')]
-        for key in keys_to_migrate:
-            new_key = key.replace('sandbox_', '')
-            if new_key in sandbox_config.__annotations__:
-                # read the key in sandbox and remove it from core
-                setattr(sandbox_config, new_key, core_config.pop(key))
-            else:
-                logger.openhands_logger.warning(f'Unknown sandbox config: {key}')
-
-        # the new style values override the old style values
-        # (a [sandbox] section replaces the whole object, including migrated keys)
-        if 'sandbox' in toml_config:
-            sandbox_config = SandboxConfig(**toml_config['sandbox'])
-
-        # update the config object with the new values
-        cfg.sandbox = sandbox_config
-        for key, value in core_config.items():
-            if hasattr(cfg, key):
-                setattr(cfg, key, value)
-            else:
-                logger.openhands_logger.warning(f'Unknown core config key: {key}')
-    except (TypeError, KeyError) as e:
-        logger.openhands_logger.warning(
-            f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
-            exc_info=False,
-        )
-
-
-def get_or_create_jwt_secret(file_store: FileStore) -> str:
-    """Return the JWT secret kept in `file_store`, creating and storing one if it is absent.
-
-    Args:
-        file_store: The store that is read from and written to under the JWT_SECRET path.
-
-    Returns:
-        The stored secret, or a newly generated uuid4 hex string when the read
-        raises FileNotFoundError.
-    """
-    try:
-        return file_store.read(JWT_SECRET)
-    except FileNotFoundError:
-        generated = uuid4().hex
-        file_store.write(JWT_SECRET, generated)
-        return generated
-
-
-def finalize_config(cfg: AppConfig):
-    """More tweaks to the config after it's been loaded.
-
-    Mutates `cfg` in place: normalizes paths, fills in derived values, creates the
-    cache directory and makes sure a JWT secret is set.
-
-    Args:
-        cfg: The loaded AppConfig to adjust.
-    """
-    if cfg.workspace_base is not None:
-        cfg.workspace_base = os.path.abspath(cfg.workspace_base)
-        # the mount path defaults to the (absolute) workspace base
-        if cfg.workspace_mount_path is None:
-            cfg.workspace_mount_path = cfg.workspace_base
-
-        # workspace_mount_rewrite is split on ':'; the first part is replaced by
-        # the second part inside the base path
-        # NOTE(review): a value without ':' raises IndexError on parts[1] -- confirm inputs are validated
-        if cfg.workspace_mount_rewrite:
-            base = cfg.workspace_base or os.getcwd()
-            parts = cfg.workspace_mount_rewrite.split(':')
-            cfg.workspace_mount_path = base.replace(parts[0], parts[1])
-
-    # make sure log_completions_folder is an absolute path
-    for llm in cfg.llms.values():
-        llm.log_completions_folder = os.path.abspath(llm.log_completions_folder)
-        # embeddings fall back to the LLM base URL when no dedicated URL is set
-        if llm.embedding_base_url is None:
-            llm.embedding_base_url = llm.base_url
-
-    if cfg.sandbox.use_host_network and platform.system() == 'Darwin':
-        logger.openhands_logger.warning(
-            'Please upgrade to Docker Desktop 4.29.0 or later to use host network mode on macOS. '
-            'See https://github.com/docker/roadmap/issues/238#issuecomment-2044688144 for more information.'
-        )
-
-    # make sure cache dir exists
-    if cfg.cache_dir:
-        pathlib.Path(cfg.cache_dir).mkdir(parents=True, exist_ok=True)
-
-    # a secret that is already configured is kept; otherwise it comes from the file store
-    if not cfg.jwt_secret:
-        cfg.jwt_secret = get_or_create_jwt_secret(
-            get_file_store(cfg.file_store, cfg.file_store_path)
-        )
-
-
-# Utility function for command line --group argument
-def get_llm_config_arg(
-    llm_config_arg: str, toml_file: str = 'config.toml'
-) -> LLMConfig | None:
-    """Look up a named group of llm settings in the config file.
-
-    A group in config.toml can look like this:
-
-    ```
-    [llm.gpt-3.5-for-eval]
-    model = 'gpt-3.5-turbo'
-    api_key = '...'
-    temperature = 0.5
-    num_retries = 8
-    ...
-    ```
-
-    The user-defined group name, like "gpt-3.5-for-eval", is the argument to this function.
-    Surrounding square brackets and a leading "llm." are removed from the argument, and the
-    group is then looked up under the "llm" table of the config file.
-
-    Args:
-        llm_config_arg: The group of llm settings to get from the config.toml file.
-        toml_file: Path to the configuration file to read from. Defaults to 'config.toml'.
-
-    Returns:
-        LLMConfig: The LLMConfig object built from the group, or None when the file is
-        missing, cannot be parsed, or does not contain the group.
-    """
-    # normalize "[llm.name]" / "llm.name" / "name" to the bare group name
-    group_name = llm_config_arg.strip('[]')
-    if group_name.startswith('llm.'):
-        group_name = group_name[4:]
-
-    logger.openhands_logger.debug(f'Loading llm config from {group_name}')
-
-    # read and parse the toml file
-    try:
-        with open(toml_file, 'r', encoding='utf-8') as handle:
-            parsed = toml.load(handle)
-    except FileNotFoundError as e:
-        logger.openhands_logger.error(f'Config file not found: {e}')
-        return None
-    except toml.TomlDecodeError as e:
-        logger.openhands_logger.error(
-            f'Cannot parse llm group from {group_name}. Exception: {e}'
-        )
-        return None
-
-    # bail out when the requested group is not present
-    if 'llm' not in parsed or group_name not in parsed['llm']:
-        logger.openhands_logger.debug(f'Loading from toml failed for {group_name}')
-        return None
-    return LLMConfig.from_dict(parsed['llm'][group_name])
-
-
-# Command line arguments
-def get_parser() -> argparse.ArgumentParser:
-    """Get the parser for the command line arguments.
-
-    Returns:
-        A new ArgumentParser with the options for running an agent (config file,
-        task, agent class, iteration and budget limits, llm config group, session
-        name) and the --eval-* options used by evaluations.
-    """
-    parser = argparse.ArgumentParser(description='Run an agent with a specific task')
-    parser.add_argument(
-        '--config-file',
-        type=str,
-        default='config.toml',
-        help='Path to the config file (default: config.toml in the current directory)',
-    )
-    parser.add_argument(
-        '-d',
-        '--directory',
-        type=str,
-        help='The working directory for the agent',
-    )
-    parser.add_argument(
-        '-t',
-        '--task',
-        type=str,
-        default='',
-        help='The task for the agent to perform',
-    )
-    parser.add_argument(
-        '-f',
-        '--file',
-        type=str,
-        help='Path to a file containing the task. Overrides -t if both are provided.',
-    )
-    parser.add_argument(
-        '-c',
-        '--agent-cls',
-        default=OH_DEFAULT_AGENT,
-        type=str,
-        help='Name of the default agent to use',
-    )
-    parser.add_argument(
-        '-i',
-        '--max-iterations',
-        default=OH_MAX_ITERATIONS,
-        type=int,
-        help='The maximum number of iterations to run the agent',
-    )
-    parser.add_argument(
-        '-b',
-        '--max-budget-per-task',
-        type=float,
-        help='The maximum budget allowed per task, beyond which the agent will stop.',
-    )
-    # --eval configs are for evaluations only
-    parser.add_argument(
-        '--eval-output-dir',
-        default='evaluation/evaluation_outputs/outputs',
-        type=str,
-        help='The directory to save evaluation output',
-    )
-    parser.add_argument(
-        '--eval-n-limit',
-        default=None,
-        type=int,
-        help='The number of instances to evaluate',
-    )
-    parser.add_argument(
-        '--eval-num-workers',
-        default=4,
-        type=int,
-        help='The number of workers to use for evaluation',
-    )
-    parser.add_argument(
-        '--eval-note',
-        default=None,
-        type=str,
-        help='The note to add to the evaluation directory',
-    )
-    parser.add_argument(
-        '-l',
-        '--llm-config',
-        default=None,
-        type=str,
-        help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
-    )
-    parser.add_argument(
-        '-n',
-        '--name',
-        default='default',
-        type=str,
-        help='Name for the session',
-    )
-    parser.add_argument(
-        '--eval-ids',
-        default=None,
-        type=str,
-        help='The comma-separated list (in quotes) of IDs of the instances to evaluate',
-    )
-    # flag without a value: False unless given on the command line
-    parser.add_argument(
-        '--no-auto-continue',
-        action='store_true',
-        help='Disable automatic "continue" responses. Will read from stdin instead.',
-    )
-    return parser
-
-
-def parse_arguments() -> argparse.Namespace:
-    """Parse the process command line with the parser from `get_parser()`.
-
-    Arguments the parser does not know are ignored rather than rejected.
-    """
-    known_args, _unknown = get_parser().parse_known_args()
-    return known_args
-
-
-def load_app_config(
-    set_logging_levels: bool = True, config_file: str = 'config.toml'
-) -> AppConfig:
-    """Load the configuration from the specified config file and environment variables.
-
-    The config file is applied first and the environment variables afterwards, so
-    a value set in the environment replaces the one from the file.
-
-    Args:
-        set_logging_levels: Whether to set the global variables for logging levels.
-        config_file: Path to the config file. Defaults to 'config.toml' in the current directory.
-
-    Returns:
-        The loaded and finalized AppConfig.
-    """
-    config = AppConfig()
-    load_from_toml(config, config_file)
-    load_from_env(config, os.environ)
-    finalize_config(config)
-    if set_logging_levels:
-        # propagate the settings to the logger module globals
-        logger.DEBUG = config.debug
-        logger.DISABLE_COLOR_PRINTING = config.disable_color
-    return config
@@ -1 +0,0 @@
-# URL of the troubleshooting page in the online documentation
-TROUBLESHOOTING_URL = 'https://docs.all-hands.dev/modules/usage/troubleshooting'
@@ -1,2 +0,0 @@
-# Run this file to trigger a model download
-import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
@@ -1,123 +0,0 @@
-class AgentNoInstructionError(Exception):
-    def __init__(self, message='Instruction must be provided'):
-        super().__init__(message)
-
-
-class AgentEventTypeError(Exception):
-    def __init__(self, message='Event must be a dictionary'):
-        super().__init__(message)
-
-
-class AgentAlreadyRegisteredError(Exception):
-    """Error stating that an agent class is already registered.
-
-    When `name` is given, the message also names the registration key.
-    """
-
-    def __init__(self, name=None):
-        suffix = '' if name is None else f" under '{name}'"
-        super().__init__('Agent class already registered' + suffix)
-
-
-class AgentNotRegisteredError(Exception):
-    """Error stating that no agent class is registered.
-
-    When `name` is given, the message also names the key that was looked up.
-    """
-
-    def __init__(self, name=None):
-        detail = 'No agent class registered'
-        if name is not None:
-            detail = f"{detail} under '{name}'"
-        super().__init__(detail)
-
-
-class TaskInvalidStateError(Exception):
-    """Error reporting an invalid state, optionally including the state itself."""
-
-    def __init__(self, state=None):
-        # The state is interpolated with an f-string (not str()) on purpose, so
-        # its formatting is unchanged for values that customize __format__.
-        message = 'Invalid state' if state is None else f'Invalid state {state}'
-        super().__init__(message)
-
-
-class BrowserInitException(Exception):
-    def __init__(self, message='Failed to initialize browser environment'):
-        super().__init__(message)
-
-
-class BrowserUnavailableException(Exception):
-    def __init__(
-        self,
-        message='Browser environment is not available, please check if has been initialized',
-    ):
-        super().__init__(message)
-
-
-# This exception gets sent back to the LLM
-# It might be malformed JSON
-class LLMMalformedActionError(Exception):
-    """Error for a malformed LLM response; its text is kept in `message`."""
-
-    def __init__(self, message='Malformed response'):
-        super().__init__(message)
-        self.message = message
-
-    def __str__(self):
-        # Render exactly the stored message, whatever `args` contains.
-        return self.message
-
-
-# This exception gets sent back to the LLM
-# For some reason, the agent did not return an action
-class LLMNoActionError(Exception):
-    def __init__(self, message='Agent must return an action'):
-        super().__init__(message)
-
-
-# This exception gets sent back to the LLM
-# The LLM output did not include an action, or the action was not the expected type
-class LLMResponseError(Exception):
-    def __init__(self, message='Failed to retrieve action from LLM response'):
-        super().__init__(message)
-
-
-class UserCancelledError(Exception):
-    def __init__(self, message='User cancelled the request'):
-        super().__init__(message)
-
-
-class MicroAgentValidationError(Exception):
-    def __init__(self, message='Micro agent validation failed'):
-        super().__init__(message)
-
-
-class OperationCancelled(Exception):
-    """Exception raised when an operation is cancelled (e.g. by a keyboard interrupt)."""
-
-    def __init__(self, message='Operation was cancelled'):
-        super().__init__(message)
-
-
-class CloudFlareBlockageError(Exception):
-    """Exception raised when a request is blocked by CloudFlare."""
-
-    pass
-
-
-class FunctionCallConversionError(Exception):
-    """Exception raised when FunctionCallingConverter failed to convert a non-function call message to a function call message.
-
-    This typically happens when there's a malformed message (e.g., missing <function=...> tags). But not due to LLM output.
-    """
-
-    def __init__(self, message):
-        super().__init__(message)
-
-
-class FunctionCallValidationError(Exception):
-    """Exception raised when FunctionCallingConverter failed to validate a function call message.
-
-    This typically happens when the LLM outputs unrecognized function call / parameter names / values.
-    """
-
-    def __init__(self, message):
-        super().__init__(message)
-
-
-class FunctionCallNotExistsError(Exception):
-    """Exception raised when an LLM call a tool that is not registered."""
-
-    def __init__(self, message):
-        super().__init__(message)
@@ -1,355 +0,0 @@
-import copy
-import logging
-import os
-import re
-import sys
-import traceback
-from datetime import datetime
-from types import TracebackType
-from typing import Any, Literal, Mapping
-
-from termcolor import colored
-
-LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO').upper()
-DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes']
-if DEBUG:
-    LOG_LEVEL = 'DEBUG'
-
-LOG_TO_FILE = os.getenv('LOG_TO_FILE', 'False').lower() in ['true', '1', 'yes']
-DISABLE_COLOR_PRINTING = False
-
-LOG_ALL_EVENTS = os.getenv('LOG_ALL_EVENTS', 'False').lower() in ['true', '1', 'yes']
-
-ColorType = Literal[
-    'red',
-    'green',
-    'yellow',
-    'blue',
-    'magenta',
-    'cyan',
-    'light_grey',
-    'dark_grey',
-    'light_red',
-    'light_green',
-    'light_yellow',
-    'light_blue',
-    'light_magenta',
-    'light_cyan',
-    'white',
-]
-
-LOG_COLORS: Mapping[str, ColorType] = {
-    'ACTION': 'green',
-    'USER_ACTION': 'light_red',
-    'OBSERVATION': 'yellow',
-    'USER_OBSERVATION': 'light_green',
-    'DETAIL': 'cyan',
-    'ERROR': 'red',
-    'PLAN': 'light_magenta',
-}
-
-
-class NoColorFormatter(logging.Formatter):
-    """Formatter for non-colored logging in files."""
-
-    def format(self, record: logging.LogRecord) -> str:
-        """Format `record` with ANSI color codes removed from its message.
-
-        The record passed in is not modified; a copy is formatted instead.
-        """
-        # A shallow copy is enough because only `msg` is rebound below. A deep
-        # copy would also try to copy `exc_info`, and traceback objects cannot
-        # be deep-copied, so records carrying exception info failed to format.
-        new_record: logging.LogRecord = copy.copy(record)
-        # `msg` may be any object (logging stringifies it when building the
-        # message), so convert it to text before stripping ANSI color codes.
-        new_record.msg = strip_ansi(str(new_record.msg))
-
-        return super().format(new_record)
-
-
-def strip_ansi(s: str) -> str:
-    """Return `s` with ANSI color/formatting escape codes deleted.
-
-    Only sequences of the form ESC `[` ... `m` carrying one to three
-    semicolon-separated numeric parameters are matched; any other escape
-    sequence is left in place.
-
-    ANSI escape sequences are defined by ECMA-048:
-    http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-048.pdf
-    See also https://github.com/ewen-lbh/python-strip-ansi/blob/master/strip_ansi/__init__.py
-    """
-    return re.sub(r'\x1B\[\d+(;\d+){0,2}m', '', s)
-
-
-class ColoredFormatter(logging.Formatter):
-    """Console formatter that colors records according to their `msg_type`.
-
-    `msg_type` and `event_source` are read from the record's attributes. When
-    the '<EVENT_SOURCE>_<msg_type>' combination has its own entry in
-    LOG_COLORS, that entry is used instead of the plain `msg_type`.
-    """
-
-    def format(self, record):
-        attrs = record.__dict__
-        msg_type = attrs.get('msg_type')
-        event_source = attrs.get('event_source')
-        if event_source:
-            sourced_type = f'{event_source.upper()}_{msg_type}'
-            if sourced_type in LOG_COLORS:
-                msg_type = sourced_type
-
-        if msg_type in LOG_COLORS and not DISABLE_COLOR_PRINTING:
-            color = LOG_COLORS[msg_type]
-            msg_type_color = colored(msg_type, color)
-            msg = colored(record.msg, color)
-            time_str = colored(self.formatTime(record, self.datefmt), color)
-            name_str = colored(record.name, color)
-            level_str = colored(record.levelname, color)
-            # Errors, and every colored record in DEBUG mode, also show where
-            # the record was emitted.
-            if msg_type == 'ERROR' or DEBUG:
-                return f'{time_str} - {name_str}:{level_str}: {record.filename}:{record.lineno}\n{msg_type_color}\n{msg}'
-            return f'{time_str} - {msg_type_color}\n{msg}'
-
-        if msg_type == 'STEP':
-            if not LOG_ALL_EVENTS:
-                return record.msg
-            # With LOG_ALL_EVENTS, set the step marker off with a separator.
-            return '\n\n==============\n' + record.msg + '\n'
-
-        # Anything else uses the plain format string given to the constructor.
-        return super().format(record)
-
-
-file_formatter = NoColorFormatter(
-    '%(asctime)s - %(name)s:%(levelname)s: %(filename)s:%(lineno)s - %(message)s',
-    datefmt='%H:%M:%S',
-)
-llm_formatter = logging.Formatter('%(message)s')
-
-
-class RollingLogger:
-    """Fixed-height rolling display of the most recent log lines on stdout.
-
-    Every write is a no-op unless `is_enabled()` is true.
-    """
-
-    # Number of lines kept and redrawn.
-    max_lines: int
-    # Lines longer than this are truncated when added.
-    char_limit: int
-    # The lines currently displayed, oldest first; always `max_lines` long.
-    log_lines: list[str]
-
-    def __init__(self, max_lines=10, char_limit=80):
-        self.max_lines = max_lines
-        self.char_limit = char_limit
-        self.log_lines = [''] * self.max_lines
-
-    def is_enabled(self):
-        """Return a truthy value only in DEBUG mode with stdout attached to a terminal."""
-        return DEBUG and sys.stdout.isatty()
-
-    def start(self, message=''):
-        """Print `message` (if any) and reserve `max_lines` blank lines for the display."""
-        if message:
-            print(message)
-        self._write('\n' * self.max_lines)
-        self._flush()
-
-    def add_line(self, line):
-        """Drop the oldest line, append `line` (truncated to `char_limit`) and redraw."""
-        self.log_lines.pop(0)
-        self.log_lines.append(line[: self.char_limit])
-        self.print_lines()
-
-    def write_immediately(self, line):
-        """Write `line` as-is and flush, bypassing the rolling buffer."""
-        self._write(line)
-        self._flush()
-
-    def print_lines(self):
-        """Display the last n log_lines in the console (not for file logging).
-
-        This will create the effect of a rolling display in the console.
-        """
-        self.move_back()
-        for line in self.log_lines:
-            self.replace_current_line(line)
-
-    def move_back(self, amount=-1):
-        r"""Move the cursor up `amount` lines ('\033[F' moves the cursor up one line).
-
-        The default of -1 means "the whole display", i.e. `max_lines` lines.
-        """
-        if amount == -1:
-            amount = self.max_lines
-        # Honor the requested amount; it used to be computed and then ignored
-        # in favor of max_lines.
-        self._write('\033[F' * amount)
-        self._flush()
-
-    def replace_current_line(self, line=''):
-        r"""Clear the current line ('\033[2K'), write `line` and move to the next line."""
-        self._write('\033[2K' + line + '\n')
-        self._flush()
-
-    def _write(self, line):
-        if not self.is_enabled():
-            return
-        sys.stdout.write(line)
-
-    def _flush(self):
-        if not self.is_enabled():
-            return
-        sys.stdout.flush()
-
-
-def _compile_sensitive_patterns():
-    """Return (name, compiled regex) pairs for every sensitive setting name."""
-    # start with attributes
-    attrs = [
-        'api_key',
-        'aws_access_key_id',
-        'aws_secret_access_key',
-        'e2b_api_key',
-        'github_token',
-        'jwt_secret',
-        'modal_api_token_id',
-        'modal_api_token_secret',
-    ]
-    # add env var names
-    names = attrs + [attr.upper() for attr in attrs]
-    # and some special cases
-    names += ['JWT_SECRET', 'LLM_API_KEY', 'GITHUB_TOKEN', 'SANDBOX_ENV_GITHUB_TOKEN']
-
-    # A value is either fully quoted (everything up to the closing quote on the
-    # same line, so secrets containing '.', '/' or '+' are masked whole) or,
-    # failing that, a run of word characters and dashes with optional quotes.
-    value = r"""(?:'[^'\n]+'|"[^"\n]+"|'?[\w-]+'?)"""
-    # dict.fromkeys drops duplicate names while keeping the matching order.
-    return tuple(
-        (name, re.compile(f'{name}={value}')) for name in dict.fromkeys(names)
-    )
-
-
-class SensitiveDataFilter(logging.Filter):
-    """Logging filter that masks the values of known secret settings.
-
-    Every `name=value` occurrence in the formatted message whose name is one of
-    the sensitive names is rewritten to `name='******'`. The record's `msg` is
-    replaced by the masked text and its `args` are cleared.
-    """
-
-    # Compiled once instead of being rebuilt for every record.
-    _PATTERNS = _compile_sensitive_patterns()
-
-    def filter(self, record):
-        """Mask secrets in `record` in place; always returns True (never drops a record)."""
-        # this also formats the message with % args
-        msg = record.getMessage()
-        record.args = ()
-
-        for name, pattern in self._PATTERNS:
-            msg = pattern.sub(f"{name}='******'", msg)
-
-        # passed with msg
-        record.msg = msg
-        return True
-
-
-def get_console_handler(log_level: int = logging.INFO, extra_info: str | None = None):
-    """Create a stream handler that emits colorized records at `log_level`.
-
-    When `extra_info` is given, it is prepended to the format string,
-    followed by ' - '.
-    """
-    base_format = '\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s'
-    fmt = f'{extra_info} - ' + base_format if extra_info else base_format
-    handler = logging.StreamHandler()
-    handler.setLevel(log_level)
-    handler.setFormatter(ColoredFormatter(fmt, datefmt='%H:%M:%S'))
-    return handler
-
-
-def get_file_handler(log_dir: str, log_level: int = logging.INFO):
-    """Create a handler writing to today's `openhands_<YYYY-MM-DD>.log` in `log_dir`.
-
-    `log_dir` is created first if it does not exist. Records are written
-    with the module's non-colored file formatter.
-    """
-    os.makedirs(log_dir, exist_ok=True)
-    log_path = os.path.join(log_dir, f'openhands_{datetime.now():%Y-%m-%d}.log')
-    handler = logging.FileHandler(log_path)
-    handler.setLevel(log_level)
-    handler.setFormatter(file_formatter)
-    return handler
-
-
-# Set up logging
-logging.basicConfig(level=logging.ERROR)
-
-
-def log_uncaught_exceptions(
-    ex_cls: type[BaseException], ex: BaseException, tb: TracebackType | None
-) -> Any:
-    """Report an exception through the root logger at ERROR level.
-
-    The formatted traceback, when one is supplied, is logged first; a
-    '<exception class>: <exception>' line follows. The signature matches what
-    `sys.excepthook` expects.
-
-    Args:
-        ex_cls: The type of the exception.
-        ex: The exception instance.
-        tb: The traceback object, or None when there is none.
-
-    Returns:
-        None
-    """
-    stack_text = ''.join(traceback.format_tb(tb)) if tb else None
-    if stack_text is not None:
-        logging.error(stack_text)
-    logging.error(f'{ex_cls}: {ex}')
-
-
-sys.excepthook = log_uncaught_exceptions
-openhands_logger = logging.getLogger('openhands')
-current_log_level = logging.INFO
-
-if LOG_LEVEL in logging.getLevelNamesMapping():
-    current_log_level = logging.getLevelNamesMapping()[LOG_LEVEL]
-openhands_logger.setLevel(current_log_level)
-
-if current_log_level == logging.DEBUG:
-    LOG_TO_FILE = True
-    openhands_logger.debug('DEBUG mode enabled.')
-
-openhands_logger.addHandler(get_console_handler(current_log_level))
-openhands_logger.addFilter(SensitiveDataFilter(openhands_logger.name))
-openhands_logger.propagate = False
-openhands_logger.debug('Logging initialized')
-
-LOG_DIR = os.path.join(
-    # parent dir of openhands/core (i.e., root of the repo)
-    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
-    'logs',
-)
-
-if LOG_TO_FILE:
-    openhands_logger.addHandler(
-        get_file_handler(LOG_DIR, current_log_level)
-    )  # default log to project root
-    openhands_logger.debug(f'Logging to file in: {LOG_DIR}')
-
-# Exclude LiteLLM from logging output
-logging.getLogger('LiteLLM').disabled = True
-logging.getLogger('LiteLLM Router').disabled = True
-logging.getLogger('LiteLLM Proxy').disabled = True
-
-
-class LlmFileHandler(logging.FileHandler):
-    """LLM prompt and response logging.
-
-    Every emitted record goes to its own numbered file,
-    `<filename>_<NNN>.log`, inside `LOG_DIR/llm/<session>`.
-    """
-
-    def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
-        """Initializes an instance of LlmFileHandler.
-
-        Args:
-            filename (str): The base name of the log files (the counter and '.log' are appended).
-            mode (str, optional): The file mode. Defaults to 'a'.
-            encoding (str, optional): The file encoding. Defaults to 'utf-8'.
-            delay (bool, optional): Whether to delay file opening. Defaults to False.
-        """
-        self.filename = filename
-        self.message_counter = 1
-        # In DEBUG mode each run gets its own timestamped directory; otherwise
-        # a single 'default' directory is reused.
-        if DEBUG:
-            self.session = datetime.now().strftime('%y-%m-%d_%H-%M')
-        else:
-            self.session = 'default'
-        self.log_directory = os.path.join(LOG_DIR, 'llm', self.session)
-        os.makedirs(self.log_directory, exist_ok=True)
-        if not DEBUG:
-            # Clear the log directory if not in debug mode
-            for file in os.listdir(self.log_directory):
-                file_path = os.path.join(self.log_directory, file)
-                try:
-                    os.unlink(file_path)
-                except Exception as e:
-                    openhands_logger.error(
-                        'Failed to delete %s. Reason: %s', file_path, e
-                    )
-        filename = f'{self.filename}_{self.message_counter:03}.log'
-        self.baseFilename = os.path.join(self.log_directory, filename)
-        super().__init__(self.baseFilename, mode, encoding, delay)
-
-    def emit(self, record):
-        """Emits a log record into the next numbered file.
-
-        Args:
-            record (logging.LogRecord): The log record to emit.
-        """
-        filename = f'{self.filename}_{self.message_counter:03}.log'
-        self.baseFilename = os.path.join(self.log_directory, filename)
-        # A stream may still be open here when the handler was constructed
-        # with delay=False; close it instead of leaking it.
-        if self.stream is not None:
-            self.stream.close()
-        self.stream = self._open()
-        try:
-            super().emit(record)
-        finally:
-            self.stream.close()
-            # Do not keep a closed file object around: a later flush()/close()
-            # on this handler would otherwise operate on a closed file.
-            self.stream = None
-        openhands_logger.debug('Logging to %s', self.baseFilename)
-        self.message_counter += 1
-
-
-def _get_llm_file_handler(name: str, log_level: int):
-    """Create the LLM file handler for `name`, writing bare messages at `log_level`."""
-    # delay=True postpones opening the log file until the first record is
-    # emitted.
-    handler = LlmFileHandler(name, delay=True)
-    handler.setLevel(log_level)
-    handler.setFormatter(llm_formatter)
-    return handler
-
-
-def _setup_llm_logger(name: str, log_level: int):
-    """Configure and return the non-propagating logger called `name`.
-
-    A per-message file handler is attached only when LOG_TO_FILE is set;
-    otherwise the logger is returned without any handler added here.
-    """
-    llm_logger = logging.getLogger(name)
-    llm_logger.setLevel(log_level)
-    llm_logger.propagate = False
-    if not LOG_TO_FILE:
-        return llm_logger
-    llm_logger.addHandler(_get_llm_file_handler(name, log_level))
-    return llm_logger
-
-
-llm_prompt_logger = _setup_llm_logger('prompt', current_log_level)
-llm_response_logger = _setup_llm_logger('response', current_log_level)
@@ -1,50 +0,0 @@
-import asyncio
-
-from openhands.controller import AgentController
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.schema import AgentState
-from openhands.runtime.base import Runtime
-
-
-async def run_agent_until_done(
-    controller: AgentController,
-    runtime: Runtime,
-    end_states: list[AgentState],
-):
-    """
-    run_agent_until_done takes a controller and a runtime, and will run
-    the agent until it reaches a terminal state.
-    Note that runtime must be connected before being passed in here.
-
-    Args:
-        controller: The controller whose step loop is started here.
-        runtime: The runtime; its `status_callback` is installed by this function.
-        end_states: Agent states at which waiting stops.
-
-    Raises:
-        ValueError: If the runtime or the controller already has a
-            `status_callback`, since it would be overridden.
-    """
-    controller.agent_task = asyncio.create_task(controller.start_step_loop())
-
-    # The event loop keeps only weak references to tasks, so a task nobody
-    # holds on to can be garbage-collected before it finishes. Keep the
-    # fire-and-forget state changes alive until they complete.
-    pending_state_changes: set[asyncio.Task] = set()
-
-    def status_callback(msg_type, msg_id, msg):
-        if msg_type == 'error':
-            logger.error(msg)
-            if controller:
-                controller.state.last_error = msg
-                state_change = asyncio.create_task(
-                    controller.set_agent_state_to(AgentState.ERROR)
-                )
-                pending_state_changes.add(state_change)
-                state_change.add_done_callback(pending_state_changes.discard)
-        else:
-            logger.info(msg)
-
-    if hasattr(runtime, 'status_callback') and runtime.status_callback:
-        raise ValueError(
-            'Runtime status_callback was set, but run_agent_until_done will override it'
-        )
-    if hasattr(controller, 'status_callback') and controller.status_callback:
-        raise ValueError(
-            'Controller status_callback was set, but run_agent_until_done will override it'
-        )
-
-    runtime.status_callback = status_callback
-    controller.status_callback = status_callback
-
-    # Poll once per second until the agent reaches one of the end states.
-    while controller.state.agent_state not in end_states:
-        await asyncio.sleep(1)
-
-    # Stop the step loop if it is still running and wait for the cancellation
-    # to take effect.
-    if not controller.agent_task.done():
-        controller.agent_task.cancel()
-        try:
-            await controller.agent_task
-        except asyncio.CancelledError:
-            pass
@@ -1,304 +0,0 @@
-import asyncio
-import hashlib
-import json
-import os
-import sys
-import uuid
-from typing import Callable, Protocol, Type
-
-import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
-from openhands.controller import AgentController
-from openhands.controller.agent import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import (
-    AppConfig,
-    get_llm_config_arg,
-    load_app_config,
-    parse_arguments,
-)
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.loop import run_agent_until_done
-from openhands.core.schema import AgentState
-from openhands.events import EventSource, EventStream, EventStreamSubscriber
-from openhands.events.action import MessageAction
-from openhands.events.action.action import Action
-from openhands.events.event import Event
-from openhands.events.observation import AgentStateChangedObservation
-from openhands.events.serialization.event import event_to_trajectory
-from openhands.llm.llm import LLM
-from openhands.runtime import get_runtime_cls
-from openhands.runtime.base import Runtime
-from openhands.storage import get_file_store
-
-
-class FakeUserResponseFunc(Protocol):
-    def __call__(
-        self,
-        state: State,
-        encapsulate_solution: bool = False,
-        try_parse: Callable[[Action | None], str] | None = None,
-    ) -> str: ...
-
-
-def read_task_from_file(file_path: str) -> str:
-    """Return the entire contents of `file_path`, decoded as UTF-8."""
-    with open(file_path, 'r', encoding='utf-8') as task_file:
-        task_text = task_file.read()
-    return task_text
-
-
-def read_task_from_stdin() -> str:
-    """Return everything available on standard input, read until EOF."""
-    task_text = sys.stdin.read()
-    return task_text
-
-
-def create_runtime(
-    config: AppConfig,
-    sid: str | None = None,
-    headless_mode: bool = True,
-) -> Runtime:
-    """Instantiate the runtime the agent will run on.
-
-    This function does not call `connect()` on the returned runtime.
-
-    Args:
-        config: The app config.
-        sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
-            Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
-        headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called
-            within evaluation scripts, where we don't want to have the VSCode UI open, so it defaults to True.
-
-    Returns:
-        A runtime of the class configured by `config.runtime`, bound to a new
-        event stream for the session.
-    """
-    # Use the given sid as the name of the event stream; otherwise derive one
-    # from the configured jwt_secret. This could be done better; it exists so
-    # that the sid can be retrieved when a session is restored.
-    session_id = sid or generate_sid(config)
-
-    # The event stream is persisted through the configured file store.
-    store = get_file_store(config.file_store, config.file_store_path)
-    stream = EventStream(session_id, store)
-
-    # The default agent's class determines which sandbox plugins are loaded.
-    default_agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
-
-    runtime_cls = get_runtime_cls(config.runtime)
-    logger.debug(f'Initializing runtime: {runtime_cls.__name__}')
-    return runtime_cls(
-        config=config,
-        event_stream=stream,
-        sid=session_id,
-        plugins=default_agent_cls.sandbox_plugins,
-        headless_mode=headless_mode,
-    )
-
-
-async def run_controller(
-    config: AppConfig,
-    initial_user_action: Action,
-    sid: str | None = None,
-    runtime: Runtime | None = None,
-    agent: Agent | None = None,
-    exit_on_message: bool = False,
-    fake_user_response_fn: FakeUserResponseFunc | None = None,
-    headless_mode: bool = True,
-) -> State | None:
-    """Main coroutine to run the agent controller with task input flexibility.
-    It's only used when you launch openhands backend directly via cmdline.
-
-    Args:
-        config: The app config.
-        initial_user_action: An Action object containing initial user input
-        sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
-            Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
-        runtime: (optional) A runtime for the agent to run on.
-        agent: (optional) An agent to run.
-        exit_on_message: quit if agent asks for a message from user (optional)
-        fake_user_response_fn: An optional function that receives the current state
-            (could be None) and returns a fake user response.
-        headless_mode: Whether the agent is run in headless mode.
-
-    Returns:
-        The controller's state once the agent has reached an end state.
-    """
-    # Create the agent
-    if agent is None:
-        agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
-        agent_config = config.get_agent_config(config.default_agent)
-        llm_config = config.get_llm_config_from_agent(config.default_agent)
-        agent = agent_cls(
-            llm=LLM(config=llm_config),
-            config=agent_config,
-        )
-
-    # make sure the session id is set
-    sid = sid or generate_sid(config)
-
-    if runtime is None:
-        runtime = create_runtime(config, sid=sid, headless_mode=headless_mode)
-        await runtime.connect()
-
-    event_stream = runtime.event_stream
-
-    # restore cli session if available
-    initial_state = None
-    try:
-        logger.debug(
-            f'Trying to restore agent state from cli session {event_stream.sid} if available'
-        )
-        initial_state = State.restore_from_session(
-            event_stream.sid, event_stream.file_store
-        )
-    except Exception as e:
-        logger.debug(f'Cannot restore agent state: {e}')
-
-    # init controller with this initial state
-    controller = AgentController(
-        agent=agent,
-        max_iterations=config.max_iterations,
-        max_budget_per_task=config.max_budget_per_task,
-        agent_to_llm_config=config.get_agent_to_llm_config_map(),
-        event_stream=event_stream,
-        initial_state=initial_state,
-        headless_mode=headless_mode,
-    )
-
-    assert isinstance(
-        initial_user_action, Action
-    ), f'initial user actions must be an Action, got {type(initial_user_action)}'
-    # Logging
-    logger.debug(
-        f'Agent Controller Initialized: Running agent {agent.name}, model '
-        f'{agent.llm.config.model}, with actions: {initial_user_action}'
-    )
-
-    # start event is a MessageAction with the task, either resumed or new
-    if initial_state is not None:
-        # we're resuming the previous session
-        event_stream.add_event(
-            MessageAction(
-                content=(
-                    "Let's get back on track. If you experienced errors before, do "
-                    'NOT resume your task. Ask me about it.'
-                ),
-            ),
-            EventSource.USER,
-        )
-    else:
-        # init with the provided actions
-        event_stream.add_event(initial_user_action, EventSource.USER)
-
-    async def on_event(event: Event):
-        # Answer on the user's behalf whenever the agent starts waiting for input.
-        if isinstance(event, AgentStateChangedObservation):
-            if event.agent_state == AgentState.AWAITING_USER_INPUT:
-                if exit_on_message:
-                    message = '/exit'
-                elif fake_user_response_fn is None:
-                    # read until EOF (Ctrl+D on Unix, Ctrl+Z on Windows)
-                    print('Request user input (press Ctrl+D/Z when done) >> ')
-                    message = sys.stdin.read().rstrip()
-                else:
-                    message = fake_user_response_fn(controller.get_state())
-                action = MessageAction(content=message)
-                event_stream.add_event(action, EventSource.USER)
-
-    event_stream.subscribe(EventStreamSubscriber.MAIN, on_event, sid)
-
-    end_states = [
-        AgentState.FINISHED,
-        AgentState.REJECTED,
-        AgentState.ERROR,
-        AgentState.PAUSED,
-        AgentState.STOPPED,
-    ]
-
-    try:
-        await run_agent_until_done(controller, runtime, end_states)
-    except Exception as e:
-        logger.error(f'Exception in main loop: {e}')
-
-    # save session when we're about to close
-    if config.file_store is not None and config.file_store != 'memory':
-        end_state = controller.get_state()
-        # NOTE: the saved state does not include delegates events
-        end_state.save_to_session(event_stream.sid, event_stream.file_store)
-
-    state = controller.get_state()
-
-    # save trajectories if applicable
-    if config.trajectories_path is not None:
-        # if trajectories_path is a folder, use session id as file name
-        if os.path.isdir(config.trajectories_path):
-            file_path = os.path.join(config.trajectories_path, sid + '.json')
-        else:
-            file_path = config.trajectories_path
-        # A bare file name has no directory part, and os.makedirs('') raises,
-        # so only create the parent directory when there is one.
-        trajectory_dir = os.path.dirname(file_path)
-        if trajectory_dir:
-            os.makedirs(trajectory_dir, exist_ok=True)
-        histories = [event_to_trajectory(event) for event in state.history]
-        with open(file_path, 'w') as f:
-            json.dump(histories, f)
-
-    return state
-
-
-def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
-    """Derive a session id of the form '<session_name>-<16 hex characters>'.
-
-    The suffix is the start of the SHA-256 hex digest of the session name
-    followed by `config.jwt_secret`. When no session name is given, a random
-    UUID4 string is used as the name.
-    """
-    if not session_name:
-        session_name = str(uuid.uuid4())
-    digest = hashlib.sha256()
-    digest.update(f'{session_name}{config.jwt_secret}'.encode('utf-8'))
-    return f'{session_name}-{digest.hexdigest()[:16]}'
-
-
-def auto_continue_response(
-    state: State,
-    encapsulate_solution: bool = False,
-    try_parse: Callable[[Action | None], str] | None = None,
-) -> str:
-    """Default function to generate user responses.
-    Returns 'continue' to tell the agent to proceed without asking for more input.
-    """
-    return 'continue'
-
-
-if __name__ == '__main__':
-    args = parse_arguments()
-
-    # Determine the task
-    if args.file:
-        task_str = read_task_from_file(args.file)
-    elif args.task:
-        task_str = args.task
-    elif not sys.stdin.isatty():
-        task_str = read_task_from_stdin()
-    else:
-        raise ValueError('No task provided. Please specify a task through -t, -f.')
-    initial_user_action: MessageAction = MessageAction(content=task_str)
-    # Load the app config
-    # this will load config from config.toml in the current directory
-    # as well as from the environment variables
-    config = load_app_config(config_file=args.config_file)
-
-    # Override default LLM configs ([llm] section in config.toml)
-    if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config)
-        if llm_config is None:
-            raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
-        config.set_llm_config(llm_config)
-
-    # Set default agent
-    config.default_agent = args.agent_cls
-
-    # Set session name
-    session_name = args.name
-    sid = generate_sid(config, session_name)
-
-    # if max budget per task is not sent on the command line, use the config value
-    if args.max_budget_per_task is not None:
-        config.max_budget_per_task = args.max_budget_per_task
-    if args.max_iterations is not None:
-        config.max_iterations = args.max_iterations
-
-    asyncio.run(
-        run_controller(
-            config=config,
-            initial_user_action=initial_user_action,
-            sid=sid,
-            fake_user_response_fn=None
-            if args.no_auto_continue
-            else auto_continue_response,
-        )
-    )
@@ -1,141 +0,0 @@
-from enum import Enum
-from typing import Literal
-
-from litellm import ChatCompletionMessageToolCall
-from pydantic import BaseModel, Field, model_serializer
-
-
-class ContentType(Enum):
-    TEXT = 'text'
-    IMAGE_URL = 'image_url'
-
-
-class Content(BaseModel):
-    type: str
-    cache_prompt: bool = False
-
-    @model_serializer
-    def serialize_model(self):
-        raise NotImplementedError('Subclasses should implement this method.')
-
-
-class TextContent(Content):
-    # A text content item; serializes to {'type': ..., 'text': ...}, plus an
-    # ephemeral 'cache_control' marker when `cache_prompt` is set.
-    type: str = ContentType.TEXT.value
-    text: str
-
-    @model_serializer
-    def serialize_model(self):
-        payload: dict[str, str | dict[str, str]] = {'type': self.type, 'text': self.text}
-        if not self.cache_prompt:
-            return payload
-        payload['cache_control'] = {'type': 'ephemeral'}
-        return payload
-
-
-class ImageContent(Content):
-    # A group of image URLs; serializes to a LIST with one dict per URL. When
-    # `cache_prompt` is set, only the last dict carries the ephemeral
-    # 'cache_control' marker.
-    type: str = ContentType.IMAGE_URL.value
-    image_urls: list[str]
-
-    @model_serializer
-    def serialize_model(self):
-        images: list[dict[str, str | dict[str, str]]] = [
-            {'type': self.type, 'image_url': {'url': url}} for url in self.image_urls
-        ]
-        if images and self.cache_prompt:
-            images[-1]['cache_control'] = {'type': 'ephemeral'}
-        return images
-
-
-class Message(BaseModel):
-    # One chat message, serialized either with a single string as content or
-    # with a list of content items, depending on the enabled features.
-
-    # NOTE: this is not the same as EventSource
-    # These are the roles in the LLM's APIs
-    role: Literal['user', 'system', 'assistant', 'tool']
-    content: list[TextContent | ImageContent] = Field(default_factory=list)
-    cache_enabled: bool = False
-    vision_enabled: bool = False
-    # function calling
-    function_calling_enabled: bool = False
-    # - tool calls (from LLM)
-    tool_calls: list[ChatCompletionMessageToolCall] | None = None
-    # - tool execution result (to LLM)
-    tool_call_id: str | None = None
-    name: str | None = None  # name of the tool
-
-    @property
-    def contains_image(self) -> bool:
-        """Whether any content item is an ImageContent."""
-        return any(isinstance(content, ImageContent) for content in self.content)
-
-    @model_serializer
-    def serialize_model(self) -> dict:
-        # We need two kinds of serializations:
-        # - into a single string: for providers that don't support list of content items (e.g. no vision, no tool calls)
-        # - into a list of content items: the new APIs of providers with vision/prompt caching/tool calls
-        # NOTE: remove this when litellm or providers support the new API
-        if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
-            return self._list_serializer()
-        # some providers, like HF and Groq/llama, don't support a list here, but a single string
-        return self._string_serializer()
-
-    def _string_serializer(self) -> dict:
-        """Serialize with `content` as one string: the text items joined by newlines.
-
-        Image items are left out of this form.
-        """
-        # convert content to a single string
-        content = '\n'.join(
-            item.text for item in self.content if isinstance(item, TextContent)
-        )
-        message_dict: dict = {'content': content, 'role': self.role}
-
-        # add tool call keys if we have a tool call or response
-        return self._add_tool_call_keys(message_dict)
-
-    def _list_serializer(self) -> dict:
-        """Serialize with `content` as a list of content-item dicts.
-
-        Image items are included only when `vision_enabled` is set.
-        """
-        content: list[dict] = []
-        role_tool_with_prompt_caching = False
-        for item in self.content:
-            d = item.model_dump()
-            # We have to remove cache_prompt for tool content and move it up to the message level
-            # See discussion here for details: https://github.com/BerriAI/litellm/issues/6422#issuecomment-2438765472
-            if self.role == 'tool' and item.cache_prompt:
-                role_tool_with_prompt_caching = True
-                # TextContent dumps to a single dict, ImageContent to a list of
-                # dicts (possibly empty) in which only the last one carries the
-                # marker. Handle both shapes and tolerate entries without the key.
-                for entry in d if isinstance(d, list) else [d]:
-                    entry.pop('cache_control', None)
-            if isinstance(item, TextContent):
-                content.append(d)
-            elif isinstance(item, ImageContent) and self.vision_enabled:
-                content.extend(d)
-
-        message_dict: dict = {'content': content, 'role': self.role}
-
-        if role_tool_with_prompt_caching:
-            message_dict['cache_control'] = {'type': 'ephemeral'}
-
-        # add tool call keys if we have a tool call or response
-        return self._add_tool_call_keys(message_dict)
-
-    def _add_tool_call_keys(self, message_dict: dict) -> dict:
-        """Add tool call keys if we have a tool call or response.
-
-        NOTE: this is necessary for both native and non-native tool calling."""
-
-        # an assistant message calling a tool
-        if self.tool_calls is not None:
-            message_dict['tool_calls'] = [
-                {
-                    'id': tool_call.id,
-                    'type': 'function',
-                    'function': {
-                        'name': tool_call.function.name,
-                        'arguments': tool_call.function.arguments,
-                    },
-                }
-                for tool_call in self.tool_calls
-            ]
-
-        # an observation message with tool response
-        if self.tool_call_id is not None:
-            assert (
-                self.name is not None
-            ), 'name is required when tool_call_id is not None'
-            message_dict['tool_call_id'] = self.tool_call_id
-            message_dict['name'] = self.name
-
-        return message_dict
@@ -1,9 +0,0 @@
-from openhands.core.schema.action import ActionType
-from openhands.core.schema.agent import AgentState
-from openhands.core.schema.observation import ObservationType
-
-__all__ = [
-    'ActionType',
-    'ObservationType',
-    'AgentState',
-]
@@ -1,90 +0,0 @@
-from pydantic import BaseModel, Field
-
-__all__ = ['ActionType']
-
-
-class ActionTypeSchema(BaseModel):
-    INIT: str = Field(default='initialize')
-    """Initializes the agent. Only sent by client.
-    """
-
-    MESSAGE: str = Field(default='message')
-    """Represents a message.
-    """
-
-    START: str = Field(default='start')
-    """Starts a new development task OR send chat from the user. Only sent by the client.
-    """
-
-    READ: str = Field(default='read')
-    """Reads the content of a file.
-    """
-
-    WRITE: str = Field(default='write')
-    """Writes the content to a file.
-    """
-
-    EDIT: str = Field(default='edit')
-    """Edits a file by providing a draft.
-    """
-
-    RUN: str = Field(default='run')
-    """Runs a command.
-    """
-
-    RUN_IPYTHON: str = Field(default='run_ipython')
-    """Runs a IPython cell.
-    """
-
-    BROWSE: str = Field(default='browse')
-    """Opens a web page.
-    """
-
-    BROWSE_INTERACTIVE: str = Field(default='browse_interactive')
-    """Interact with the browser instance.
-    """
-
-    DELEGATE: str = Field(default='delegate')
-    """Delegates a task to another agent.
-    """
-
-    FINISH: str = Field(default='finish')
-    """If you're absolutely certain that you've completed your task and have tested your work,
-    use the finish action to stop working.
-    """
-
-    REJECT: str = Field(default='reject')
-    """If you're absolutely certain that you cannot complete the task with given requirements,
-    use the reject action to stop working.
-    """
-
-    NULL: str = Field(default='null')
-
-    SUMMARIZE: str = Field(default='summarize')
-
-    ADD_TASK: str = Field(default='add_task')
-
-    MODIFY_TASK: str = Field(default='modify_task')
-
-    PAUSE: str = Field(default='pause')
-    """Pauses the task.
-    """
-
-    RESUME: str = Field(default='resume')
-    """Resumes the task.
-    """
-
-    STOP: str = Field(default='stop')
-    """Stops the task. Must send a start action to restart a new task.
-    """
-
-    CHANGE_AGENT_STATE: str = Field(default='change_agent_state')
-
-    PUSH: str = Field(default='push')
-    """Push a branch to github."""
-
-    SEND_PR: str = Field(default='send_pr')
-    """Send a PR to github."""
-
-
-ActionType = ActionTypeSchema()
@@ -1,51 +0,0 @@
-from enum import Enum
-
-
-class AgentState(str, Enum):
-    LOADING = 'loading'
-    """The agent is loading.
-    """
-
-    INIT = 'init'
-    """The agent is initialized.
-    """
-
-    RUNNING = 'running'
-    """The agent is running.
-    """
-
-    AWAITING_USER_INPUT = 'awaiting_user_input'
-    """The agent is awaiting user input.
-    """
-
-    PAUSED = 'paused'
-    """The agent is paused.
-    """
-
-    STOPPED = 'stopped'
-    """The agent is stopped.
-    """
-
-    FINISHED = 'finished'
-    """The agent is finished with the current task.
-    """
-
-    REJECTED = 'rejected'
-    """The agent rejects the task.
-    """
-
-    ERROR = 'error'
-    """An error occurred during the task.
-    """
-
-    AWAITING_USER_CONFIRMATION = 'awaiting_user_confirmation'
-    """The agent is awaiting user confirmation.
-    """
-
-    USER_CONFIRMED = 'user_confirmed'
-    """The user confirmed the agent's action.
-    """
-
-    USER_REJECTED = 'user_rejected'
-    """The user rejected the agent's action.
-    """
@@ -1,48 +0,0 @@
-from pydantic import BaseModel, Field
-
-__all__ = ['ObservationType']
-
-
-class ObservationTypeSchema(BaseModel):
-    READ: str = Field(default='read')
-    """The content of a file
-    """
-
-    WRITE: str = Field(default='write')
-
-    EDIT: str = Field(default='edit')
-
-    BROWSE: str = Field(default='browse')
-    """The HTML content of a URL
-    """
-
-    RUN: str = Field(default='run')
-    """The output of a command
-    """
-
-    RUN_IPYTHON: str = Field(default='run_ipython')
-    """Runs a IPython cell.
-    """
-
-    CHAT: str = Field(default='chat')
-    """A message from the user
-    """
-
-    DELEGATE: str = Field(default='delegate')
-    """The result of a task delegated to another agent
-    """
-
-    MESSAGE: str = Field(default='message')
-
-    ERROR: str = Field(default='error')
-
-    SUCCESS: str = Field(default='success')
-
-    NULL: str = Field(default='null')
-
-    AGENT_STATE_CHANGED: str = Field(default='agent_state_changed')
-
-    USER_REJECTED: str = Field(default='user_rejected')
-
-
-ObservationType = ObservationTypeSchema()
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`TROUBLESHOOTING_URL = 'https://docs.all-hands.dev/modules/usage/troubleshooting'`