update sys prompt

Merge branch 'main' into rb/dev-intent
fix empty msg
2026-04-29 03:00:45 -04:00 · 2024-11-15 11:50:09 -05:00 · 2024-11-15 11:49:04 -05:00 · 2024-11-02 20:12:39 -04:00 · 2024-11-02 19:27:19 -04:00 · 2024-11-02 19:25:23 -04:00
83 changed files with 4093 additions and 4724 deletions
--- a/.github/workflows/lint-fix.yml
+++ b/.github/workflows/lint-fix.yml
@@ -1,62 +0,0 @@
-name: Lint Fix
-
-on:
-  pull_request:
-    types: [labeled]
-
-jobs:
-  lint-fix:
-    if: github.event.label.name == 'lint-fix'
-    name: Fix linting issues
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      pull-requests: write
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          ref: ${{ github.head_ref }}
-          repository: ${{ github.event.pull_request.head.repo.full_name }}
-          fetch-depth: 0
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      # Frontend lint fixes
-      - name: Install Node.js 20
-        uses: actions/setup-node@v4
-        with:
-          node-version: 20
-      - name: Install frontend dependencies
-        run: |
-          cd frontend
-          npm install --frozen-lockfile
-      - name: Fix frontend lint issues
-        run: |
-          cd frontend
-          npm run lint:fix
-
-      # Python lint fixes
-      - name: Set up python
-        uses: actions/setup-python@v5
-        with:
-          python-version: 3.12
-          cache: 'pip'
-      - name: Install pre-commit
-        run: pip install pre-commit==3.7.0
-      - name: Fix python lint issues
-        run: |
-          # Run all pre-commit hooks and continue even if they modify files (exit code 1)
-          pre-commit run --config ./dev_config/python/.pre-commit-config.yaml --files openhands/**/* evaluation/**/* tests/**/* || true
-
-      # Commit and push changes if any
-      - name: Check for changes
-        id: git-check
-        run: |
-          git diff --quiet || echo "changes=true" >> $GITHUB_OUTPUT
-      - name: Commit and push if there are changes
-        if: steps.git-check.outputs.changes == 'true'
-        run: |
-          git config --local user.email "openhands@all-hands.dev"
-          git config --local user.name "OpenHands Bot"
-          git add -A
-          git commit -m "🤖 Auto-fix linting issues"
-          git push
--- a/.github/workflows/openhands-resolver.yml
+++ b/.github/workflows/openhands-resolver.yml
@@ -40,6 +40,7 @@ permissions:
  issues: write

 jobs:
+
  auto-fix:
    if: |
      github.event_name == 'workflow_call' ||
@@ -75,18 +76,7 @@ jobs:
          cat requirements.txt

      - name: Cache pip dependencies
-        if: |
-          !(
-            github.event.label.name == 'fix-me-experimental' ||
-            (
-              (github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') &&
-              startsWith(github.event.comment.body, '@openhands-agent-exp')
-            ) ||
-            (
-              github.event_name == 'pull_request_review' &&
-              startsWith(github.event.review.body, '@openhands-agent-exp')
-            )
-          )
+        if: github.event.label.name != 'fix-me-experimental'
        uses: actions/cache@v3
        with:
          path: ${{ env.pythonLocation }}/lib/python3.12/site-packages/*
@@ -150,11 +140,7 @@ jobs:

      - name: Install OpenHands
        run: |
-          if [[ "${{ github.event.label.name }}" == "fix-me-experimental" ]] ||
-             ([[ "${{ github.event_name }}" == "issue_comment" || "${{ github.event_name }}" == "pull_request_review_comment" ]] &&
-              [[ "${{ github.event.comment.body }}" == "@openhands-agent-exp"* ]]) ||
-             ([[ "${{ github.event_name }}" == "pull_request_review" ]] &&
-              [[ "${{ github.event.review.body }}" == "@openhands-agent-exp"* ]]); then
+          if [ "${{ github.event.label.name }}" == "fix-me-experimental" ]; then
            python -m pip install --upgrade pip
            pip install git+https://github.com/all-hands-ai/openhands.git
          else
@@ -195,7 +181,6 @@ jobs:
          retention-days: 30 # Keep the artifact for 30 days

      - name: Create draft PR or push branch
-        if: always() # Create PR or branch even if the previous steps fail
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
          GITHUB_USERNAME: ${{ secrets.PAT_USERNAME }}
@@ -219,7 +204,6 @@ jobs:

      - name: Comment on issue
        uses: actions/github-script@v7
-        if: always() # Comment on issue even if the previous steps fail
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
--- a/COMMUNITY.md
+++ b/COMMUNITY.md
@@ -1,43 +0,0 @@
-# 🙌 The OpenHands Community
-
-The OpenHands community is built around the belief that (1) AI and AI agents are going to fundamentally change the way
-we build software, and (2) if this is true, we should do everything we can to make sure that the benefits provided by
-such powerful technology are accessible to everyone.
-
-If this resonates with you, we'd love to have you join us in our quest!
-
-## 🤝 How to Join
-
-Check out our [How to Join the Community section.](https://github.com/All-Hands-AI/OpenHands?tab=readme-ov-file#-how-to-join-the-community)
-
-## 💪 Becoming a Contributor
-
-We welcome contributions from everyone! Whether you're a developer, a researcher, or simply enthusiastic about advancing
-the field of software engineering with AI, there are many ways to get involved:
-
- **Code Contributions:** Help us develop new core functionality, improve our agents, improve the frontend and other
-interfaces, or anything else that would help make OpenHands better.
- **Research and Evaluation:** Contribute to our understanding of LLMs in software engineering, participate in
-evaluating the models, or suggest improvements.
- **Feedback and Testing:** Use the OpenHands toolset, report bugs, suggest features, or provide feedback on usability.
-
-For details, please check [CONTRIBUTING.md](./CONTRIBUTING.md).
-
-## Code of Conduct
-
-We have a [Code of Conduct](./CODE_OF_CONDUCT.md) that we expect all contributors to adhere to.
-Long story short, we are aiming for an open, welcoming, diverse, inclusive, and healthy community.
-All contributors are expected to contribute to building this sort of community.
-
-## 🛠️ Becoming a Maintainer
-
-For contributors who have made significant and sustained contributions to the project, there is a possibility of joining
-the maintainer team. The process for this is as follows:
-
-1. Any contributor who has made sustained and high-quality contributions to the codebase can be nominated by any
-maintainer. If you feel that you may qualify you can reach out to any of the maintainers that have reviewed your PRs and ask if you can be nominated.
-2. Once a maintainer nominates a new maintainer, there will be a discussion period among the maintainers for at least 3 days.
-3. If no concerns are raised the nomination will be accepted by acclamation, and if concerns are raised there will be a discussion and possible vote.
-
-Note that just making many PRs does not immediately imply that you will become a maintainer. We will be looking
-at sustained high-quality contributions over a period of time, as well as good teamwork and adherence to our [Code of Conduct](./CODE_OF_CONDUCT.md).
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -54,7 +54,7 @@ The agent needs a place to run code and commands. When you run OpenHands on your
 to do this by default. But there are other ways of creating a sandbox for the agent.

 If you work for a company that provides a cloud-based runtime, you could help us add support for that runtime
-by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/base.py).
+by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/runtime.py).

 #### Testing
 When you write code, it is also good to write tests. Please navigate to the `tests` folder to see existing test suites.
@@ -92,32 +92,3 @@ You may also check out previous PRs in the [PR list](https://github.com/All-Hand

 If your changes are user-facing (e.g. a new feature in the UI, a change in behavior, or a bugfix)
 please include a short message that we can add to our changelog.
-
-## How to Make Effective Contributions
-
-### Opening Issues
-
-If you notice any bugs or have any feature requests please open them via the [issues page](https://github.com/All-Hands-AI/OpenHands/issues). We will triage based on how critical the bug is or how potentially useful the improvement is, discuss, and implement the ones that the community has interest/effort for.
-
-Further, if you see an issue you like, please leave a "thumbs-up" or a comment, which will help us prioritize.
-
-### Making Pull Requests
-
-We're generally happy to consider all PRs, with the evaluation process varying based on the type of change:
-
-#### For Small Improvements
-
-Small improvements with few downsides are typically reviewed and approved quickly.
-One thing to check when making changes is to ensure that all continuous integration tests pass, which you can check before getting a review.
-
-#### For Core Agent Changes
-
-We need to be more careful with changes to the core agent, as it is imperative to maintain high quality. These PRs are evaluated based on three key metrics:
-
-1. **Accuracy**
-2. **Efficiency**
-3. **Code Complexity**
-
-If it improves accuracy, efficiency, or both with only a minimal change to code quality, that's great we're happy to merge it in!
-If there are bigger tradeoffs (e.g. helping efficiency a lot and hurting accuracy a little) we might want to put it behind a feature flag.
-Either way, please feel free to discuss on github issues or slack, and we will give guidance and preliminary feedback.
--- a/Development.md
+++ b/Development.md
@@ -38,9 +38,7 @@ make build
 ```

 ### 3. Configuring the Language Model
-OpenHands supports a diverse array of Language Models (LMs) through the powerful [litellm](https://docs.litellm.ai) library.
-By default, we've chosen Claude Sonnet 3.5 as our go-to model, but the world is your oyster! You can unleash the
-potential of any other LM that piques your interest.
+OpenHands supports a diverse array of Language Models (LMs) through the powerful [litellm](https://docs.litellm.ai) library. By default, we've chosen the mighty GPT-4 from OpenAI as our go-to model, but the world is your oyster! You can unleash the potential of Anthropic's suave Claude, the enigmatic Llama, or any other LM that piques your interest.

 To configure the LM of your choice, run:

@@ -54,7 +52,10 @@ To configure the LM of your choice, run:
   Environment variables > config.toml variables > default variables

 **Note on Alternative Models:**
-See [our documentation](https://docs.all-hands.dev/modules/usage/llms) for recommended models.
+Some alternative models may prove more challenging to tame than others. Fear not, brave adventurer! We shall soon unveil LLM-specific documentation to guide you on your quest.
+And if you've already mastered the art of wielding a model other than OpenAI's GPT, we encourage you to share your setup instructions with us by creating instructions and adding it [to our documentation](https://github.com/All-Hands-AI/OpenHands/tree/main/docs/modules/usage/llms).
+
+For a full list of the LM providers and models available, please consult the [litellm documentation](https://docs.litellm.ai/docs/providers).

 ### 4. Running the application
 #### Option A: Run the Full Application
@@ -97,10 +98,9 @@ poetry run pytest ./tests/unit/test_*.py
 2. Update the poetry.lock file via `poetry lock --no-update`

 ### 9. Use existing Docker image
-To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
-setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
-
-Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.14-nikolaik`
+To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image. Follow these steps:
+1. Set the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
+2. Example: export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.14-nikolaik

 ## Develop inside Docker container

--- a/ISSUE_TRIAGE.md
+++ b/ISSUE_TRIAGE.md
@@ -6,9 +6,9 @@ These are the procedures and guidelines on how issues are triaged in this repo b
 * Issues may be tagged with what it relates to (**backend**, **frontend**, **agent quality**, etc.)

 ## Severity
-* **Low**: Minor issues or affecting single user.
-* **Medium**: Affecting multiple users.
-* **Critical**: Affecting all users or potential security issues.
+* **Low**: Minor issues, single user report
+* **Medium**: Affecting multiple users
+* **Critical**: Affecting all users or potential security issues

 ## Effort
 * Issues may be estimated with effort required (**small effort**, **medium effort**, **large effort**)
@@ -17,9 +17,9 @@ These are the procedures and guidelines on how issues are triaged in this repo b
 * Issues with low implementation difficulty may be tagged with **good first issue**

 ## Not Enough Information
-* User is asked to provide more information (logs, how to reproduce, etc.) when the issue is not clear.
-* If an issue is unclear and the author does not provide more information or respond to a request, the issue may be closed as **not planned** (Usually after a week).
+* User is asked to provide more information (logs, how to reproduce, etc.) when the issue is not clear
+* If an issue is unclear and the author does not provide more information or respond to a request, the issue may be closed as **not planned** (Usually after a week)

 ## Multiple Requests/Fixes in One Issue
-* These issues will be narrowed down to one request/fix so the issue is more easily tracked and fixed.
-* Issues may be broken down into multiple issues if required.
+* These issues will be narrowed down to one request/fix so the issue is more easily tracked and fixed
+* Issues may be broken down into multiple issues if required
--- a/README.md
+++ b/README.md
@@ -42,9 +42,9 @@ docker pull docker.all-hands.dev/all-hands-ai/runtime:0.14-nikolaik

 docker run -it --pull=always \
    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.14-nikolaik \
-    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
+    -e LOG_ALL_EVENTS=true \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
    docker.all-hands.dev/all-hands-ai/openhands:0.14
@@ -61,7 +61,7 @@ works best, but you have [many options](https://docs.all-hands.dev/modules/usage
 You can also [connect OpenHands to your local filesystem](https://docs.all-hands.dev/modules/usage/runtimes),
 run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode),
 interact with it via a [friendly CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),
-or run it on tagged issues with [a github action](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md).
+or run it on tagged issues with [a github action](https://github.com/All-Hands-AI/OpenHands-resolver).

 Visit [Installation](https://docs.all-hands.dev/modules/usage/installation) for more information and setup instructions.

@@ -77,16 +77,25 @@ To learn more about the project, and for tips on using OpenHands,
 There you'll find resources on how to use different LLM providers,
 troubleshooting resources, and advanced configuration options.

-## 🤝 How to Join the Community
+## 🤝 How to Contribute

-OpenHands is a community-driven project, and we welcome contributions from everyone. We do most of our communication
-through Slack, so this is the best place to start, but we also are happy to have you contact us on Discord or Github:
+OpenHands is a community-driven project, and we welcome contributions from everyone.
+Whether you're a developer, a researcher, or simply enthusiastic about advancing the field of
+software engineering with AI, there are many ways to get involved:

- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2tom0er4l-JeNUGHt_AxpEfIBstbLPiw) - Here we talk about research, architecture, and future development.
- [Join our Discord server](https://discord.gg/ESHStjSjD4) - This is a community-run server for general discussion, questions, and feedback.
- [Read or post Github Issues](https://github.com/All-Hands-AI/OpenHands/issues) - Check out the issues we're working on, or add your own ideas.
+- **Code Contributions:** Help us develop new agents, core functionality, the frontend and other interfaces, or sandboxing solutions.
+- **Research and Evaluation:** Contribute to our understanding of LLMs in software engineering, participate in evaluating the models, or suggest improvements.
+- **Feedback and Testing:** Use the OpenHands toolset, report bugs, suggest features, or provide feedback on usability.

-See more about the community in [COMMUNITY.md](./COMMUNITY.md) or find details on contributing in [CONTRIBUTING.md](./CONTRIBUTING.md).
+For details, please check [CONTRIBUTING.md](./CONTRIBUTING.md).
+
+## 🤖 Join Our Community
+
+Whether you're a developer, a researcher, or simply enthusiastic about OpenHands, we'd love to have you in our community.
+Let's make software engineering better together!
+
+- [Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2tom0er4l-JeNUGHt_AxpEfIBstbLPiw) - Here we talk about research, architecture, and future development.
+- [Discord server](https://discord.gg/ESHStjSjD4) - This is a community-run server for general discussion, questions, and feedback.

 ## 📈 Progress

--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/github-action.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/github-action.md
@@ -14,4 +14,4 @@ Pour utiliser l'Action GitHub OpenHands dans le dépôt OpenHands, un mainteneur

 ## Installation de l'Action dans un nouveau dépôt

-Pour installer l'Action GitHub OpenHands dans votre propre dépôt, suivez les [instructions dans le dépôt OpenHands Resolver](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md).
+Pour installer l'Action GitHub OpenHands dans votre propre dépôt, suivez les [instructions dans le dépôt OpenHands Resolver](https://github.com/All-Hands-AI/OpenHands-resolver?tab=readme-ov-file#using-the-github-actions-workflow).
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/github-action.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/github-action.md
@@ -12,4 +12,4 @@

 ## 在新仓库中安装 Action

-要在你自己的仓库中安装 OpenHands GitHub Action，请按照 [OpenHands Resolver 仓库中的说明](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md) 进行操作。
+要在你自己的仓库中安装 OpenHands GitHub Action，请按照 [OpenHands Resolver 仓库中的说明](https://github.com/All-Hands-AI/OpenHands-resolver?tab=readme-ov-file#using-the-github-actions-workflow) 进行操作。
--- a/docs/modules/usage/about.md
+++ b/docs/modules/usage/about.md
@@ -1,6 +1,6 @@
-# About OpenHands
+# 📚 Misc

-## Research Strategy
+## ⭐️ Research Strategy

 Achieving full replication of production-grade applications with LLMs is a complex endeavor. Our strategy involves:

@@ -9,11 +9,34 @@ Achieving full replication of production-grade applications with LLMs is a compl
 3. **Task Planning:** Developing capabilities for bug detection, codebase management, and optimization
 4. **Evaluation:** Establishing comprehensive evaluation metrics to better understand and improve our models

-## Default Agent
+## 🚧 Default Agent

 Our default Agent is currently the [CodeActAgent](agents), which is capable of generating code and handling files.

-## Built With
+## 🤝 How to Contribute
+
+OpenHands is a community-driven project, and we welcome contributions from everyone. Whether you're a developer, a researcher, or simply enthusiastic about advancing the field of software engineering with AI, there are many ways to get involved:
+
+- **Code Contributions:** Help us develop the core functionalities, frontend interface, or sandboxing solutions
+- **Research and Evaluation:** Contribute to our understanding of LLMs in software engineering, participate in evaluating the models, or suggest improvements
+- **Feedback and Testing:** Use the OpenHands toolset, report bugs, suggest features, or provide feedback on usability
+
+For details, please check [this document](https://github.com/All-Hands-AI/OpenHands/blob/main/CONTRIBUTING.md).
+
+## 🤖 Join Our Community
+
+We have both Slack workspace for the collaboration on building OpenHands and Discord server for discussion about anything related, e.g., this project, LLM, agent, etc.
+
+- [Slack workspace](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA)
+- [Discord server](https://discord.gg/ESHStjSjD4)
+
+If you would love to contribute, feel free to join our community. Let's simplify software engineering together!
+
+🐚 **Code less, make more with OpenHands.**
+
+[![Star History Chart](https://api.star-history.com/svg?repos=All-Hands-AI/OpenHands&type=Date)](https://star-history.com/#All-Hands-AI/OpenHands&Date)
+
+## 🛠️ Built With

 OpenHands is built using a combination of powerful frameworks and libraries, providing a robust foundation for its development. Here are the key technologies used in the project:

@@ -21,6 +44,6 @@ OpenHands is built using a combination of powerful frameworks and libraries, pro

 Please note that the selection of these technologies is in progress, and additional technologies may be added or existing ones may be removed as the project evolves. We strive to adopt the most suitable and efficient tools to enhance the capabilities of OpenHands.

-## License
+## 📜 License

-Distributed under MIT [License](https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE).
+Distributed under the MIT License. See [our license](https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE) for more information.
--- a/docs/modules/usage/how-to/custom-sandbox-guide.md
+++ b/docs/modules/usage/how-to/custom-sandbox-guide.md
@@ -62,3 +62,25 @@ Run OpenHands by running ```make run``` in the top level directory.
 ## Technical Explanation

 Please refer to [custom docker image section of the runtime documentation](https://docs.all-hands.dev/modules/usage/architecture/runtime#advanced-how-openhands-builds-and-maintains-od-runtime-images) for more details.
+
+## Troubleshooting / Errors
+
+### Error: ```useradd: UID 1000 is not unique```
+
+If you see this error in the console output it is because OpenHands is trying to create the openhands user in the sandbox with a UID of 1000, however this UID is already being used in the image (for some reason). To fix this change the sandbox_user_id field in the config.toml file to a different value:
+
+```toml
+[core]
+workspace_base="./workspace"
+run_as_openhands=true
+sandbox_base_container_image="custom_image"
+sandbox_user_id="1001"
+```
+
+### Port use errors
+
+If you see an error about a port being in use or unavailable, try deleting all running Docker Containers (run `docker ps` and `docker rm` relevant containers) and then re-running ```make run``` .
+
+## Discuss
+
+For other issues or questions join the [Slack](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA) or [Discord](https://discord.gg/ESHStjSjD4) and ask!
--- a/docs/modules/usage/how-to/github-action.md
+++ b/docs/modules/usage/how-to/github-action.md
@@ -4,92 +4,12 @@ This guide explains how to use the OpenHands GitHub Action, both within the Open

 ## Using the Action in the OpenHands Repository

-To use the OpenHands GitHub Action in a repository, you can:
+To use the OpenHands GitHub Action in the OpenHands repository, an OpenHands maintainer can:

 1. Create an issue in the repository.
-2. Add the `fix-me` label to the issue or leave a comment on the issue starting with `@openhands-agent`.
-
-The action will automatically trigger and attempt to resolve the issue.
+2. Add the `fix-me` label to the issue.
+3. The action will automatically trigger and attempt to resolve the issue.

 ## Installing the Action in a New Repository

-To install the OpenHands GitHub Action in your own repository, follow
-the [README for the OpenHands Resolver](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md).
-
-## Usage Tips
-
-### Iterative resolution
-
-1. Create an issue in the repository.
-2. Add the `fix-me` label to the issue, or leave a comment starting with `@openhands-agent`
-3. Review the attempt to resolve the issue by checking the pull request
-4. Follow up with feedback through general comments, review comments, or inline thread comments
-5. Add the `fix-me` label to the pull request, or address a specific comment by starting with `@openhands-agent`
-
-### Label versus Macro
-
- Label (`fix-me`): Requests OpenHands to address the **entire** issue or pull request.
- Macro (`@openhands-agent`): Requests OpenHands to consider only the issue/pull request description and **the specific comment**.
-
-## Advanced Settings
-
-### Add custom repository settings
-
-You can provide custom directions for OpenHands by following the [README for the resolver](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md#providing-custom-instructions).
-
-### Configure custom macro
-
-To customize the default macro (`@openhands-agent`):
-
-1. [Create a repository variable](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) named `OPENHANDS_MACRO`
-2. Assign the variable a custom value
-
-## Writing Effective .openhands_instructions Files
-
-The `.openhands_instructions` file is a file that you can put in the root directory of your repository to guide OpenHands in understanding and working with your repository effectively. Here are key tips for writing high-quality instructions:
-
-### Core Principles
-
-1. **Concise but Informative**: Provide a clear, focused overview of the repository that emphasizes the most common actions OpenHands will need to perform.
-
-2. **Repository Structure**: Explain the key directories and their purposes, especially highlighting where different types of code (e.g., frontend, backend) are located.
-
-3. **Development Workflows**: Document the essential commands for:
-   - Building and setting up the project
-   - Running tests
-   - Linting and code quality checks
-   - Any environment-specific requirements
-
-4. **Testing Guidelines**: Specify:
-   - Where tests are located
-   - How to run specific test suites
-   - Any testing conventions or requirements
-
-### Example Structure
-
-```markdown
-# Repository Overview
-[Brief description of the project]
-
-## General Setup
- Main build command
- Development environment setup
- Pre-commit checks
-
-## Backend
- Location and structure
- Testing instructions
- Environment requirements
-
-## Frontend
- Setup prerequisites
- Build and test commands
- Environment variables
-
-## Additional Guidelines
- Code style requirements
- Special considerations
- Common workflows
-```
-
-For a real-world example, refer to the [OpenHands repository's .openhands_instructions](https://github.com/All-Hands-AI/OpenHands/blob/main/.openhands_instructions).
+To install the OpenHands GitHub Action in your own repository, follow the [directions in the OpenHands Resolver repo](https://github.com/All-Hands-AI/OpenHands-resolver?tab=readme-ov-file#using-the-github-actions-workflow).
--- a/docs/modules/usage/installation.mdx
+++ b/docs/modules/usage/installation.mdx
@@ -15,9 +15,9 @@ docker pull docker.all-hands.dev/all-hands-ai/runtime:0.14-nikolaik

 docker run -it --rm --pull=always \
    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.14-nikolaik \
-    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
+    -e LOG_ALL_EVENTS=true \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
    docker.all-hands.dev/all-hands-ai/openhands:0.14
--- a/docs/modules/usage/runtimes.md
+++ b/docs/modules/usage/runtimes.md
@@ -49,7 +49,7 @@ but seems to work well on most systems.

 ## All Hands Runtime
 The All Hands Runtime is currently in beta. You can request access by joining
-the #remote-runtime-limited-beta channel on Slack ([see the README](https://github.com/All-Hands-AI/OpenHands?tab=readme-ov-file#-join-our-community) for an invite).
+the #remote-runtime-limited-beta channel on Slack (see the README for an invite).

 To use the All Hands Runtime, set the following environment variables when
 starting OpenHands:
@@ -66,7 +66,7 @@ docker run # ...
 ## Modal Runtime
 Our partners at [Modal](https://modal.com/) have also provided a runtime for OpenHands.

-To use the Modal Runtime, create an account, and then [create an API key.](https://modal.com/settings)
+To use the Modal Runtime, create an account, and then [create an API key](https://modal.com/settings)

 You'll then need to set the following environment variables when starting OpenHands:
 ```bash
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
--- a/docs/package.json
+++ b/docs/package.json
@@ -15,10 +15,10 @@
    "typecheck": "tsc"
  },
  "dependencies": {
-    "@docusaurus/core": "^3.6.2",
-    "@docusaurus/plugin-content-pages": "^3.6.2",
-    "@docusaurus/preset-classic": "^3.6.2",
-    "@docusaurus/theme-mermaid": "^3.6.2",
+    "@docusaurus/core": "^3.6.0",
+    "@docusaurus/plugin-content-pages": "^3.6.0",
+    "@docusaurus/preset-classic": "^3.6.0",
+    "@docusaurus/theme-mermaid": "^3.6.0",
    "@mdx-js/react": "^3.1.0",
    "clsx": "^2.0.0",
    "prism-react-renderer": "^2.4.0",
@@ -29,7 +29,7 @@
  },
  "devDependencies": {
    "@docusaurus/module-type-aliases": "^3.5.1",
-    "@docusaurus/tsconfig": "^3.6.2",
+    "@docusaurus/tsconfig": "^3.6.0",
    "@docusaurus/types": "^3.5.1",
    "typescript": "~5.6.3"
  },
--- a/docs/yarn.lock
+++ b/docs/yarn.lock
--- a/evaluation/discoverybench/run_infer.py
+++ b/evaluation/discoverybench/run_infer.py
@@ -250,6 +250,9 @@ def process_instance(

    config = get_config(metadata)

+    # use a session id for concurrent evaluation
+    sid = 'ID_' + str(instance.instance_id)
+
    # Setup the logger properly, so you can run
    # multi-processing to parallelize the evaluation
    if reset_logger:
@@ -281,7 +284,7 @@ def process_instance(
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]

    # Here's how you can run the agent (similar to the `main` function) and get the final task state
-    runtime = create_runtime(config)
+    runtime = create_runtime(config, sid=sid)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance.data_files)

--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -145,7 +145,7 @@ def get_config(
            platform='linux/amd64',
            api_key=os.environ.get('ALLHANDS_API_KEY', None),
            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
-            keep_runtime_alive=False,
+            keep_remote_runtime_alive=False,
            remote_runtime_init_timeout=3600,
        ),
        # do not mount workspace
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -3,11 +3,9 @@ import logging
 import multiprocessing as mp
 import os
 import pathlib
-import signal
 import subprocess
 import time
 import traceback
-from contextlib import contextmanager
 from typing import Any, Awaitable, Callable, TextIO

 import pandas as pd
@@ -94,27 +92,6 @@ class EvalException(Exception):
    pass


-class EvalTimeoutException(Exception):
-    pass
-
-
-@contextmanager
-def timeout(seconds: int):
-    def timeout_handler(signum, frame):
-        raise EvalTimeoutException(f'Function timed out after {seconds} seconds')
-
-    # Set up the signal handler
-    original_handler = signal.signal(signal.SIGALRM, timeout_handler)
-    signal.alarm(seconds)
-
-    try:
-        yield
-    finally:
-        # Restore the original handler and disable the alarm
-        signal.alarm(0)
-        signal.signal(signal.SIGALRM, original_handler)
-
-
 def codeact_user_response(
    state: State,
    encapsulate_solution: bool = False,
@@ -303,33 +280,15 @@ def _process_instance_wrapper(
    metadata: EvalMetadata,
    use_mp: bool,
    max_retries: int = 5,
-    timeout_seconds: int | None = None,
 ) -> EvalOutput:
-    """Wrap the process_instance_func to handle retries and errors."""
+    """Wrap the process_instance_func to handle retries and errors.
+
+    Retry an instance up to max_retries times if it fails (e.g., due to transient network/runtime issues).
+    """
    for attempt in range(max_retries + 1):
        try:
-            if timeout_seconds is not None:
-                with timeout(timeout_seconds):
-                    result = process_instance_func(instance, metadata, use_mp)
-            else:
-                result = process_instance_func(instance, metadata, use_mp)
+            result = process_instance_func(instance, metadata, use_mp)
            return result
-        except EvalTimeoutException as e:
-            error = f'Timeout after {timeout_seconds} seconds'
-            stacktrace = traceback.format_exc()
-            msg = (
-                '-' * 10
-                + '\n'
-                + f'Timeout ({timeout_seconds} seconds) in instance [{instance.instance_id}], Stopped evaluation for this instance.'
-                + '\n'
-                + '-' * 10
-            )
-            logger.exception(e)
-            return EvalOutput(
-                instance_id=instance.instance_id,
-                test_result={},
-                error=error,
-            )
        except Exception as e:
            error = str(e)
            stacktrace = traceback.format_exc()
@@ -378,7 +337,6 @@ def run_evaluation(
        [pd.Series, EvalMetadata, bool], Awaitable[EvalOutput]
    ],
    max_retries: int = 5,  # number of retries for each instance
-    timeout_seconds: int | None = None,
 ):
    use_multiprocessing = num_workers > 1

@@ -399,14 +357,7 @@ def run_evaluation(
        if use_multiprocessing:
            with mp.Pool(num_workers) as pool:
                args_iter = (
-                    (
-                        process_instance_func,
-                        instance,
-                        metadata,
-                        True,
-                        max_retries,
-                        timeout_seconds,
-                    )
+                    (process_instance_func, instance, metadata, True, max_retries)
                    for _, instance in dataset.iterrows()
                )
                results = pool.imap_unordered(_process_instance_wrapper_mp, args_iter)
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "openhands-frontend",
-  "version": "0.14.1",
+  "version": "0.14.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "openhands-frontend",
-      "version": "0.14.1",
+      "version": "0.14.0",
      "dependencies": {
        "@monaco-editor/react": "^4.6.0",
        "@nextui-org/react": "^2.4.8",
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
 {
  "name": "openhands-frontend",
-  "version": "0.14.1",
+  "version": "0.14.0",
  "private": true,
  "type": "module",
  "engines": {
--- a/frontend/src/api/open-hands.ts
+++ b/frontend/src/api/open-hands.ts
@@ -185,7 +185,8 @@ class OpenHands {
  }

  static async getRuntimeId(): Promise<{ runtime_id: string }> {
-    const data = await request("/api/conversation");
+    const response = await request("/api/config");
+    const data = await response.json();

    return data;
  }
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -91,7 +91,6 @@ export default defineConfig(({ mode }) => {
    test: {
      environment: "jsdom",
      setupFiles: ["vitest.setup.ts"],
-      reporters: "basic",
      exclude: [...configDefaults.exclude, "tests"],
      coverage: {
        reporter: ["text", "json", "html", "lcov", "text-summary"],
--- a/frontend/vitest.setup.ts
+++ b/frontend/vitest.setup.ts
@@ -13,7 +13,7 @@ vi.mock("react-i18next", async (importOriginal) => ({
 }));

 // Mock requests during tests
-beforeAll(() => server.listen({ onUnhandledRequest: "bypass" }));
+beforeAll(() => server.listen());
 afterEach(() => {
  server.resetHandlers();
  // Cleanup the document body after each test
--- a/openhands/agenthub/README.md
+++ b/openhands/agenthub/README.md
@@ -7,10 +7,10 @@ Contributors from different backgrounds and interests can choose to contribute t

 ## Constructing an Agent

-The abstraction for an agent can be found [here](../controller/agent.py).
+The abstraction for an agent can be found [here](../openhands/controller/agent.py).

 Agents are run inside of a loop. At each iteration, `agent.step()` is called with a
-[State](../controller/state/state.py) input, and the agent must output an [Action](../events/action).
+[State](../openhands/controller/state/state.py) input, and the agent must output an [Action](../openhands/events/action).

 Every agent also has a `self.llm` which it can use to interact with the LLM configured by the user.
 See the [LiteLLM docs for `self.llm.completion`](https://docs.litellm.ai/docs/completion).
@@ -46,17 +46,17 @@ The agent can add and modify subtasks through the `AddTaskAction` and `ModifyTas

 Here is a list of available Actions, which can be returned by `agent.step()`:

- [`CmdRunAction`](../events/action/commands.py) - Runs a command inside a sandboxed terminal
- [`IPythonRunCellAction`](../events/action/commands.py) - Execute a block of Python code interactively (in Jupyter notebook) and receives `CmdOutputObservation`. Requires setting up `jupyter` [plugin](../runtime/plugins) as a requirement.
- [`FileReadAction`](../events/action/files.py) - Reads the content of a file
- [`FileWriteAction`](../events/action/files.py) - Writes new content to a file
- [`BrowseURLAction`](../events/action/browse.py) - Gets the content of a URL
- [`AddTaskAction`](../events/action/tasks.py) - Adds a subtask to the plan
- [`ModifyTaskAction`](../events/action/tasks.py) - Changes the state of a subtask.
- [`AgentFinishAction`](../events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
- [`AgentRejectAction`](../events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
- [`AgentFinishAction`](../events/action/agent.py) - Stops the control loop, allowing the user to enter a new task
- [`MessageAction`](../events/action/message.py) - Represents a message from an agent or the user
+- [`CmdRunAction`](../openhands/events/action/commands.py) - Runs a command inside a sandboxed terminal
+- [`IPythonRunCellAction`](../openhands/events/action/commands.py) - Execute a block of Python code interactively (in Jupyter notebook) and receives `CmdOutputObservation`. Requires setting up `jupyter` [plugin](../openhands/runtime/plugins) as a requirement.
+- [`FileReadAction`](../openhands/events/action/files.py) - Reads the content of a file
+- [`FileWriteAction`](../openhands/events/action/files.py) - Writes new content to a file
+- [`BrowseURLAction`](../openhands/events/action/browse.py) - Gets the content of a URL
+- [`AddTaskAction`](../openhands/events/action/tasks.py) - Adds a subtask to the plan
+- [`ModifyTaskAction`](../openhands/events/action/tasks.py) - Changes the state of a subtask.
+- [`AgentFinishAction`](../openhands/events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
+- [`AgentRejectAction`](../openhands/events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
+- [`AgentFinishAction`](../openhands/events/action/agent.py) - Stops the control loop, allowing the user to enter a new task
+- [`MessageAction`](../openhands/events/action/message.py) - Represents a message from an agent or the user

 To serialize and deserialize an action, you can use:
 - `action.to_dict()` to serialize the action to a dictionary to be sent to the UI, including a user-friendly string representation of the message
@@ -70,12 +70,12 @@ But they may also appear as a result of asynchronous events (e.g. a message from

 Here is a list of available Observations:

- [`CmdOutputObservation`](../events/observation/commands.py)
- [`BrowserOutputObservation`](../events/observation/browse.py)
- [`FileReadObservation`](../events/observation/files.py)
- [`FileWriteObservation`](../events/observation/files.py)
- [`ErrorObservation`](../events/observation/error.py)
- [`SuccessObservation`](../events/observation/success.py)
+- [`CmdOutputObservation`](../openhands/events/observation/commands.py)
+- [`BrowserOutputObservation`](../openhands/events/observation/browse.py)
+- [`FileReadObservation`](../openhands/events/observation/files.py)
+- [`FileWriteObservation`](../openhands/events/observation/files.py)
+- [`ErrorObservation`](../openhands/events/observation/error.py)
+- [`SuccessObservation`](../openhands/events/observation/success.py)

 You can use `observation.to_dict()` and `observation_from_dict` to serialize and deserialize observations.

--- a/openhands/agenthub/codeact_agent/README.md
+++ b/openhands/agenthub/codeact_agent/README.md
@@ -10,57 +10,3 @@ The conceptual idea is illustrated below. At each turn, the agent can:
   - Execute any valid `Python` code with [an interactive Python interpreter](https://ipython.org/). This is simulated through `bash` command, see plugin system below for more details.

 ![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3)
-
-## Adding New Tools
-
-The CodeAct agent uses a function calling interface to define tools that the agent can use. Tools are defined in `function_calling.py` using the `ChatCompletionToolParam` class from `litellm`. Each tool consists of:
-
-1. A description string that explains what the tool does and how to use it
-2. A tool definition using `ChatCompletionToolParam` that specifies:
-   - The tool's name
-   - The tool's parameters and their types
-   - Required vs optional parameters
-
-Here's an example of how a tool is defined:
-
-```python
-MyTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='my_tool',
-        description='Description of what the tool does and how to use it',
-        parameters={
-            'type': 'object',
-            'properties': {
-                'param1': {
-                    'type': 'string',
-                    'description': 'Description of parameter 1',
-                },
-                'param2': {
-                    'type': 'integer',
-                    'description': 'Description of parameter 2',
-                },
-            },
-            'required': ['param1'],  # List required parameters here
-        },
-    ),
-)
-```
-
-To add a new tool:
-
-1. Define your tool in `function_calling.py` following the pattern above
-2. Add your tool to the `get_tools()` function in `function_calling.py`
-3. Implement the corresponding action handler in the agent to process the tool's invocation
-
-The agent currently supports several built-in tools:
- `execute_bash`: Execute bash commands
- `execute_ipython_cell`: Run Python code in IPython
- `browser`: Interact with a web browser
- `str_replace_editor`: Edit files using string replacement
- `edit_file`: Edit files using LLM-based editing
-
-Tools can be enabled/disabled through configuration parameters:
- `codeact_enable_browsing`: Enable browser interaction
- `codeact_enable_jupyter`: Enable IPython code execution
- `codeact_enable_llm_editor`: Enable LLM-based file editing (if disabled, uses string replacement editor instead)
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -20,6 +20,7 @@ from openhands.events.action import (
    IPythonRunCellAction,
    MessageAction,
 )
+from openhands.events.event import EventSource
 from openhands.events.observation import (
    AgentDelegateObservation,
    BrowserOutputObservation,
@@ -187,7 +188,9 @@ class CodeActAgent(Agent):
                )
            ]
        elif isinstance(action, CmdRunAction) and action.source == 'user':
-            content = [TextContent(text=f'User executed the command:\n{action.command}')]
+            content = [
+                TextContent(text=f'User executed the command:\n{action.command}')
+            ]
            return [
                Message(
                    role='user',
@@ -255,6 +258,8 @@ class CodeActAgent(Agent):
            message = Message(role='user', content=[TextContent(text=text)])
        elif isinstance(obs, FileEditObservation):
            text = truncate_content(str(obs), max_message_chars)
+            if obs.source == EventSource.USER:
+                text = '[User has edited a file]\n' + text
            message = Message(role='user', content=[TextContent(text=text)])
        elif isinstance(obs, BrowserOutputObservation):
            text = obs.get_agent_obs_text()
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -12,7 +12,6 @@ from litellm import (
    ModelResponse,
 )

-from openhands.core.exceptions import FunctionCallNotExistsError
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
    Action,
@@ -485,9 +484,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
            elif tool_call.function.name == 'browser':
                action = BrowseInteractiveAction(browser_actions=arguments['code'])
            else:
-                raise FunctionCallNotExistsError(
-                    f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
-                )
+                raise RuntimeError(f'Unknown tool call: {tool_call.function.name}')

            # We only add thought to the first action
            if i == 0:
--- a/openhands/agenthub/codeact_agent/micro/github.md
+++ b/openhands/agenthub/codeact_agent/micro/github.md
@@ -21,9 +21,11 @@ Here are some instructions for pushing, but ONLY do this if the user asks you to
 * After opening or updating a pull request, send the user a short message with a link to the pull request.
 * Do all of the above in as few steps as possible. E.g. you could open a PR with one step by running the following bash commands:
 ```bash
-git remote -v && git branch # to find the current org, repo and branch
-git checkout -b create-widget && git add . && git commit -m "Create widget" && git push -u origin create-widget
-curl -X POST "https://api.github.com/repos/$ORG_NAME/$REPO_NAME/pulls" \
+git checkout -b create-widget
+git add .
+git commit -m "Create widget"
+git push origin create-widget
+curl -X POST "https://api.github.com/repos/CodeActOrg/openhands/pulls" \
    -H "Authorization: Bearer $GITHUB_TOKEN" \
    -d '{"title":"Create widget","head":"create-widget","base":"openhands-workspace"}'
 ```
--- a/openhands/agenthub/codeact_agent/prompts/system_prompt.j2
+++ b/openhands/agenthub/codeact_agent/prompts/system_prompt.j2
@@ -1,4 +1,9 @@
 You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
+You also observe user actions, like "User has edited a file", and
+infer the user's long-term intentons based on these edits.
+If you think you can help the user finish the task at hand,
+you should offer a suggestion as to how you can
+help, and wait for the user to confirm.
 <IMPORTANT>
 * If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
 * When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
--- a/openhands/agenthub/codeact_agent/system_prompt.j2
+++ b/openhands/agenthub/codeact_agent/system_prompt.j2
@@ -0,0 +1,176 @@
+{% set MINIMAL_SYSTEM_PREFIX %}
+A chat between a curious user and an artificial intelligence assistant.
+The assistant gives helpful, detailed answers to the user's questions.
+It also observes user actions, like "User has edited a file", and
+infers the user's long-term intentons based on these edits. If the agent thinks
+it can help the user finish the task at hand, it offers a suggestion as to how it can
+help, and waits for the user to confirm.
+
+[1] The assistant can use a Python environment with <execute_ipython>, e.g.:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+
+[2] The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
+If a bash command returns exit code `-1`, this means the process is not yet finished.
+The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
+(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
+or it can contain the text `ctrl+c` to interrupt the process.
+
+For commands that may run indefinitely, the output should be redirected to a file and the command run
+in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
+If a command execution result says "Command timed out. Sending SIGINT to the process",
+the assistant should retry running the command in the background.
+
+[3] The assistant can edit files using <file_edit> by setting the file path and providing a draft of the new file content. The draft file content does not need to be exactly the same as the existing file content; the assistant may skip some lines and only include the parts that need to be changed.
+
+IMPORTANT: When editing large file (e.g., > 300 lines), the assistant MUST SPECIFY the range of lines to be edited by setting `start` and `end` (1-indexed, both inclusive). For example, `<file_edit path="/path/to/file.txt" start=1 end=-1>` means the assistant will edit the whole file (from line 1 to the end of the file). `start=1` and `end=-1` are the default values, so the assistant can omit them if they are the same as the default values.
+BEFORE you start editing, you MUST view the ENTIRE body of the part you want to edit and get the correct begin and end line numbers.
+
+When editing files, the assistant should include comments indicating where the code will not change. For example, use comments like `# no changes before` or `# no changes here` to clearly mark sections of the code that remain unchanged. This helps to provide context and ensure clarity in the edits being made.
+
+Possible cases:
+- File too long: When the file to be edited is too long, the assistant should set `start` and `end` (1-indexed, both inclusive) to specify the range of lines to be edited. For example, `<file_edit path="/path/to/file.txt" start=100 end=200>` means the assistant will only edit lines 100 to 200 of `/path/to/file.txt`.
+- Append to file: If the assistant wants to append to a file, it should set both `start` and `end` to `-1`.
+- File does not exist: If `<file_edit>` is pointing to a file that does not exist, a new file with the exact content will be created.
+
+Important: because line numbers are useful, the assistant should always use the provided functions to search (e.g., `search_dir`) or view the file content (e.g., `open_file`) along with the line numbers. DO NOT use other methods (e.g., `cat`) to view the file content.
+
+**Example 1 (general edit for short files)**
+For example, given an existing file `/path/to/file.py` that looks like this:
+
+(this is the end of the file)
+1|class MyClass:
+2|    def __init__(self):
+3|        self.x = 1
+4|        self.y = 2
+5|        self.z = 3
+6|
+7|print(MyClass().z)
+8|print(MyClass().x)
+(this is the end of the file)
+
+
+The assistant wants to edit the file to look like this:
+
+(this is the end of the file)
+1|class MyClass:
+2|    def __init__(self):
+3|        self.x = 1
+4|        self.y = 2
+5|
+6|print(MyClass().y)
+(this is the end of the file)
+
+
+The assistant may produce an edit action like this:
+<file_edit path="/path/to/file.txt" start=1 end=-1>
+class MyClass:
+    def __init__(self):
+        # no changes before
+        self.y = 2
+        # self.z is removed
+
+# MyClass().z is removed
+print(MyClass().y)
+</file_edit>
+
+**Example 2 (append to file for short files)**
+
+For example, given an existing file `/path/to/file.py` that looks like this:
+
+(this is the end of the file)
+1|class MyClass:
+2|    def __init__(self):
+3|        self.x = 1
+4|        self.y = 2
+5|        self.z = 3
+6|
+7|print(MyClass().z)
+8|print(MyClass().x)
+(this is the end of the file)
+
+To append the following lines to the file:
+```python
+print(MyClass().y)
+```
+
+The assistant may produce an edit action like this:
+<file_edit path="/path/to/file.txt" start=-1 end=-1>
+print(MyClass().y)
+</file_edit>
+
+**Example 3 (edit for long files)**
+
+Given an existing file `/path/to/file.py` that looks like this:
+
+(1000 more lines above)
+1001|class MyClass:
+1002|    def __init__(self):
+1003|        self.x = 1
+1004|        self.y = 2
+1005|        self.z = 3
+1006|
+1007|print(MyClass().z)
+1008|print(MyClass().x)
+(2000 more lines below)
+
+
+The assistant wants to edit the file to look like this:
+
+(1000 more lines above)
+1001|class MyClass:
+1002|    def __init__(self):
+1003|        self.x = 1
+1004|        self.y = 2
+1005|
+1006|print(MyClass().y)
+(2000 more lines below)
+
+The assistant may produce an edit action like this:
+
+<file_edit path="/path/to/file.txt" start=1001 end=1008>
+class MyClass:
+    def __init__(self):
+        # no changes before
+        self.y = 2
+        # self.z is removed
+
+# MyClass().z is removed
+print(MyClass().y)
+</file_edit>
+
+
+{% endset %}
+{% set BROWSING_PREFIX %}
+The assistant can browse the Internet with <execute_browse> and </execute_browse>.
+For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
+Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
+{% endset %}
+{% set PIP_INSTALL_PREFIX %}
+The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
+{% endset %}
+{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %}
+{% set COMMAND_DOCS %}
+Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
+{{ agent_skills_docs }}
+IMPORTANT:
+- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
+- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+- Any code issued should be less than 50 lines to avoid context being cut off!
+{% endset %}
+{% set SYSTEM_SUFFIX %}
+Responses should be concise.
+The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
+Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
+If the assistant is finished with the task you MUST include <finish></finish> in your response.
+IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
+The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
+The assistant MUST NOT apologize to the user or thank the user after running commands or editing files. It should only address the user in response to an explicit message from the user, or to ask for more information.
+The assistant MUST NOT push any changes to GitHub unless explicitly requested to do so.
+
+{% endset %}
+{# Combine all parts without newlines between them #}
+{{ SYSTEM_PREFIX -}}
+{{- COMMAND_DOCS -}}
+{{- SYSTEM_SUFFIX }}
--- a/openhands/agenthub/codeact_agent/user_prompt.j2
+++ b/openhands/agenthub/codeact_agent/user_prompt.j2
@@ -0,0 +1,278 @@
+{% set DEFAULT_EXAMPLE %}
+--- START OF EXAMPLE ---
+
+USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
+
+ASSISTANT:
+Sure! Let me first check the current directory:
+<execute_bash>
+ls
+</execute_bash>
+
+USER:
+OBSERVATION:
+openhands@runtime:~/workspace$
+
+ASSISTANT:
+There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
+<file_edit path="/workspace/app.py" start=-1 end=-1>
+from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    return str(numbers)
+
+if __name__ == '__main__':
+    app.run(port=5000)
+</file_edit>
+
+USER:
+OBSERVATION:
+[New file /workspace/app.py is created.]
+(begin of changes)
+--- /workspace/app.py
+++ /workspace/app.py
+@@ -1 +1,10 @@
+from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    return str(numbers)
+
+if __name__ == '__main__':
+    app.run(port=5000)
+(end of changes)
+
+ASSISTANT:
+I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 121[1]+  Exit 1                  python3 app.py > server.log 2>&1
+
+ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+Traceback (most recent call last):
+  File "/workspace/app.py", line 2, in <module>
+    from flask import Flask
+ModuleNotFoundError: No module named 'flask'
+
+ASSISTANT:
+It seems that Flask is not installed. Let me install Flask for you:
+<execute_bash>
+pip install flask
+</execute_bash>
+
+USER:
+OBSERVATION:
+Defaulting to user installation because normal site-packages is not writeable
+Collecting flask
+  Using cached flask-3.0.3-py3-none-any.whl (101 kB)
+Collecting blinker>=1.6.2
+  Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
+Collecting Werkzeug>=3.0.0
+  Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
+Collecting click>=8.1.3
+  Using cached click-8.1.7-py3-none-any.whl (97 kB)
+Collecting itsdangerous>=2.1.2
+  Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
+Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3)
+Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
+Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
+Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
+
+ASSISTANT:
+Now that Flask is installed, let me run the Python file again:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 124
+
+ASSISTANT:
+Let me check the server log again:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+* Serving Flask app 'app'
+ * Debug mode: off
+WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
+ * Running on http://127.0.0.1:5000
+Press CTRL+C to quit
+
+ASSISTANT:
+The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
+
+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+Get the content on "http://127.0.0.1:5000"
+</execute_browse>
+
+USER:
+OBSERVATION:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
+USER: Now kill the server, make it display the numbers in a table format.
+
+ASSISTANT:
+Sure! Let me stop the server first:
+<execute_bash>
+kill 124
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1]+  Terminated              python3 app.py > server.log 2>&1
+
+ASSISTANT:
+The server has been stopped. Let me open the Python file and modify it.
+<execute_ipython>
+open_file('app.py')
+</execute_ipython>
+
+USER:
+[File: /workspace/app.py (10 lines total)]
+(this is the beginning of the file)
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    return str(numbers)
+8|
+9|if __name__ == '__main__':
+10|    app.run(port=5000)
+(this is the end of the file)
+
+ASSISTANT:
+I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
+<file_edit path="/workspace/app.py" start=4 end=7>
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+    return ret
+</file_edit>
+
+USER:
+Observation:
+[Edited existing file /workspace/app.py]
+[File: /workspace/app.py (10 lines total after edit)]
+(this is the beginning of the file)
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+8|    return ret
+9|
+10|if __name__ == '__main__':
+11|    app.run(port=5000)
+(this is the end of the file)
+[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+USER:
+[User has edited a file]
+OBSERVATION:
+[Existing file app.py is edited with 1 changes.]
+[begin of edit 1 / 1]
+(content before edit)
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+8|    return ret
+9|
+10|if __name__ == '__main__':
+11|    app.run(port=5000)
+(content after edit)
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6|    numbers = list(range(1, 11))
+7|    ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+8|    return ret
+9|
+10|@app.route('/reversed')
+11|
+12|if __name__ == '__main__':
+13|    app.run(port=5000)
+
+[end of edit 1 / 1]
+
+ASSISTANT:
+I see you've started to add a new route '/reversed'. Would you like me to
+implement the logic for this route by reversing the numbers?
+
+USER:
+continue
+
+ASSISTANT:
+I should edit the file to display the reversed numbers in a table format. I should include correct indentation. Let me update the file:
+<file_edit path="/workspace/app.py" start=10 end=12>
+@app.route('/reversed')
+def index():
+    numbers = reversed(list(range(1, 11)))
+    ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+    return ret
+</file_edit>
+
+
+ASSISTANT:
+Running the updated file:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+Observation:
+[1] 126
+
+ASSISTANT:
+The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests!
+<finish></finish>
+
+--- END OF EXAMPLE ---
+{% endset %}
+Here is an example of how you can interact with the environment for task solving:
+{{ DEFAULT_EXAMPLE }}
+{% if micro_agent %}
+--- BEGIN OF GUIDELINE ---
+The following information may assist you in completing your task:
+
+{{ micro_agent }}
+--- END OF GUIDELINE ---
+{% endif %}
+
+NOW, LET'S START!
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -12,13 +12,11 @@ from openhands.controller.state.state import State, TrafficControlState
 from openhands.controller.stuck import StuckDetector
 from openhands.core.config import AgentConfig, LLMConfig
 from openhands.core.exceptions import (
-    FunctionCallNotExistsError,
    FunctionCallValidationError,
    LLMMalformedActionError,
    LLMNoActionError,
    LLMResponseError,
 )
-from openhands.core.logger import LOG_ALL_EVENTS
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.schema import AgentState
 from openhands.events import EventSource, EventStream, EventStreamSubscriber
@@ -284,6 +282,11 @@ class AgentController:
            if self.state.agent_state == AgentState.USER_REJECTED:
                await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
            return
+
+        if observation.source == EventSource.USER:
+            if self.state.agent_state == AgentState.AWAITING_USER_INPUT:
+                await self.set_agent_state_to(AgentState.RUNNING)
+
        elif isinstance(observation, ErrorObservation):
            if self.state.agent_state == AgentState.ERROR:
                self.state.metrics.merge(self.state.local_metrics)
@@ -490,7 +493,6 @@ class AgentController:
            LLMNoActionError,
            LLMResponseError,
            FunctionCallValidationError,
-            FunctionCallNotExistsError,
        ) as e:
            self.event_stream.add_event(
                ErrorObservation(
@@ -529,7 +531,8 @@ class AgentController:

        await self.update_state_after_step()

-        log_level = 'info' if LOG_ALL_EVENTS else 'debug'
+        # Use info level if LOG_ALL_EVENTS is set
+        log_level = 'info' if os.getenv('LOG_ALL_EVENTS') in ('true', '1') else 'debug'
        self.log(log_level, str(action), extra={'msg_type': 'ACTION'})

    async def _delegate_step(self):
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import os
 import sys
 from typing import Type
 from uuid import uuid4
@@ -38,6 +39,8 @@ from openhands.storage import get_file_store


 def display_message(message: str):
+    if not message:
+        return
    print(colored('🤖 ' + message + '\n', 'yellow'))


@@ -56,7 +59,8 @@ def display_command_output(output: str):


 def display_file_edit(event: FileEditAction | FileEditObservation):
-    print(colored(str(event), 'green'))
+    # print(colored(str(event), 'green'))
+    pass


 def display_event(event: Event):
@@ -66,14 +70,24 @@ def display_event(event: Event):
    if isinstance(event, MessageAction):
        if event.source == EventSource.AGENT:
            display_message(event.content)
-    if isinstance(event, CmdRunAction):
+    elif isinstance(event, CmdRunAction):
        display_command(event.command)
-    if isinstance(event, CmdOutputObservation):
+    elif isinstance(event, CmdOutputObservation):
        display_command_output(event.content)
-    if isinstance(event, FileEditAction):
-        display_file_edit(event)
-    if isinstance(event, FileEditObservation):
+    elif isinstance(event, FileEditAction):
        display_file_edit(event)
+    elif isinstance(event, FileEditObservation):
+        if event.source == EventSource.ENVIRONMENT:
+            # For file watcher events, use a different color and format
+            if not event.prev_exist:
+                print(colored(f'📝 File created: {event.path}', 'cyan'))
+            elif event.new_content == '':
+                print(colored(f'🗑️  File deleted: {event.path}', 'red'))
+            else:
+                print(colored(f'✏️  File modified: {event.path}', 'yellow'))
+        else:
+            # For regular file edits, use the standard display
+            display_file_edit(event)


 async def main():
@@ -89,6 +103,15 @@ async def main():
        help='Show the version number and exit',
        default=None,
    )
+    # Add the watch directory argument
+    parser.add_argument(
+        '-w',
+        '--watch',
+        type=str,
+        help='Directory to watch for changes',
+        metavar='DIR',
+        default=None,
+    )
    args = parser.parse_args()

    if args.version:
@@ -110,6 +133,19 @@ async def main():
    file_store = get_file_store(config.file_store, config.file_store_path)
    event_stream = EventStream(sid, file_store)

+    if args.watch:
+        from openhands.intent.watch import FileWatcher
+
+        watch_dir = os.path.abspath(args.watch)
+        if not os.path.isdir(watch_dir):
+            print(
+                f"Error: Watch directory '{args.watch}' does not exist or is not a directory"
+            )
+            return
+        print(f'Starting file watcher for directory: {watch_dir}')
+        file_watcher = FileWatcher(directory=watch_dir, event_stream=event_stream)
+        file_watcher.start()
+
    runtime_cls = get_runtime_cls(config.runtime)
    runtime: Runtime = runtime_cls(  # noqa: F841
        config=config,
@@ -124,11 +160,12 @@ async def main():
        max_iterations=config.max_iterations,
        max_budget_per_task=config.max_budget_per_task,
        agent_to_llm_config=config.get_agent_to_llm_config_map(),
+        agent_configs=config.get_agent_configs(),
        event_stream=event_stream,
    )

    async def prompt_for_next_task():
-        # Run input() in a thread pool to avoid blocking the event loop
+        await controller.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
        loop = asyncio.get_event_loop()
        next_message = await loop.run_in_executor(
            None, lambda: input('How can I help? >> ')
@@ -162,6 +199,11 @@ async def main():
        controller, runtime, [AgentState.STOPPED, AgentState.ERROR]
    )

+    # Stop file watcher if it was started
+    if args.watch and 'file_watcher' in locals():
+        print('Stopping file watcher...')
+        file_watcher.stop()
+

 if __name__ == '__main__':
    loop = asyncio.new_event_loop()
--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -241,7 +241,6 @@ def get_llm_config_arg(

    Args:
        llm_config_arg: The group of llm settings to get from the config.toml file.
-        toml_file: Path to the configuration file to read from. Defaults to 'config.toml'.

    Returns:
        LLMConfig: The LLMConfig object with the settings from the config file.
@@ -385,7 +384,7 @@ def load_app_config(
    """Load the configuration from the specified config file and environment variables.

    Args:
-        set_logging_levels: Whether to set the global variables for logging levels.
+        set_logger_levels: Whether to set the global variables for logging levels.
        config_file: Path to the config file. Defaults to 'config.toml' in the current directory.
    """
    config = AppConfig()
--- a/openhands/core/exceptions.py
+++ b/openhands/core/exceptions.py
@@ -114,10 +114,3 @@ class FunctionCallValidationError(Exception):

    def __init__(self, message):
        super().__init__(message)
-
-
-class FunctionCallNotExistsError(Exception):
-    """Exception raised when an LLM call a tool that is not registered."""
-
-    def __init__(self, message):
-        super().__init__(message)
--- a/openhands/core/logger.py
+++ b/openhands/core/logger.py
@@ -17,8 +17,6 @@ if DEBUG:
 LOG_TO_FILE = os.getenv('LOG_TO_FILE', 'False').lower() in ['true', '1', 'yes']
 DISABLE_COLOR_PRINTING = False

-LOG_ALL_EVENTS = os.getenv('LOG_ALL_EVENTS', 'False').lower() in ['true', '1', 'yes']
-
 ColorType = Literal[
    'red',
    'green',
@@ -91,11 +89,8 @@ class ColoredFormatter(logging.Formatter):
                return f'{time_str} - {name_str}:{level_str}: {record.filename}:{record.lineno}\n{msg_type_color}\n{msg}'
            return f'{time_str} - {msg_type_color}\n{msg}'
        elif msg_type == 'STEP':
-            if LOG_ALL_EVENTS:
-                msg = '\n\n==============\n' + record.msg + '\n'
-                return f'{msg}'
-            else:
-                return record.msg
+            msg = '\n\n==============\n' + record.msg + '\n'
+            return f'{msg}'
        return super().format(record)


--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -59,8 +59,7 @@ def create_runtime(
    """Create a runtime for the agent to run on.

    config: The app config.
-    sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
-        Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
+    sid: The session id.
    headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
        where we don't want to have the VSCode UI open, so it defaults to True.
    """
@@ -106,8 +105,6 @@ async def run_controller(
    Args:
        config: The app config.
        initial_user_action: An Action object containing initial user input
-        sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
-            Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
        runtime: (optional) A runtime for the agent to run on.
        agent: (optional) A agent to run.
        exit_on_message: quit if agent asks for a message from user (optional)
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -56,7 +56,6 @@ class Message(BaseModel):
    cache_enabled: bool = False
    vision_enabled: bool = False
    # function calling
-    function_calling_enabled: bool = False
    # - tool calls (from LLM)
    tool_calls: list[ChatCompletionMessageToolCall] | None = None
    # - tool execution result (to LLM)
@@ -73,22 +72,22 @@ class Message(BaseModel):
        # - into a single string: for providers that don't support list of content items (e.g. no vision, no tool calls)
        # - into a list of content items: the new APIs of providers with vision/prompt caching/tool calls
        # NOTE: remove this when litellm or providers support the new API
-        if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
+        if (
+            self.cache_enabled
+            or self.vision_enabled
+            or self.tool_call_id is not None
+            or self.tool_calls is not None
+        ):
            return self._list_serializer()
-        # some providers, like HF and Groq/llama, don't support a list here, but a single string
        return self._string_serializer()

-    def _string_serializer(self) -> dict:
-        # convert content to a single string
+    def _string_serializer(self):
        content = '\n'.join(
            item.text for item in self.content if isinstance(item, TextContent)
        )
-        message_dict: dict = {'content': content, 'role': self.role}
+        return {'content': content, 'role': self.role}

-        # add tool call keys if we have a tool call or response
-        return self._add_tool_call_keys(message_dict)
-
-    def _list_serializer(self) -> dict:
+    def _list_serializer(self):
        content: list[dict] = []
        role_tool_with_prompt_caching = False
        for item in self.content:
@@ -103,37 +102,24 @@ class Message(BaseModel):
            elif isinstance(item, ImageContent) and self.vision_enabled:
                content.extend(d)

-        message_dict: dict = {'content': content, 'role': self.role}
-
+        ret: dict = {'content': content, 'role': self.role}
        # pop content if it's empty
        if not content or (
            len(content) == 1
            and content[0]['type'] == 'text'
            and content[0]['text'] == ''
        ):
-            message_dict.pop('content')
+            ret.pop('content')

        if role_tool_with_prompt_caching:
-            message_dict['cache_control'] = {'type': 'ephemeral'}
+            ret['cache_control'] = {'type': 'ephemeral'}

-        # add tool call keys if we have a tool call or response
-        return self._add_tool_call_keys(message_dict)
-
-    def _add_tool_call_keys(self, message_dict: dict) -> dict:
-        """Add tool call keys if we have a tool call or response.
-
-        NOTE: this is necessary for both native and non-native tool calling"""
-
-        # an assistant message calling a tool
-        if self.tool_calls is not None:
-            message_dict['tool_calls'] = self.tool_calls
-
-        # an observation message with tool response
        if self.tool_call_id is not None:
            assert (
                self.name is not None
            ), 'name is required when tool_call_id is not None'
-            message_dict['tool_call_id'] = self.tool_call_id
-            message_dict['name'] = self.name
-
-        return message_dict
+            ret['tool_call_id'] = self.tool_call_id
+            ret['name'] = self.name
+        if self.tool_calls:
+            ret['tool_calls'] = self.tool_calls
+        return ret
--- a/openhands/intent/init.py
+++ b/openhands/intent/init.py
@@ -0,0 +1 @@
+"""Intent detection and processing for OpenHands."""
--- a/openhands/intent/watch.py
+++ b/openhands/intent/watch.py
@@ -0,0 +1,464 @@
+import os
+import time
+from difflib import unified_diff
+from pathlib import Path
+from threading import Timer
+from typing import Dict, Optional, Set
+
+import pathspec
+from watchdog.events import FileSystemEvent, FileSystemEventHandler
+from watchdog.observers import Observer
+
+from openhands.events import EventSource, EventStream
+from openhands.events.observation import FileEditObservation
+
+
+class FileWatcher(FileSystemEventHandler):
+    """Watches a directory for filesystem changes and emits events to the EventStream.
+
+    Args:
+        directory (str): The directory path to watch for changes
+        event_stream (EventStream): The event stream to emit events to
+        recursive (bool, optional): Whether to watch subdirectories recursively. Defaults to True.
+        patterns (list[str], optional): List of glob patterns to match files against. Defaults to None.
+        ignore_patterns (list[str], optional): List of glob patterns to ignore. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        directory: str,
+        event_stream: EventStream,
+        recursive: bool = True,
+        patterns: Optional[list[str]] = None,
+        ignore_patterns: Optional[list[str]] = None,
+    ):
+        super().__init__()
+        self.directory = os.path.abspath(directory)
+        self.event_stream = event_stream
+        self.recursive = recursive
+        self.patterns = patterns
+        # Always ignore .git directory and its contents
+        self.ignore_patterns = {'.git', '.git/*'}
+        # Add any explicitly provided ignore patterns
+        if ignore_patterns:
+            self.ignore_patterns.update(ignore_patterns)
+
+        # Load .gitignore patterns
+        self.gitignore_spec = self._load_gitignore()
+
+        self.observer = Observer()
+        # Keep track of file contents
+        self.file_contents: Dict[str, str] = {}
+        # Track files with pending changes
+        self.pending_changes: Set[str] = set()
+        # Debounce timer for each file
+        self.debounce_timers: Dict[str, Timer] = {}
+        # Debounce delay in seconds
+        self.debounce_delay = 0.1
+        # Whether to use debouncing (disabled for testing)
+        self.use_debouncing = True
+        # Track recently deleted files for handling atomic renames
+        self.recent_deletes: Dict[str, tuple[str, float]] = {}
+        # Time window to consider a delete+create as a rename (in seconds)
+        self.rename_window = 0.1
+        # Initialize file contents for existing files
+        self._initialize_file_contents()
+
+    def _load_gitignore(self) -> pathspec.PathSpec:
+        """Load .gitignore patterns from the watched directory."""
+        gitignore_patterns = []
+
+        # Only look for .gitignore in the watched directory
+        gitignore_path = os.path.join(self.directory, '.gitignore')
+        try:
+            if os.path.isfile(gitignore_path):
+                with open(gitignore_path, 'r') as f:
+                    patterns = f.read().splitlines()
+                    # Filter out empty lines and comments
+                    patterns = [p for p in patterns if p and not p.startswith('#')]
+                    gitignore_patterns.extend(patterns)
+        except IOError:
+            pass
+
+        return pathspec.PathSpec.from_lines(
+            pathspec.patterns.GitWildMatchPattern, gitignore_patterns
+        )
+
+    def _initialize_file_contents(self):
+        """Initialize the content cache for existing files in the watched directory."""
+        for root, dirs, files in os.walk(self.directory, topdown=True):
+            # Filter out ignored directories to prevent walking into them
+            dirs[:] = [
+                d for d in dirs if not self._should_ignore(os.path.join(root, d))
+            ]
+
+            # Process files in non-ignored directories
+            for file in files:
+                abs_path = os.path.join(root, file)
+                if not self._should_ignore(abs_path) and self._should_watch(abs_path):
+                    try:
+                        with open(abs_path, 'r', encoding='utf-8') as f:
+                            self.file_contents[abs_path] = f.read()
+                    except (IOError, UnicodeDecodeError):
+                        # Skip files that can't be read or aren't text files
+                        pass
+
+    def start(self):
+        """Start watching the directory for changes."""
+        self.observer.schedule(self, self.directory, recursive=self.recursive)
+        self.observer.start()
+
+    def stop(self):
+        """Stop watching the directory."""
+        # Cancel any pending timers
+        for timer in self.debounce_timers.values():
+            timer.cancel()
+        self.observer.stop()
+        self.observer.join()
+
+    def _handle_debounced_change(self, path: str):
+        """Handle a debounced file change event."""
+        if path not in self.pending_changes:
+            return
+
+        self.pending_changes.remove(path)
+        self.debounce_timers.pop(path, None)
+
+        # Skip if file should be ignored
+        if self._should_ignore(path) or not self._should_watch(path):
+            return
+
+        # Skip if this is a neovim swap file or backup file
+        if (
+            path.endswith('.swp')
+            or path.endswith('.swo')
+            or path.endswith('~')
+            or os.path.basename(path).startswith('4913')
+        ):
+            return
+
+        rel_path = os.path.relpath(path, self.directory)
+        old_content = self.file_contents.get(path, '')
+        new_content = self._read_file_content(path)
+
+        # Only emit event if content actually changed
+        if old_content != new_content:
+            diff = self._generate_diff(old_content, new_content, rel_path)
+            self.file_contents[path] = new_content
+
+            observation = FileEditObservation(
+                path=rel_path,
+                prev_exist=True,
+                old_content=old_content,
+                new_content=new_content,
+                content=diff,
+            )
+            self.event_stream.add_event(observation, EventSource.USER)
+
+    def _schedule_debounced_change(self, path: str):
+        """Schedule a debounced change event for a file."""
+        # Cancel existing timer if any
+        if path in self.debounce_timers:
+            self.debounce_timers[path].cancel()
+
+        # Create new timer
+        timer = Timer(self.debounce_delay, self._handle_debounced_change, args=[path])
+        timer.start()
+        self.debounce_timers[path] = timer
+        self.pending_changes.add(path)
+
+    def _should_ignore(self, path: str) -> bool:
+        """Check if the path should be ignored based on ignore patterns and .gitignore."""
+        # Get path relative to watched directory
+        rel_path = os.path.relpath(path, self.directory)
+
+        # Convert Windows paths to Unix style for consistency
+        rel_path = rel_path.replace(os.sep, '/')
+
+        # First check if any part of the path contains .git
+        path_parts = rel_path.split('/')
+        for i in range(len(path_parts)):
+            if path_parts[i] == '.git':
+                return True
+
+        # Then check explicit ignore patterns
+        if any(Path(rel_path).match(pattern) for pattern in self.ignore_patterns):
+            return True
+
+        # For directories, we need to check both the directory path and path with trailing slash
+        is_dir = os.path.isdir(path)
+        if is_dir:
+            # Check directory path both with and without trailing slash
+            return self.gitignore_spec.match_file(
+                rel_path
+            ) or self.gitignore_spec.match_file(rel_path + '/')
+
+        # For files, just check the path directly
+        return self.gitignore_spec.match_file(rel_path)
+
+    def _should_watch(self, path: str) -> bool:
+        """Check if the path should be watched based on patterns."""
+        if self.patterns is None:
+            return True
+        rel_path = os.path.relpath(path, self.directory)
+        return any(Path(rel_path).match(pattern) for pattern in self.patterns)
+
+    def _read_file_content(self, path: str) -> str:
+        """Read the content of a file, returning empty string if it fails."""
+        try:
+            with open(path, 'r', encoding='utf-8') as f:
+                return f.read()
+        except (IOError, UnicodeDecodeError):
+            return ''
+
+    def _generate_diff(self, old_content: str, new_content: str, path: str) -> str:
+        """Generate a unified diff between old and new content without context lines."""
+        old_lines = old_content.splitlines(keepends=True)
+        new_lines = new_content.splitlines(keepends=True)
+
+        # Generate diff with no context lines (n=0)
+        diff_lines = list(
+            unified_diff(
+                old_lines, new_lines, fromfile=path, tofile=path, n=0, lineterm=''
+            )
+        )
+
+        # Remove the file name headers and timestamp lines (first 2 lines)
+        if len(diff_lines) > 2:
+            diff_lines = diff_lines[2:]
+
+            # Also remove the @@ lines that show line numbers
+            diff_lines = [line for line in diff_lines if not line.startswith('@@')]
+
+        return ''.join(diff_lines)
+
+    def on_created(self, event: FileSystemEvent):
+        """Handle file creation event."""
+        if event.is_directory:
+            return
+
+        # If this is a neovim swap file or backup file, ignore it
+        if (
+            event.src_path.endswith('.swp')
+            or event.src_path.endswith('.swo')
+            or event.src_path.endswith('~')
+            or os.path.basename(event.src_path).startswith('4913')
+        ):
+            return
+
+        if self._should_ignore(event.src_path) or not self._should_watch(
+            event.src_path
+        ):
+            return
+
+        # Check if this is part of an atomic rename operation
+        rel_path = os.path.relpath(event.src_path, self.directory)
+        now = time.time()
+        for old_path, (old_content, timestamp) in list(self.recent_deletes.items()):
+            if now - timestamp <= self.rename_window:
+                # This is likely a rename operation
+                new_content = self._read_file_content(event.src_path)
+                if new_content == old_content:
+                    # This is definitely a rename, don't emit any events
+                    self.file_contents[event.src_path] = new_content
+                    self.recent_deletes.pop(old_path)
+                    return
+
+        if self.use_debouncing:
+            self._schedule_debounced_change(event.src_path)
+        else:
+            new_content = self._read_file_content(event.src_path)
+            self.file_contents[event.src_path] = new_content
+
+            # For new files, the diff will be all additions
+            diff = self._generate_diff('', new_content, rel_path)
+
+            observation = FileEditObservation(
+                path=rel_path,
+                prev_exist=False,
+                old_content='',
+                new_content=new_content,
+                content=diff,
+            )
+            self.event_stream.add_event(observation, EventSource.USER)
+
+    def on_modified(self, event: FileSystemEvent):
+        """Handle file modification event."""
+        if event.is_directory:
+            return
+
+        # If this is a neovim swap file or backup file, ignore it
+        if (
+            event.src_path.endswith('.swp')
+            or event.src_path.endswith('.swo')
+            or event.src_path.endswith('~')
+            or os.path.basename(event.src_path).startswith('4913')
+        ):
+            return
+
+        if self._should_ignore(event.src_path) or not self._should_watch(
+            event.src_path
+        ):
+            return
+
+        if self.use_debouncing:
+            self._schedule_debounced_change(event.src_path)
+        else:
+            rel_path = os.path.relpath(event.src_path, self.directory)
+            old_content = self.file_contents.get(event.src_path, '')
+            new_content = self._read_file_content(event.src_path)
+
+            # Only emit event if content actually changed
+            if old_content != new_content:
+                diff = self._generate_diff(old_content, new_content, rel_path)
+                self.file_contents[event.src_path] = new_content
+
+                observation = FileEditObservation(
+                    path=rel_path,
+                    prev_exist=True,
+                    old_content=old_content,
+                    new_content=new_content,
+                    content=diff,
+                )
+                self.event_stream.add_event(observation, EventSource.USER)
+
+    def on_deleted(self, event: FileSystemEvent):
+        """Handle file deletion event."""
+        if event.is_directory:
+            return
+
+        # If this is a neovim swap file or backup file, ignore it
+        if (
+            event.src_path.endswith('.swp')
+            or event.src_path.endswith('.swo')
+            or event.src_path.endswith('~')
+            or os.path.basename(event.src_path).startswith('4913')
+        ):
+            return
+
+        # Cancel any pending changes for this file
+        if event.src_path in self.debounce_timers:
+            self.debounce_timers[event.src_path].cancel()
+            self.debounce_timers.pop(event.src_path)
+            self.pending_changes.discard(event.src_path)
+
+        if self._should_ignore(event.src_path) or not self._should_watch(
+            event.src_path
+        ):
+            return
+
+        # Store the deleted file's content
+        old_content = self.file_contents.get(event.src_path, '')
+        self.file_contents.pop(event.src_path, None)
+
+        if self.use_debouncing:
+            # Only schedule a delete timer if we haven't already scheduled one
+            if event.src_path not in self.recent_deletes:
+                # Store the content temporarily in case this is a rename
+                self.recent_deletes[event.src_path] = (old_content, time.time())
+                # Schedule cleanup of recent_deletes after the rename window
+                timer = Timer(
+                    self.rename_window,
+                    self._handle_delayed_delete,
+                    args=[event.src_path, old_content],
+                )
+                timer.start()
+        else:
+            # Emit deletion event immediately
+            rel_path = os.path.relpath(event.src_path, self.directory)
+            diff = self._generate_diff(old_content, '', rel_path)
+
+            observation = FileEditObservation(
+                path=rel_path,
+                prev_exist=True,
+                old_content=old_content,
+                new_content='',
+                content=diff,
+            )
+            self.event_stream.add_event(observation, EventSource.USER)
+
+    def _handle_delayed_delete(self, path: str, old_content: str):
+        """Handle a deletion after waiting to see if it's part of a rename."""
+        # Use dict.get() to safely check if the path is still in recent_deletes
+        # and its content matches what we expect
+        stored = self.recent_deletes.get(path)
+        if stored is not None and stored[0] == old_content:
+            # This was a real deletion, not part of a rename
+            rel_path = os.path.relpath(path, self.directory)
+            diff = self._generate_diff(old_content, '', rel_path)
+
+            observation = FileEditObservation(
+                path=rel_path,
+                prev_exist=True,
+                old_content=old_content,
+                new_content='',
+                content=diff,
+            )
+            self.event_stream.add_event(observation, EventSource.USER)
+            # Use pop with a default value to avoid KeyError
+            self.recent_deletes.pop(path, None)
+
+    def on_moved(self, event: FileSystemEvent):
+        """Handle file move/rename event."""
+        if event.is_directory:
+            return
+
+        # Cancel any pending changes for the source file
+        if event.src_path in self.debounce_timers:
+            self.debounce_timers[event.src_path].cancel()
+            self.debounce_timers.pop(event.src_path)
+            self.pending_changes.discard(event.src_path)
+
+        # If this is a neovim swap file or backup file, ignore it
+        if (
+            event.src_path.endswith('.swp')
+            or event.src_path.endswith('.swo')
+            or event.src_path.endswith('~')
+            or os.path.basename(event.src_path).startswith('4913')
+            or event.dest_path.endswith('.swp')
+            or event.dest_path.endswith('.swo')
+            or event.dest_path.endswith('~')
+            or os.path.basename(event.dest_path).startswith('4913')
+        ):
+            return
+
+        if self._should_ignore(event.src_path) or not self._should_watch(
+            event.src_path
+        ):
+            return
+
+        # Handle source file deletion
+        src_rel_path = os.path.relpath(event.src_path, self.directory)
+        old_content = self.file_contents.get(event.src_path, '')
+
+        # For the source file, generate a deletion diff
+        src_diff = self._generate_diff(old_content, '', src_rel_path)
+
+        observation = FileEditObservation(
+            path=src_rel_path,
+            prev_exist=True,
+            old_content=old_content,
+            new_content='',
+            content=src_diff,
+        )
+        self.event_stream.add_event(observation, EventSource.USER)
+        self.file_contents.pop(event.src_path, None)
+
+        # Handle destination file creation
+        if not self._should_ignore(event.dest_path) and self._should_watch(
+            event.dest_path
+        ):
+            dest_rel_path = os.path.relpath(event.dest_path, self.directory)
+            self.file_contents[event.dest_path] = old_content
+
+            # For the destination file, generate an addition diff
+            dest_diff = self._generate_diff('', old_content, dest_rel_path)
+
+            observation = FileEditObservation(
+                path=dest_rel_path,
+                prev_exist=False,
+                old_content='',
+                new_content=old_content,
+                content=dest_diff,
+            )
+            self.event_stream.add_event(observation, EventSource.USER)
--- a/openhands/linter/init.py
+++ b/openhands/linter/init.py
@@ -1,11 +1,9 @@
 """Linter module for OpenHands.

-Part of this Linter module is adapted from Aider (Apache 2.0 License, [original
-code](https://github.com/paul-gauthier/aider/blob/main/aider/linter.py)).
- Please see the [original repository](https://github.com/paul-gauthier/aider) for more information.
- The detailed implementation of the linter can be found at: https://github.com/All-Hands-AI/openhands-aci.
+Part of this Linter module is adapted from Aider (Apache 2.0 License, [original code](https://github.com/paul-gauthier/aider/blob/main/aider/linter.py)). Please see the [original repository](https://github.com/paul-gauthier/aider) for more information.
 """

-from openhands_aci.linter import DefaultLinter, LintResult
+from openhands.linter.base import LintResult
+from openhands.linter.linter import DefaultLinter

 __all__ = ['DefaultLinter', 'LintResult']
--- a/openhands/linter/base.py
+++ b/openhands/linter/base.py
@@ -0,0 +1,79 @@
+from abc import ABC, abstractmethod
+
+from pydantic import BaseModel
+
+
+class LintResult(BaseModel):
+    file: str
+    line: int  # 1-indexed
+    column: int  # 1-indexed
+    message: str
+
+    def visualize(self, half_window: int = 3) -> str:
+        """Visualize the lint result by print out all the lines where the lint result is found.
+
+        Args:
+            half_window: The number of context lines to display around the error on each side.
+        """
+        with open(self.file, 'r') as f:
+            file_lines = f.readlines()
+
+        # Add line numbers
+        _span_size = len(str(len(file_lines)))
+        file_lines = [
+            f'{i + 1:>{_span_size}}|{line.rstrip()}'
+            for i, line in enumerate(file_lines)
+        ]
+
+        # Get the window of lines to display
+        assert self.line <= len(file_lines) and self.line > 0
+        line_idx = self.line - 1
+        begin_window = max(0, line_idx - half_window)
+        end_window = min(len(file_lines), line_idx + half_window + 1)
+
+        selected_lines = file_lines[begin_window:end_window]
+        line_idx_in_window = line_idx - begin_window
+
+        # Add character hint
+        _character_hint = (
+            _span_size * ' '
+            + ' ' * (self.column)
+            + '^'
+            + ' ERROR HERE: '
+            + self.message
+        )
+        selected_lines[line_idx_in_window] = (
+            f'\033[91m{selected_lines[line_idx_in_window]}\033[0m'
+            + '\n'
+            + _character_hint
+        )
+        return '\n'.join(selected_lines)
+
+
+class LinterException(Exception):
+    """Base class for all linter exceptions."""
+
+    pass
+
+
+class BaseLinter(ABC):
+    """Base class for all linters.
+
+    Each linter should be able to lint files of a specific type and return a list of (parsed) lint results.
+    """
+
+    encoding: str = 'utf-8'
+
+    @property
+    @abstractmethod
+    def supported_extensions(self) -> list[str]:
+        """The file extensions that this linter supports, such as .py or .tsx."""
+        return []
+
+    @abstractmethod
+    def lint(self, file_path: str) -> list[LintResult]:
+        """Lint the given file.
+
+        file_path: The path to the file to lint. Required to be absolute.
+        """
+        pass
--- a/openhands/linter/languages/python.py
+++ b/openhands/linter/languages/python.py
@@ -0,0 +1,98 @@
+from typing import List
+
+from openhands.core.logger import openhands_logger as logger
+from openhands.linter.base import BaseLinter, LintResult
+from openhands.linter.utils import run_cmd
+
+
+def python_compile_lint(fname: str) -> list[LintResult]:
+    try:
+        with open(fname, 'r') as f:
+            code = f.read()
+        compile(code, fname, 'exec')  # USE TRACEBACK BELOW HERE
+        return []
+    except SyntaxError as err:
+        err_lineno = getattr(err, 'end_lineno', err.lineno)
+        err_offset = getattr(err, 'end_offset', err.offset)
+        if err_offset and err_offset < 0:
+            err_offset = err.offset
+        return [
+            LintResult(
+                file=fname, line=err_lineno, column=err_offset or 1, message=err.msg
+            )
+        ]
+
+
+def flake_lint(filepath: str) -> list[LintResult]:
+    fatal = 'F821,F822,F831,E112,E113,E999,E902'
+    flake8_cmd = f'flake8 --select={fatal} --isolated {filepath}'
+
+    try:
+        cmd_outputs = run_cmd(flake8_cmd)
+    except FileNotFoundError:
+        return []
+    results: list[LintResult] = []
+    if not cmd_outputs:
+        return results
+    for line in cmd_outputs.splitlines():
+        parts = line.split(':')
+        if len(parts) >= 4:
+            _msg = parts[3].strip()
+            if len(parts) > 4:
+                _msg += ': ' + parts[4].strip()
+
+            try:
+                line_num = int(parts[1])
+            except ValueError as e:
+                logger.warning(
+                    f'Error parsing flake8 output for line: {e}. Parsed parts: {parts}. Skipping...'
+                )
+                continue
+
+            try:
+                column_num = int(parts[2])
+            except ValueError as e:
+                column_num = 1
+                _msg = (
+                    parts[2].strip() + ' ' + _msg
+                )  # add the unparsed message to the original message
+                logger.warning(
+                    f'Error parsing flake8 output for column: {e}. Parsed parts: {parts}. Using default column 1.'
+                )
+
+            results.append(
+                LintResult(
+                    file=filepath,
+                    line=line_num,
+                    column=column_num,
+                    message=_msg,
+                )
+            )
+    return results
+
+
+class PythonLinter(BaseLinter):
+    @property
+    def supported_extensions(self) -> List[str]:
+        return ['.py']
+
+    def lint(self, file_path: str) -> list[LintResult]:
+        error = flake_lint(file_path)
+        if not error:
+            error = python_compile_lint(file_path)
+        return error
+
+    def compile_lint(self, file_path: str, code: str) -> List[LintResult]:
+        try:
+            compile(code, file_path, 'exec')
+            return []
+        except SyntaxError as e:
+            return [
+                LintResult(
+                    file=file_path,
+                    line=e.lineno,
+                    column=e.offset,
+                    message=str(e),
+                    rule='SyntaxError',
+                )
+            ]
--- a/openhands/linter/languages/treesitter.py
+++ b/openhands/linter/languages/treesitter.py
@@ -0,0 +1,74 @@
+import warnings
+
+from grep_ast import TreeContext, filename_to_lang
+from grep_ast.parsers import PARSERS
+from tree_sitter_languages import get_parser
+
+from openhands.linter.base import BaseLinter, LintResult
+
+# tree_sitter is throwing a FutureWarning
+warnings.simplefilter('ignore', category=FutureWarning)
+
+
+def tree_context(fname, code, line_nums):
+    context = TreeContext(
+        fname,
+        code,
+        color=False,
+        line_number=True,
+        child_context=False,
+        last_line=False,
+        margin=0,
+        mark_lois=True,
+        loi_pad=3,
+        # header_max=30,
+        show_top_of_file_parent_scope=False,
+    )
+    line_nums = set(line_nums)
+    context.add_lines_of_interest(line_nums)
+    context.add_context()
+    output = context.format()
+    return output
+
+
+def traverse_tree(node):
+    """Traverses the tree to find errors."""
+    errors = []
+    if node.type == 'ERROR' or node.is_missing:
+        line_no = node.start_point[0] + 1
+        col_no = node.start_point[1] + 1
+        error_type = 'Missing node' if node.is_missing else 'Syntax error'
+        errors.append((line_no, col_no, error_type))
+
+    for child in node.children:
+        errors += traverse_tree(child)
+
+    return errors
+
+
+class TreesitterBasicLinter(BaseLinter):
+    @property
+    def supported_extensions(self) -> list[str]:
+        return list(PARSERS.keys())
+
+    def lint(self, file_path: str) -> list[LintResult]:
+        """Use tree-sitter to look for syntax errors, display them with tree context."""
+        lang = filename_to_lang(file_path)
+        if not lang:
+            return []
+        parser = get_parser(lang)
+        with open(file_path, 'r') as f:
+            code = f.read()
+        tree = parser.parse(bytes(code, 'utf-8'))
+        errors = traverse_tree(tree.root_node)
+        if not errors:
+            return []
+        return [
+            LintResult(
+                file=file_path,
+                line=int(line),
+                column=int(col),
+                message=error_details,
+            )
+            for line, col, error_details in errors
+        ]
--- a/openhands/linter/linter.py
+++ b/openhands/linter/linter.py
@@ -0,0 +1,122 @@
+import os
+from collections import defaultdict
+from difflib import SequenceMatcher
+
+from openhands.linter.base import BaseLinter, LinterException, LintResult
+from openhands.linter.languages.python import PythonLinter
+from openhands.linter.languages.treesitter import TreesitterBasicLinter
+
+
+class DefaultLinter(BaseLinter):
+    def __init__(self):
+        self.linters: dict[str, list[BaseLinter]] = defaultdict(list)
+        self.linters['.py'] = [PythonLinter()]
+
+        # Add treesitter linter as a fallback for all linters
+        self.basic_linter = TreesitterBasicLinter()
+        for extension in self.basic_linter.supported_extensions:
+            self.linters[extension].append(self.basic_linter)
+        self._supported_extensions = list(self.linters.keys())
+
+    @property
+    def supported_extensions(self) -> list[str]:
+        return self._supported_extensions
+
+    def lint(self, file_path: str) -> list[LintResult]:
+        if not os.path.isabs(file_path):
+            raise LinterException(f'File path {file_path} is not an absolute path')
+        file_extension = os.path.splitext(file_path)[1]
+
+        linters: list[BaseLinter] = self.linters.get(file_extension, [])
+        for linter in linters:
+            res = linter.lint(file_path)
+            # We always return the first linter's result (higher priority)
+            if res:
+                return res
+        return []
+
+    def lint_file_diff(
+        self, original_file_path: str, updated_file_path: str
+    ) -> list[LintResult]:
+        """Only return lint errors that are introduced by the diff.
+
+        Args:
+            original_file_path: The original file path.
+            updated_file_path: The updated file path.
+
+        Returns:
+            A list of lint errors that are introduced by the diff.
+        """
+        # 1. Lint the original and updated file
+        original_lint_errors: list[LintResult] = self.lint(original_file_path)
+        updated_lint_errors: list[LintResult] = self.lint(updated_file_path)
+
+        # 2. Load the original and updated file content
+        with open(original_file_path, 'r') as f:
+            old_lines = f.readlines()
+        with open(updated_file_path, 'r') as f:
+            new_lines = f.readlines()
+
+        # 3. Get line numbers that are changed & unchanged
+        # Map the line number of the original file to the updated file
+        # NOTE: this only works for lines that are not changed (i.e., equal)
+        old_to_new_line_no_mapping: dict[int, int] = {}
+        replace_or_inserted_lines: list[int] = []
+        for (
+            tag,
+            old_idx_start,
+            old_idx_end,
+            new_idx_start,
+            new_idx_end,
+        ) in SequenceMatcher(
+            isjunk=None,
+            a=old_lines,
+            b=new_lines,
+        ).get_opcodes():
+            if tag == 'equal':
+                for idx, _ in enumerate(old_lines[old_idx_start:old_idx_end]):
+                    old_to_new_line_no_mapping[old_idx_start + idx + 1] = (
+                        new_idx_start + idx + 1
+                    )
+            elif tag == 'replace' or tag == 'insert':
+                for idx, _ in enumerate(old_lines[old_idx_start:old_idx_end]):
+                    replace_or_inserted_lines.append(new_idx_start + idx + 1)
+            else:
+                # omit the case of delete
+                pass
+
+        # 4. Get pre-existing errors in unchanged lines
+        # increased error elsewhere introduced by the newlines
+        # i.e., we omit errors that are already in original files and report new one
+        new_line_no_to_original_errors: dict[int, list[LintResult]] = defaultdict(list)
+        for error in original_lint_errors:
+            if error.line in old_to_new_line_no_mapping:
+                new_line_no_to_original_errors[
+                    old_to_new_line_no_mapping[error.line]
+                ].append(error)
+
+        # 5. Select errors from lint results in new file to report
+        selected_errors = []
+        for error in updated_lint_errors:
+            # 5.1. Error introduced by replace/insert
+            if error.line in replace_or_inserted_lines:
+                selected_errors.append(error)
+            # 5.2. Error introduced by modified lines that impacted
+            #      the unchanged lines that HAVE pre-existing errors
+            elif error.line in new_line_no_to_original_errors:
+                # skip if the error is already reported
+                # or add if the error is new
+                if not any(
+                    original_error.message == error.message
+                    and original_error.column == error.column
+                    for original_error in new_line_no_to_original_errors[error.line]
+                ):
+                    selected_errors.append(error)
+            # 5.3. Error introduced by modified lines that impacted
+            #      the unchanged lines that have NO pre-existing errors
+            else:
+                selected_errors.append(error)
+
+        # 6. Sort errors by line and column
+        selected_errors.sort(key=lambda x: (x.line, x.column))
+        return selected_errors
--- a/openhands/linter/utils/init.py
+++ b/openhands/linter/utils/init.py
@@ -0,0 +1,3 @@
+from .cmd import check_tool_installed, run_cmd
+
+__all__ = ['run_cmd', 'check_tool_installed']
--- a/openhands/linter/utils/cmd.py
+++ b/openhands/linter/utils/cmd.py
@@ -0,0 +1,37 @@
+import os
+import subprocess
+
+
+def run_cmd(cmd: str, cwd: str | None = None) -> str | None:
+    """Run a command and return the output.
+
+    If the command succeeds, return None. If the command fails, return the stdout.
+    """
+
+    process = subprocess.Popen(
+        cmd.split(),
+        cwd=cwd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        encoding='utf-8',
+        errors='replace',
+    )
+    stdout, _ = process.communicate()
+    if process.returncode == 0:
+        return None
+    return stdout
+
+
+def check_tool_installed(tool_name: str) -> bool:
+    """Check if a tool is installed."""
+    try:
+        subprocess.run(
+            [tool_name, '--version'],
+            check=True,
+            cwd=os.getcwd(),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        return True
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -320,8 +320,9 @@ def convert_fncall_messages_to_non_fncall_messages(
    converted_messages = []
    first_user_message_encountered = False
    for message in messages:
-        role = message['role']
-        content = message.get('content', '')
+        role, content = message['role'], message['content']
+        if content is None:
+            content = ''

        # 1. SYSTEM MESSAGES
        # append system prompt suffix to content
@@ -338,7 +339,6 @@ def convert_fncall_messages_to_non_fncall_messages(
                    f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
                )
            converted_messages.append({'role': 'system', 'content': content})
-
        # 2. USER MESSAGES (no change)
        elif role == 'user':
            # Add in-context learning example for the first user message
@@ -447,12 +447,10 @@ def convert_fncall_messages_to_non_fncall_messages(
                        f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
                    )
            converted_messages.append({'role': 'assistant', 'content': content})
-
        # 4. TOOL MESSAGES (tool outputs)
        elif role == 'tool':
-            # Convert tool result as user message
-            tool_name = message.get('name', 'function')
-            prefix = f'EXECUTION RESULT of [{tool_name}]:\n'
+            # Convert tool result as assistant message
+            prefix = f'EXECUTION RESULT of [{message["name"]}]:\n'
            # and omit "tool_call_id" AND "name"
            if isinstance(content, str):
                content = prefix + content
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -122,9 +122,6 @@ class LLM(RetryMixin, DebugMixin):
            drop_params=self.config.drop_params,
        )

-        with warnings.catch_warnings():
-            warnings.simplefilter('ignore')
-            self.init_model_info()
        if self.vision_is_active():
            logger.debug('LLM: model has vision enabled')
        if self.is_caching_prompt_active():
@@ -146,6 +143,16 @@ class LLM(RetryMixin, DebugMixin):
            drop_params=self.config.drop_params,
        )

+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            self.init_model_info()
+        if self.vision_is_active():
+            logger.debug('LLM: model has vision enabled')
+        if self.is_caching_prompt_active():
+            logger.debug('LLM: caching prompt enabled')
+        if self.is_function_calling_active():
+            logger.debug('LLM: model supports function calling')
+
        self._completion_unwrapped = self._completion

        @self.retry_decorator(
@@ -157,6 +164,7 @@ class LLM(RetryMixin, DebugMixin):
        )
        def wrapper(*args, **kwargs):
            """Wrapper for the litellm completion function. Logs the input and output of the completion function."""
+
            from openhands.core.utils import json

            messages: list[dict[str, Any]] | dict[str, Any] = []
@@ -335,13 +343,6 @@ class LLM(RetryMixin, DebugMixin):
                pass
        logger.debug(f'Model info: {self.model_info}')

-        if self.config.model.startswith('huggingface'):
-            # HF doesn't support the OpenAI default value for top_p (1)
-            logger.debug(
-                f'Setting top_p to 0.9 for Hugging Face model: {self.config.model}'
-            )
-            self.config.top_p = 0.9 if self.config.top_p == 1 else self.config.top_p
-
        # Set the max tokens in an LM-specific way if not set
        if self.config.max_input_tokens is None:
            if (
@@ -369,16 +370,16 @@ class LLM(RetryMixin, DebugMixin):
                ):
                    self.config.max_output_tokens = self.model_info['max_tokens']

-    def vision_is_active(self) -> bool:
+    def vision_is_active(self):
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            return not self.config.disable_vision and self._supports_vision()

-    def _supports_vision(self) -> bool:
+    def _supports_vision(self):
        """Acquire from litellm if model is vision capable.

        Returns:
-            bool: True if model is vision capable. Return False if model not supported by litellm.
+            bool: True if model is vision capable. If model is not supported by litellm, it will return False.
        """
        # litellm.supports_vision currently returns False for 'openai/gpt-...' or 'anthropic/claude-...' (with prefixes)
        # but model_info will have the correct value for some reason.
@@ -476,7 +477,7 @@ class LLM(RetryMixin, DebugMixin):
        if stats:
            logger.debug(stats)

-    def get_token_count(self, messages) -> int:
+    def get_token_count(self, messages):
        """Get the number of tokens in a list of messages.

        Args:
@@ -491,7 +492,7 @@ class LLM(RetryMixin, DebugMixin):
            # TODO: this is to limit logspam in case token count is not supported
            return 0

-    def _is_local(self) -> bool:
+    def _is_local(self):
        """Determines if the system is using a locally running LLM.

        Returns:
@@ -506,7 +507,7 @@ class LLM(RetryMixin, DebugMixin):
                return True
        return False

-    def _completion_cost(self, response) -> float:
+    def _completion_cost(self, response):
        """Calculate the cost of a completion response based on the model.  Local models are treated as free.
        Add the current cost into total cost in metrics.

@@ -555,7 +556,7 @@ class LLM(RetryMixin, DebugMixin):
    def __repr__(self):
        return str(self)

-    def reset(self) -> None:
+    def reset(self):
        self.metrics.reset()

    def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:
@@ -566,7 +567,6 @@ class LLM(RetryMixin, DebugMixin):
        for message in messages:
            message.cache_enabled = self.is_caching_prompt_active()
            message.vision_enabled = self.vision_is_active()
-            message.function_calling_enabled = self.is_function_calling_active()

        # let pydantic handle the serialization
        return [message.model_dump() for message in messages]
--- a/openhands/resolver/README.md
+++ b/openhands/resolver/README.md
@@ -15,8 +15,6 @@ Follow these steps to use this workflow in your own repository:

 1. [Create a personal access token](https://github.com/settings/tokens?type=beta) with read/write scope for "contents", "issues", "pull requests", and "workflows"

-   Note: If you're working with an organizational repository, you may need to configure the organization's personal access token policy first. See [Setting a personal access token policy for your organization](https://docs.github.com/en/organizations/managing-programmatic-access-to-your-organization/setting-a-personal-access-token-policy-for-your-organization) for details.
-
 2. Create an API key for the [Claude API](https://www.anthropic.com/api) (recommended) or another supported LLM service

 3. Copy `examples/openhands-resolver.yml` to your repository's `.github/workflows/` directory
@@ -85,14 +83,11 @@ pip install openhands-ai
 3. Set up environment variables:

 ```bash
-
 # GitHub credentials
-
 export GITHUB_TOKEN="your-github-token"
 export GITHUB_USERNAME="your-github-username"  # Optional, defaults to token owner

 # LLM configuration
-
 export LLM_MODEL="anthropic/claude-3-5-sonnet-20241022"  # Recommended
 export LLM_API_KEY="your-llm-api-key"
 export LLM_BASE_URL="your-api-url"  # Optional, for API proxies
--- a/openhands/resolver/examples/openhands-resolver.yml
+++ b/openhands/resolver/examples/openhands-resolver.yml
@@ -7,10 +7,6 @@ on:
    types: [labeled]
  issue_comment:
    types: [created]
-  pull_request_review_comment:
-    types: [created]
-  pull_request_review:
-    types: [submitted]

 permissions:
  contents: write
@@ -20,24 +16,16 @@ permissions:
 jobs:
  call-openhands-resolver:
    if: |
-      github.event.label.name == 'fix-me' ||
-
-      (
-        ((github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') &&
-         (startsWith(github.event.comment.body, inputs.macro || '@openhands-agent') || startsWith(github.event.comment.body, inputs.macro || vars.OPENHANDS_MACRO)) &&
-        (github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER')
-        ) ||
-
-        (github.event_name == 'pull_request_review' &&
-        (startsWith(github.event.review.body, inputs.macro || '@openhands-agent') || startsWith(github.event.review.body, inputs.macro || vars.OPENHANDS_MACRO)) &&
-        (github.event.review.author_association == 'OWNER' || github.event.review.author_association == 'COLLABORATOR' || github.event.review.author_association == 'MEMBER')
-        )
-      )
-
+      ${{
+        github.event.label.name == 'fix-me' ||
+        (github.event_name == 'issue_comment' &&
+        startsWith(github.event.comment.body, vars.OPENHANDS_MACRO || '@openhands-agent') &&
+        (github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER'))
+      }}
    uses: All-Hands-AI/OpenHands/.github/workflows/openhands-resolver.yml@main
    with:
      macro: ${{ vars.OPENHANDS_MACRO || '@openhands-agent' }}
-      max_iterations: ${{ vars.OPENHANDS_MAX_ITER || 50 }}
+      max_iterations: 50
    secrets:
      PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
      PAT_USERNAME: ${{ secrets.PAT_USERNAME }}
--- a/openhands/resolver/issue_definitions.py
+++ b/openhands/resolver/issue_definitions.py
@@ -18,9 +18,7 @@ class IssueHandlerInterface(ABC):
    issue_type: ClassVar[str]

    @abstractmethod
-    def get_converted_issues(
-        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
-    ) -> list[GithubIssue]:
+    def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssue]:
        """Download issues from GitHub."""
        pass

@@ -85,21 +83,7 @@ class IssueHandler(IssueHandlerInterface):
        return re.findall(image_pattern, issue_body)

    def _extract_issue_references(self, body: str) -> list[int]:
-        # First, remove code blocks as they may contain false positives
-        body = re.sub(r'```.*?```', '', body, flags=re.DOTALL)
-
-        # Remove inline code
-        body = re.sub(r'`[^`]*`', '', body)
-
-        # Remove URLs that contain hash symbols
-        body = re.sub(r'https?://[^\s)]*#\d+[^\s)]*', '', body)
-
-        # Now extract issue numbers, making sure they're not part of other text
-        # The pattern matches #number that:
-        # 1. Is at the start of text or after whitespace/punctuation
-        # 2. Is followed by whitespace, punctuation, or end of text
-        # 3. Is not part of a URL
-        pattern = r'(?:^|[\s\[({]|[^\w#])#(\d+)(?=[\s,.\])}]|$)'
+        pattern = r'#(\d+)'
        return [int(match) for match in re.findall(pattern, body)]

    def _get_issue_comments(
@@ -140,29 +124,13 @@ class IssueHandler(IssueHandlerInterface):

        return all_comments if all_comments else None

-    def get_converted_issues(
-        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
-    ) -> list[GithubIssue]:
+    def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssue]:
        """Download issues from Github.

        Returns:
            List of Github issues.
        """
-
-        if not issue_numbers:
-            raise ValueError('Unspecified issue number')
-
        all_issues = self._download_issues_from_github()
-        logger.info(f'Limiting resolving to issues {issue_numbers}.')
-        all_issues = [
-            issue
-            for issue in all_issues
-            if issue['number'] in issue_numbers and 'pull_request' not in issue
-        ]
-
-        if len(issue_numbers) == 1 and not all_issues:
-            raise ValueError(f'Issue {issue_numbers[0]} not found')
-
        converted_issues = []
        for issue in all_issues:
            if any([issue.get(key) is None for key in ['number', 'title', 'body']]):
@@ -171,6 +139,9 @@ class IssueHandler(IssueHandlerInterface):
                )
                continue

+            if 'pull_request' in issue:
+                continue
+
            # Get issue thread comments
            thread_comments = self._get_issue_comments(
                issue['number'], comment_id=comment_id
@@ -484,33 +455,22 @@ class PRHandler(IssueHandler):
        )

        for issue_number in unique_issue_references:
-            try:
-                url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
-                headers = {
-                    'Authorization': f'Bearer {self.token}',
-                    'Accept': 'application/vnd.github.v3+json',
-                }
-                response = requests.get(url, headers=headers)
-                response.raise_for_status()
-                issue_data = response.json()
-                issue_body = issue_data.get('body', '')
-                if issue_body:
-                    closing_issues.append(issue_body)
-            except requests.exceptions.RequestException as e:
-                logger.warning(f'Failed to fetch issue {issue_number}: {str(e)}')
+            url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
+            headers = {
+                'Authorization': f'Bearer {self.token}',
+                'Accept': 'application/vnd.github.v3+json',
+            }
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+            issue_data = response.json()
+            issue_body = issue_data.get('body', '')
+            if issue_body:
+                closing_issues.append(issue_body)

        return closing_issues

-    def get_converted_issues(
-        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
-    ) -> list[GithubIssue]:
-        if not issue_numbers:
-            raise ValueError('Unspecified issue numbers')
-
+    def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssue]:
        all_issues = self._download_issues_from_github()
-        logger.info(f'Limiting resolving to issues {issue_numbers}.')
-        all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]
-
        converted_issues = []
        for issue in all_issues:
            # For PRs, body can be None
@@ -599,7 +559,9 @@ class PRHandler(IssueHandler):
        # Format thread comments if they exist
        thread_context = ''
        if issue.thread_comments:
-            thread_context = '\n---\n'.join(issue.thread_comments)
+            thread_context = '\n\nPR Thread Comments:\n' + '\n---\n'.join(
+                issue.thread_comments
+            )
            images.extend(self._extract_image_urls(thread_context))

        instruction = template.render(
--- a/openhands/resolver/prompts/resolve/basic-followup.jinja
+++ b/openhands/resolver/prompts/resolve/basic-followup.jinja
@@ -3,7 +3,7 @@ The feedback may be addressed to specific code files. In this case the file loca
 Please update the code based on the feedback for the repository in /workspace.
 An environment has been set up for you to start working. You may assume all necessary tools are installed.

-# Issues addressed
+# Issues addressed 
 {{ issues }}

 # Review comments
@@ -15,13 +15,10 @@ An environment has been set up for you to start working. You may assume all nece
 # Review thread files
 {{ files }}

-# PR Thread Comments
-{{ thread_context }}
-
 IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
 You SHOULD INCLUDE PROPER INDENTATION in your edit commands.{% if repo_instruction %}

 Some basic information about this repository:
 {{ repo_instruction }}{% endif %}

-When you think you have fixed the issue through code changes, please finish the interaction.
+When you think you have fixed the issue through code changes, please finish the interaction.
--- a/openhands/resolver/resolve_all_issues.py
+++ b/openhands/resolver/resolve_all_issues.py
@@ -83,10 +83,11 @@ async def resolve_issues(
    issue_handler = issue_handler_factory(issue_type, owner, repo, token)

    # Load dataset
-    issues: list[GithubIssue] = issue_handler.get_converted_issues(
-        issue_numbers=issue_numbers
-    )
+    issues: list[GithubIssue] = issue_handler.get_converted_issues()

+    if issue_numbers is not None:
+        issues = [issue for issue in issues if issue.number in issue_numbers]
+        logger.info(f'Limiting resolving to issues {issue_numbers}.')
    if limit_issues is not None:
        issues = issues[:limit_issues]
        logger.info(f'Limiting resolving to first {limit_issues} issues.')
--- a/openhands/resolver/resolve_issue.py
+++ b/openhands/resolver/resolve_issue.py
@@ -199,7 +199,7 @@ async def process_issue(
    )
    config.set_llm_config(llm_config)

-    runtime = create_runtime(config)
+    runtime = create_runtime(config, sid=f'{issue.number}')
    await runtime.connect()

    async def on_event(evt):
@@ -339,10 +339,13 @@ async def resolve_issue(

    # Load dataset
    issues: list[GithubIssue] = issue_handler.get_converted_issues(
-        issue_numbers=[issue_number], comment_id=comment_id
+        comment_id=comment_id
    )

-    issue = issues[0]
+    # Find the specific issue
+    issue = next((i for i in issues if i.number == issue_number), None)
+    if not issue:
+        raise ValueError(f'Issue {issue_number} not found')

    if comment_id is not None:
        if (
--- a/openhands/resolver/send_pull_request.py
+++ b/openhands/resolver/send_pull_request.py
@@ -203,7 +203,6 @@ def send_pull_request(
    pr_type: str,
    fork_owner: str | None = None,
    additional_message: str | None = None,
-    target_branch: str | None = None,
 ) -> str:
    if pr_type not in ['branch', 'draft', 'ready']:
        raise ValueError(f'Invalid pr_type: {pr_type}')
@@ -225,19 +224,12 @@ def send_pull_request(
        attempt += 1
        branch_name = f'{base_branch_name}-try{attempt}'

-    # Get the default branch or use specified target branch
-    print('Getting base branch...')
-    if target_branch:
-        base_branch = target_branch
-        # Verify the target branch exists
-        response = requests.get(f'{base_url}/branches/{target_branch}', headers=headers)
-        if response.status_code != 200:
-            raise ValueError(f'Target branch {target_branch} does not exist')
-    else:
-        response = requests.get(f'{base_url}', headers=headers)
-        response.raise_for_status()
-        base_branch = response.json()['default_branch']
-    print(f'Base branch: {base_branch}')
+    # Get the default branch
+    print('Getting default branch...')
+    response = requests.get(f'{base_url}', headers=headers)
+    response.raise_for_status()
+    default_branch = response.json()['default_branch']
+    print(f'Default branch: {default_branch}')

    # Create and checkout the new branch
    print('Creating new branch...')
@@ -287,7 +279,7 @@ def send_pull_request(
            'title': pr_title,  # No need to escape title for GitHub API
            'body': pr_body,
            'head': branch_name,
-            'base': base_branch,
+            'base': default_branch,
            'draft': pr_type == 'draft',
        }
        response = requests.post(f'{base_url}/pulls', headers=headers, json=data)
@@ -443,7 +435,6 @@ def process_single_issue(
    llm_config: LLMConfig,
    fork_owner: str | None,
    send_on_failure: bool,
-    target_branch: str | None = None,
 ) -> None:
    if not resolver_output.success and not send_on_failure:
        print(
@@ -493,7 +484,6 @@ def process_single_issue(
            llm_config=llm_config,
            fork_owner=fork_owner,
            additional_message=resolver_output.success_explanation,
-            target_branch=target_branch,
        )


@@ -518,7 +508,6 @@ def process_all_successful_issues(
                llm_config,
                fork_owner,
                False,
-                None,
            )


@@ -584,12 +573,6 @@ def main():
        default=None,
        help='Base URL for the LLM model.',
    )
-    parser.add_argument(
-        '--target-branch',
-        type=str,
-        default=None,
-        help='Target branch to create the pull request against (defaults to repository default branch)',
-    )
    my_args = parser.parse_args()

    github_token = (
@@ -642,7 +625,6 @@ def main():
            llm_config,
            my_args.fork_owner,
            my_args.send_on_failure,
-            my_args.target_branch,
        )


--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -52,7 +52,7 @@ from openhands.runtime.utils.bash import BashSession
 from openhands.runtime.utils.files import insert_lines, read_lines
 from openhands.runtime.utils.runtime_init import init_user_and_working_directory
 from openhands.runtime.utils.system import check_port_available
-from openhands.utils.async_utils import call_sync_from_async, wait_all
+from openhands.utils.async_utils import wait_all


 class ActionRequest(BaseModel):
@@ -170,8 +170,7 @@ class ActionExecutor:
    async def run(
        self, action: CmdRunAction
    ) -> CmdOutputObservation | ErrorObservation:
-        obs = await call_sync_from_async(self.bash_session.run, action)
-        return obs
+        return self.bash_session.run(action)

    async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
        if 'jupyter' in self.plugins:
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -47,19 +47,11 @@ STATUS_MESSAGES = {
 }


-class RuntimeUnavailableError(Exception):
+class RuntimeNotReadyError(Exception):
    pass


-class RuntimeNotReadyError(RuntimeUnavailableError):
-    pass
-
-
-class RuntimeDisconnectedError(RuntimeUnavailableError):
-    pass
-
-
-class RuntimeNotFoundError(RuntimeUnavailableError):
+class RuntimeDisconnectedError(Exception):
    pass


--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -34,11 +34,7 @@ from openhands.events.observation import (
 )
 from openhands.events.serialization import event_to_dict, observation_from_dict
 from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
-from openhands.runtime.base import (
-    Runtime,
-    RuntimeDisconnectedError,
-    RuntimeNotFoundError,
-)
+from openhands.runtime.base import Runtime
 from openhands.runtime.builder import DockerRuntimeBuilder
 from openhands.runtime.impl.eventstream.containers import remove_all_containers
 from openhands.runtime.plugins import PluginRequirement
@@ -428,22 +424,10 @@ class EventStreamRuntime(Runtime):

    @tenacity.retry(
        stop=tenacity.stop_after_delay(120) | stop_if_should_exit(),
-        retry=tenacity.retry_if_exception_type(
-            (ConnectionError, requests.exceptions.ConnectionError)
-        ),
-        reraise=True,
+        reraise=(ConnectionRefusedError,),
        wait=tenacity.wait_fixed(2),
    )
    def _wait_until_alive(self):
-        try:
-            container = self.docker_client.containers.get(self.container_name)
-            if container.status == 'exited':
-                raise RuntimeDisconnectedError(
-                    f'Container {self.container_name} has exited.'
-                )
-        except docker.errors.NotFound:
-            raise RuntimeNotFoundError(f'Container {self.container_name} not found.')
-
        self._refresh_logs()
        if not self.log_buffer:
            raise RuntimeError('Runtime client is not ready.')
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -31,7 +31,6 @@ from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
 from openhands.runtime.base import (
    Runtime,
    RuntimeDisconnectedError,
-    RuntimeNotFoundError,
    RuntimeNotReadyError,
 )
 from openhands.runtime.builder.remote import RemoteRuntimeBuilder
@@ -110,9 +109,7 @@ class RemoteRuntime(Runtime):
        if existing_runtime:
            self.log('debug', f'Using existing runtime with ID: {self.runtime_id}')
        elif self.attach_to_existing:
-            raise RuntimeNotFoundError(
-                f'Could not find existing runtime for SID: {self.sid}'
-            )
+            raise RuntimeError('Could not find existing runtime to attach to.')
        else:
            self.send_status_message('STATUS$STARTING_CONTAINER')
            if self.config.sandbox.runtime_container_image is None:
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -4,11 +4,13 @@ import re
 import tempfile
 from abc import ABC, abstractmethod

-from openhands_aci.utils.diff import get_diff
-
 from openhands.core.config import AppConfig
 from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import FileEditAction, FileReadAction, FileWriteAction
+from openhands.events.action import (
+    FileEditAction,
+    FileReadAction,
+    FileWriteAction,
+)
 from openhands.events.observation import (
    ErrorObservation,
    FileEditObservation,
@@ -20,6 +22,7 @@ from openhands.linter import DefaultLinter
 from openhands.llm.llm import LLM
 from openhands.llm.metrics import Metrics
 from openhands.utils.chunk_localizer import Chunk, get_top_k_chunk_matches
+from openhands.utils.diff import get_diff

 SYS_MSG = """Your job is to produce a new version of the file based on the old version and the
 provided draft of the new version. The provided draft may be incomplete (it may skip lines) and/or incorrectly indented. You should try to apply the changes present in the draft to the old version, and output a new version of the file.
--- a/openhands/server/listen.py
+++ b/openhands/server/listen.py
@@ -11,6 +11,7 @@ import requests
 from pathspec import PathSpec
 from pathspec.patterns import GitWildMatchPattern

+from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
 from openhands.security.options import SecurityAnalyzers
 from openhands.server.data_models.feedback import FeedbackDataModel, store_feedback
 from openhands.server.github import (
@@ -34,7 +35,6 @@ from fastapi import (
    Request,
    UploadFile,
    WebSocket,
-    WebSocketDisconnect,
    status,
 )
 from fastapi.responses import FileResponse, JSONResponse
@@ -62,14 +62,9 @@ from openhands.events.observation import (
 from openhands.events.serialization import event_to_dict
 from openhands.events.stream import AsyncEventStreamWrapper
 from openhands.llm import bedrock
-from openhands.runtime.base import Runtime, RuntimeUnavailableError
+from openhands.runtime.base import Runtime
 from openhands.server.auth.auth import get_sid_from_token, sign_token
-from openhands.server.middleware import (
-    InMemoryRateLimiter,
-    LocalhostCORSMiddleware,
-    NoCacheMiddleware,
-    RateLimitMiddleware,
-)
+from openhands.server.middleware import LocalhostCORSMiddleware, NoCacheMiddleware
 from openhands.server.session import SessionManager

 load_dotenv()
@@ -89,15 +84,6 @@ app.add_middleware(


 app.add_middleware(NoCacheMiddleware)
-app.add_middleware(
-    RateLimitMiddleware, rate_limiter=InMemoryRateLimiter(requests=10, seconds=1)
-)
-
-
-@app.get('/health')
-async def health():
-    return 'OK'
-

 security_scheme = HTTPBearer()

@@ -253,8 +239,7 @@ async def attach_session(request: Request, call_next):
    request.state.conversation = await session_manager.attach_to_conversation(
        request.state.sid
    )
-    if not request.state.conversation:
-        logger.error(f'Runtime not found for session: {request.state.sid}')
+    if request.state.conversation is None:
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND,
            content={'error': 'Session not found'},
@@ -360,13 +345,7 @@ async def websocket_endpoint(websocket: WebSocket):

    latest_event_id = -1
    if websocket.query_params.get('latest_event_id'):
-        try:
-            latest_event_id = int(websocket.query_params.get('latest_event_id'))
-        except ValueError:
-            logger.warning(
-                f'Invalid latest_event_id: {websocket.query_params.get("latest_event_id")}'
-            )
-            pass
+        latest_event_id = int(websocket.query_params.get('latest_event_id'))

    async_stream = AsyncEventStreamWrapper(
        session.agent_session.event_stream, latest_event_id + 1
@@ -383,14 +362,7 @@ async def websocket_endpoint(websocket: WebSocket):
            ),
        ):
            continue
-        try:
-            await websocket.send_json(event_to_dict(event))
-        except WebSocketDisconnect:
-            logger.warning(
-                'Websocket disconnected while sending event history, before loop started'
-            )
-            session.close()
-            return
+        await websocket.send_json(event_to_dict(event))

    await session.loop_recv()

@@ -517,14 +489,7 @@ async def list_files(request: Request, path: str | None = None):
        )

    runtime: Runtime = request.state.conversation.runtime
-    try:
-        file_list = await call_sync_from_async(runtime.list_files, path)
-    except RuntimeUnavailableError as e:
-        logger.error(f'Error listing files: {e}', exc_info=True)
-        return JSONResponse(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={'error': f'Error listing files: {e}'},
-        )
+    file_list = await call_sync_from_async(runtime.list_files, path)
    if path:
        file_list = [os.path.join(path, f) for f in file_list]

@@ -544,14 +509,7 @@ async def list_files(request: Request, path: str | None = None):
        file_list = [entry for entry in file_list if not spec.match_file(entry)]
        return file_list

-    try:
-        file_list = await filter_for_gitignore(file_list, '')
-    except RuntimeUnavailableError as e:
-        logger.error(f'Error filtering files: {e}', exc_info=True)
-        return JSONResponse(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={'error': f'Error filtering files: {e}'},
-        )
+    file_list = await filter_for_gitignore(file_list, '')

    return file_list

@@ -580,14 +538,7 @@ async def select_file(file: str, request: Request):

    file = os.path.join(runtime.config.workspace_mount_path_in_sandbox, file)
    read_action = FileReadAction(file)
-    try:
-        observation = await call_sync_from_async(runtime.run_action, read_action)
-    except RuntimeUnavailableError as e:
-        logger.error(f'Error opening file {file}: {e}', exc_info=True)
-        return JSONResponse(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={'error': f'Error opening file: {e}'},
-        )
+    observation = await call_sync_from_async(runtime.run_action, read_action)

    if isinstance(observation, FileReadObservation):
        content = observation.content
@@ -614,21 +565,27 @@ def sanitize_filename(filename):
    return filename


-@app.get('/api/conversation')
+@app.get('/api/config')
 async def get_remote_runtime_config(request: Request):
    """Retrieve the remote runtime configuration.

    Currently, this is the runtime ID.
    """
-    runtime = request.state.conversation.runtime
-    runtime_id = runtime.runtime_id if hasattr(runtime, 'runtime_id') else None
-    session_id = runtime.sid if hasattr(runtime, 'sid') else None
-    return JSONResponse(
-        content={
-            'runtime_id': runtime_id,
-            'session_id': session_id,
-        }
-    )
+    try:
+        runtime = request.state.conversation.runtime
+        if isinstance(runtime, RemoteRuntime):
+            return JSONResponse(content={'runtime_id': runtime.runtime_id})
+        else:
+            return JSONResponse(
+                status_code=status.HTTP_404_NOT_FOUND,
+                content={'error': 'Runtime ID not available in this environment'},
+            )
+    except Exception as e:
+        logger.error(e)
+        return JSONResponse(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            content={'error': 'Something went wrong'},
+        )


@app.post('/api/upload-files')
@@ -683,20 +640,9 @@ async def upload_file(request: Request, files: list[UploadFile]):
                    tmp_file.flush()

                runtime: Runtime = request.state.conversation.runtime
-                try:
-                    await call_sync_from_async(
-                        runtime.copy_to,
-                        tmp_file_path,
-                        runtime.config.workspace_mount_path_in_sandbox,
-                    )
-                except RuntimeUnavailableError as e:
-                    logger.error(
-                        f'Error saving file {safe_filename}: {e}', exc_info=True
-                    )
-                    return JSONResponse(
-                        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                        content={'error': f'Error saving file: {e}'},
-                    )
+                runtime.copy_to(
+                    tmp_file_path, runtime.config.workspace_mount_path_in_sandbox
+                )
            uploaded_files.append(safe_filename)

        response_content = {
@@ -827,14 +773,7 @@ async def save_file(request: Request):
            runtime.config.workspace_mount_path_in_sandbox, file_path
        )
        write_action = FileWriteAction(file_path, content)
-        try:
-            observation = await call_sync_from_async(runtime.run_action, write_action)
-        except RuntimeUnavailableError as e:
-            logger.error(f'Error saving file: {e}', exc_info=True)
-            return JSONResponse(
-                status_code=500,
-                content={'error': f'Error saving file: {e}'},
-            )
+        observation = await call_sync_from_async(runtime.run_action, write_action)

        if isinstance(observation, FileWriteObservation):
            return JSONResponse(
@@ -885,14 +824,7 @@ async def zip_current_workspace(request: Request, background_tasks: BackgroundTa
        logger.debug('Zipping workspace')
        runtime: Runtime = request.state.conversation.runtime
        path = runtime.config.workspace_mount_path_in_sandbox
-        try:
-            zip_file = await call_sync_from_async(runtime.copy_from, path)
-        except RuntimeUnavailableError as e:
-            logger.error(f'Error zipping workspace: {e}', exc_info=True)
-            return JSONResponse(
-                status_code=500,
-                content={'error': f'Error zipping workspace: {e}'},
-            )
+        zip_file = await call_sync_from_async(runtime.copy_from, path)
        response = FileResponse(
            path=zip_file,
            filename='workspace.zip',
--- a/openhands/server/middleware.py
+++ b/openhands/server/middleware.py
@@ -1,11 +1,6 @@
-import asyncio
-from collections import defaultdict
-from datetime import datetime, timedelta
 from urllib.parse import urlparse

-from fastapi import Request
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.types import ASGIApp

@@ -46,56 +41,3 @@ class NoCacheMiddleware(BaseHTTPMiddleware):
            response.headers['Pragma'] = 'no-cache'
            response.headers['Expires'] = '0'
        return response
-
-
-class InMemoryRateLimiter:
-    history: dict
-    requests: int
-    seconds: int
-    sleep_seconds: int
-
-    def __init__(self, requests: int = 2, seconds: int = 1, sleep_seconds: int = 1):
-        self.requests = requests
-        self.seconds = seconds
-        self.sleep_seconds = sleep_seconds
-        self.history = defaultdict(list)
-
-    def _clean_old_requests(self, key: str) -> None:
-        now = datetime.now()
-        cutoff = now - timedelta(seconds=self.seconds)
-        self.history[key] = [ts for ts in self.history[key] if ts > cutoff]
-
-    async def __call__(self, request: Request) -> bool:
-        key = request.client.host
-        now = datetime.now()
-
-        self._clean_old_requests(key)
-
-        self.history[key].append(now)
-
-        if len(self.history[key]) > self.requests * 2:
-            return False
-        elif len(self.history[key]) > self.requests:
-            if self.sleep_seconds > 0:
-                await asyncio.sleep(self.sleep_seconds)
-                return True
-            else:
-                return False
-
-        return True
-
-
-class RateLimitMiddleware(BaseHTTPMiddleware):
-    def __init__(self, app: ASGIApp, rate_limiter: InMemoryRateLimiter):
-        super().__init__(app)
-        self.rate_limiter = rate_limiter
-
-    async def dispatch(self, request, call_next):
-        ok = await self.rate_limiter(request)
-        if not ok:
-            return JSONResponse(
-                status_code=429,
-                content={'message': 'Too many requests'},
-                headers={'Retry-After': '1'},
-            )
-        return await call_next(request)
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -11,7 +11,7 @@ from openhands.events.action.agent import ChangeAgentStateAction
 from openhands.events.event import EventSource
 from openhands.events.stream import EventStream
 from openhands.runtime import get_runtime_cls
-from openhands.runtime.base import Runtime, RuntimeUnavailableError
+from openhands.runtime.base import Runtime
 from openhands.security import SecurityAnalyzer, options
 from openhands.storage.files import FileStore

@@ -194,13 +194,13 @@ class AgentSession:

        try:
            await self.runtime.connect()
-        except RuntimeUnavailableError as e:
+        except Exception as e:
            logger.error(f'Runtime initialization failed: {e}', exc_info=True)
            if self._status_callback:
                self._status_callback(
                    'error', 'STATUS$ERROR_RUNTIME_DISCONNECTED', str(e)
                )
-            return
+            raise

        if self.runtime is not None:
            logger.debug(
--- a/openhands/server/session/manager.py
+++ b/openhands/server/session/manager.py
@@ -6,7 +6,6 @@ from fastapi import WebSocket
 from openhands.core.config import AppConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.stream import session_exists
-from openhands.runtime.base import RuntimeUnavailableError
 from openhands.server.session.conversation import Conversation
 from openhands.server.session.session import Session
 from openhands.storage.files import FileStore
@@ -27,11 +26,7 @@ class SessionManager:
        if not await session_exists(sid, self.file_store):
            return None
        c = Conversation(sid, file_store=self.file_store, config=self.config)
-        try:
-            await c.connect()
-        except RuntimeUnavailableError as e:
-            logger.error(f'Error connecting to conversation {c.sid}: {e}')
-            return None
+        await c.connect()
        end_time = time.time()
        logger.info(
            f'Conversation {c.sid} connected in {end_time - start_time} seconds'
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -57,9 +57,6 @@ class Session:
                self.websocket = None
        finally:
            self.agent_session.close()
-            del (
-                self.agent_session
-            )  # FIXME: this should not be necessary but it mitigates a memory leak

    async def loop_recv(self):
        try:
--- a/openhands/utils/diff.py
+++ b/openhands/utils/diff.py
@@ -0,0 +1,41 @@
+import difflib
+
+import whatthepatch
+
+
+def get_diff(old_contents: str, new_contents: str, filepath: str = 'file') -> str:
+    diff = list(
+        difflib.unified_diff(
+            old_contents.split('\n'),
+            new_contents.split('\n'),
+            fromfile=filepath,
+            tofile=filepath,
+            # do not output unchange lines
+            # because they can cause `parse_diff` to fail
+            n=0,
+        )
+    )
+    return '\n'.join(map(lambda x: x.rstrip(), diff))
+
+
+def parse_diff(diff_patch: str) -> list[whatthepatch.patch.Change]:
+    # handle empty patch
+    if diff_patch.strip() == '':
+        return []
+
+    patch = whatthepatch.parse_patch(diff_patch)
+    patch_list = list(patch)
+    assert len(patch_list) == 1, (
+        'parse_diff only supports single file diff. But got:\nPATCH:\n'
+        + diff_patch
+        + '\nPATCH LIST:\n'
+        + str(patch_list)
+    )
+    changes = patch_list[0].changes
+
+    # ignore changes that are the same (i.e., old_lineno == new_lineno)
+    output_changes = []
+    for change in changes:
+        if change.old != change.new:
+            output_changes.append(change)
+    return output_changes
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.

 [[package]]
 name = "aenum"
@@ -5629,6 +5629,7 @@ optional = false
 python-versions = ">=3.6"
 files = [
    {file = "opencv-python-4.10.0.84.tar.gz", hash = "sha256:72d234e4582e9658ffea8e9cae5b63d488ad06994ef12d81dc303b17472f3526"},
+    {file = "opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:fc182f8f4cda51b45f01c64e4cbedfc2f00aff799debebc305d8d0210c43f251"},
    {file = "opencv_python-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:71e575744f1d23f79741450254660442785f45a0797212852ee5199ef12eed98"},
    {file = "opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09a332b50488e2dda866a6c5573ee192fe3583239fb26ff2f7f9ceb0bc119ea6"},
    {file = "opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ace140fc6d647fbe1c692bcb2abce768973491222c067c131d80957c595b71f"},
@@ -5641,18 +5642,17 @@ numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""}

 [[package]]
 name = "openhands-aci"
-version = "0.1.1"
+version = "0.1.0"
 description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
 optional = false
 python-versions = "<4.0,>=3.12"
 files = [
-    {file = "openhands_aci-0.1.1-py3-none-any.whl", hash = "sha256:8831f97b887571005dca0d70a9f6f0a4f9feb35d3d41f499e70d72b5fb68a599"},
-    {file = "openhands_aci-0.1.1.tar.gz", hash = "sha256:705b74a12a8f428e64295b5de125f553500f62ef5ab3a5a6284d8fcf638025e6"},
+    {file = "openhands_aci-0.1.0-py3-none-any.whl", hash = "sha256:f28e5a32e394d1e643f79bf8af27fe44d039cb71729d590f9f3ee0c23c075f00"},
+    {file = "openhands_aci-0.1.0.tar.gz", hash = "sha256:babc55f516efbb27eb7e528662e14b75c902965c48a110408fda824b83ea4461"},
 ]

 [package.dependencies]
 diskcache = ">=5.6.3,<6.0.0"
-flake8 = "*"
 gitpython = "*"
 grep-ast = "0.3.3"
 litellm = "*"
@@ -5661,7 +5661,6 @@ numpy = "*"
 pandas = "*"
 scipy = "*"
 tree-sitter = "0.21.3"
-whatthepatch = ">=1.0.6,<2.0.0"

 [[package]]
 name = "opentelemetry-api"
@@ -10212,4 +10211,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "b710448cff0788b563f4d7614fca438ab0b9fe19903a061750012c56da95ff37"
+content-hash = "8718ffe2ed836fca6c646c37bdad2c9c8e63ebd7ec881f420148fef5095d19e4"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openhands-ai"
-version = "0.14.1"
+version = "0.14.0"
 description = "OpenHands: Code Less, Make More"
 authors = ["OpenHands"]
 license = "MIT"
@@ -41,6 +41,7 @@ pyarrow = "17.0.0" # transitive dependency, pinned here to avoid conflicts
 tenacity = "^8.5.0"
 zope-interface = "7.1.1"
 pathspec = "^0.12.1"
+watchdog = "^3.0.0"
 google-cloud-aiplatform = "*"
 anthropic = {extras = ["vertex"], version = "*"}
 grep-ast = "0.3.3"
@@ -63,7 +64,7 @@ opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
 modal = "^0.64.145"
 runloop-api-client = "0.7.0"
 pygithub = "^2.5.0"
-openhands-aci = "^0.1.1"
+openhands-aci = "^0.1.0"

 [tool.poetry.group.llama-index.dependencies]
 llama-index = "*"
@@ -95,6 +96,7 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]

+
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -125,6 +127,7 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"

+
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"
--- a/tests/runtime/test_edit.py
+++ b/tests/runtime/test_edit.py
@@ -3,12 +3,16 @@
 import os

 import pytest
-from conftest import TEST_IN_CI, _close_test_runtime, _load_runtime
-from openhands_aci.utils.diff import get_diff
+from conftest import (
+    TEST_IN_CI,
+    _close_test_runtime,
+    _load_runtime,
+)

 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import FileEditAction, FileReadAction
 from openhands.events.observation import FileEditObservation
+from openhands.utils.diff import get_diff

 ORGINAL = """from flask import Flask
 app = Flask(__name__)
--- a/tests/test_file_watcher.py
+++ b/tests/test_file_watcher.py
@@ -0,0 +1,503 @@
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from watchdog.events import FileCreatedEvent, FileDeletedEvent, FileModifiedEvent, FileMovedEvent
+
+from openhands.events import EventSource
+from openhands.events.observation import FileEditObservation
+from openhands.intent.watch import FileWatcher
+
+
+@pytest.fixture
+def mock_event_stream():
+    """Create a mock event stream."""
+    stream = MagicMock()
+    stream.add_event = MagicMock()
+    return stream
+
+
+@pytest.fixture
+def temp_dir():
+    """Create a temporary directory for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yield tmpdir
+
+
+@pytest.fixture
+def watcher(mock_event_stream, temp_dir):
+    """Create a FileWatcher instance with mocked components and debouncing disabled."""
+    with patch('watchdog.observers.Observer'):
+        watcher = FileWatcher(temp_dir, mock_event_stream)
+        watcher.use_debouncing = False  # Disable debouncing for basic tests
+        yield watcher
+
+
+def create_test_file(path: str, content: str = ""):
+    """Create a test file with given content."""
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, 'w') as f:
+        f.write(content)
+
+
+def test_file_creation(watcher, temp_dir):
+    """Test that file creation events are handled correctly."""
+    file_path = os.path.join(temp_dir, "test.txt")
+    content = "Hello, World!"
+    
+    # Create the file
+    create_test_file(file_path, content)
+    
+    # Simulate watchdog event
+    event = FileCreatedEvent(file_path)
+    watcher.on_created(event)
+    
+    # Verify the event was emitted correctly
+    watcher.event_stream.add_event.assert_called_once()
+    args = watcher.event_stream.add_event.call_args[0]
+    observation, source = args
+    
+    assert isinstance(observation, FileEditObservation)
+    assert observation.path == "test.txt"  # Should be relative path
+    assert observation.prev_exist is False
+    assert observation.old_content == ""
+    assert observation.new_content == content
+    assert observation.content.startswith("+Hello, World!")
+    assert source == EventSource.USER
+
+
+def test_file_modification(watcher, temp_dir):
+    """Test that file modification events are handled correctly."""
+    file_path = os.path.join(temp_dir, "test.txt")
+    old_content = "Old content"
+    new_content = "New content"
+    
+    # Create initial file
+    create_test_file(file_path, old_content)
+    watcher.file_contents[file_path] = old_content
+    
+    # Update the file
+    create_test_file(file_path, new_content)
+    
+    # Simulate watchdog event
+    event = FileModifiedEvent(file_path)
+    watcher.on_modified(event)
+    
+    # Verify the event was emitted correctly
+    watcher.event_stream.add_event.assert_called_once()
+    observation, source = watcher.event_stream.add_event.call_args[0]
+    
+    assert isinstance(observation, FileEditObservation)
+    assert observation.path == "test.txt"
+    assert observation.prev_exist is True
+    assert observation.old_content == old_content
+    assert observation.new_content == new_content
+    assert "-Old content" in observation.content
+    assert "+New content" in observation.content
+    assert source == EventSource.USER
+
+
+def test_file_deletion(watcher, temp_dir):
+    """Test that file deletion events are handled correctly."""
+    file_path = os.path.join(temp_dir, "test.txt")
+    content = "Content to delete"
+    
+    # Create initial file
+    create_test_file(file_path, content)
+    watcher.file_contents[file_path] = content
+    
+    # Delete the file
+    os.unlink(file_path)
+    
+    # Simulate watchdog event
+    event = FileDeletedEvent(file_path)
+    watcher.on_deleted(event)
+    
+    # Verify the event was emitted correctly
+    watcher.event_stream.add_event.assert_called_once()
+    observation, source = watcher.event_stream.add_event.call_args[0]
+    
+    assert isinstance(observation, FileEditObservation)
+    assert observation.path == "test.txt"
+    assert observation.prev_exist is True
+    assert observation.old_content == content
+    assert observation.new_content == ""
+    assert "-Content to delete" in observation.content
+    assert source == EventSource.USER
+
+
+def test_file_move(watcher, temp_dir):
+    """Test that file move/rename events are handled correctly."""
+    src_path = os.path.join(temp_dir, "old.txt")
+    dst_path = os.path.join(temp_dir, "new.txt")
+    content = "Content to move"
+    
+    # Create source file
+    create_test_file(src_path, content)
+    watcher.file_contents[src_path] = content
+    
+    # Move the file
+    os.rename(src_path, dst_path)
+    
+    # Simulate watchdog event
+    event = FileMovedEvent(src_path, dst_path)
+    watcher.on_moved(event)
+    
+    # Should have two events: deletion and creation
+    assert watcher.event_stream.add_event.call_count == 2
+    
+    # Check deletion event
+    del_observation, del_source = watcher.event_stream.add_event.call_args_list[0][0]
+    assert isinstance(del_observation, FileEditObservation)
+    assert del_observation.path == "old.txt"
+    assert del_observation.prev_exist is True
+    assert del_observation.old_content == content
+    assert del_observation.new_content == ""
+    assert "-Content to move" in del_observation.content
+    assert del_source == EventSource.USER
+    
+    # Check creation event
+    create_observation, create_source = watcher.event_stream.add_event.call_args_list[1][0]
+    assert isinstance(create_observation, FileEditObservation)
+    assert create_observation.path == "new.txt"
+    assert create_observation.prev_exist is False
+    assert create_observation.old_content == ""
+    assert create_observation.new_content == content
+    assert "+Content to move" in create_observation.content
+    assert create_source == EventSource.USER
+
+
+def test_gitignore_handling(watcher, temp_dir):
+    """Test that .gitignore patterns are respected."""
+    # Create a .gitignore file
+    gitignore_content = """
+# Node modules
+**/node_modules/
+# Python
+*.pyc
+__pycache__/
+# Custom
+/ignored/
+*.log
+"""
+    create_test_file(os.path.join(temp_dir, ".gitignore"), gitignore_content)
+    
+    # Reload gitignore patterns
+    watcher.gitignore_spec = watcher._load_gitignore()
+    
+    # Test various paths
+    test_cases = [
+        ("node_modules/file.txt", True),
+        ("frontend/node_modules/package.json", True),
+        ("deep/path/node_modules/file.js", True),
+        ("file.pyc", True),
+        ("dir/__pycache__/module.pyc", True),
+        ("ignored/file.txt", True),
+        ("debug.log", True),
+        ("src/app.js", False),
+        ("frontend/src/components/Button.tsx", False),
+        ("README.md", False),
+    ]
+    
+    for rel_path, should_ignore in test_cases:
+        abs_path = os.path.join(temp_dir, rel_path)
+        assert watcher._should_ignore(abs_path) == should_ignore, f"Failed for {rel_path}"
+
+
+def test_git_directory_ignored(watcher, temp_dir):
+    """Test that .git directory is always ignored regardless of gitignore."""
+    # Create some files in a .git directory
+    git_files = [
+        ".git/HEAD",
+        ".git/config",
+        ".git/refs/heads/main",
+        ".git/objects/ab/cdef1234567890",
+        "subdir/.git/HEAD",  # Test nested .git directories
+        "subdir/.git/config",
+    ]
+    
+    # Create the files
+    for rel_path in git_files:
+        abs_path = os.path.join(temp_dir, rel_path)
+        create_test_file(abs_path, "test content")
+    
+    # Create some non-.git files for comparison
+    normal_files = [
+        "src/file.txt",
+        "subdir/file.txt",
+    ]
+    for rel_path in normal_files:
+        abs_path = os.path.join(temp_dir, rel_path)
+        create_test_file(abs_path, "test content")
+    
+    # Test that all .git paths are ignored
+    for rel_path in git_files:
+        abs_path = os.path.join(temp_dir, rel_path)
+        assert watcher._should_ignore(abs_path), f".git file not ignored: {rel_path}"
+        
+        # Also test the directory itself
+        dir_path = os.path.dirname(abs_path)
+        if '.git' in os.path.basename(dir_path):
+            assert watcher._should_ignore(dir_path), f".git directory not ignored: {os.path.dirname(rel_path)}"
+    
+    # Test that normal files are not ignored
+    for rel_path in normal_files:
+        abs_path = os.path.join(temp_dir, rel_path)
+        assert not watcher._should_ignore(abs_path), f"Non-.git file incorrectly ignored: {rel_path}"
+
+
+def test_explicit_ignore_patterns(watcher, temp_dir):
+    """Test that explicitly provided ignore patterns work."""
+    # Create watcher with custom ignore patterns
+    custom_patterns = ["*.txt", "temp/*"]
+    with patch('watchdog.observers.Observer'):
+        watcher = FileWatcher(
+            temp_dir,
+            watcher.event_stream,
+            ignore_patterns=custom_patterns
+        )
+    
+    test_cases = [
+        ("file.txt", True),
+        ("path/to/doc.txt", True),
+        ("temp/any.js", True),
+        ("temp/file.py", True),
+        ("file.js", False),
+        ("docs/file.md", False),
+    ]
+    
+    for rel_path, should_ignore in test_cases:
+        abs_path = os.path.join(temp_dir, rel_path)
+        assert watcher._should_ignore(abs_path) == should_ignore, f"Failed for {rel_path}"
+
+
+def test_watch_patterns(watcher, temp_dir):
+    """Test that watch patterns work correctly."""
+    # Create watcher with watch patterns
+    watch_patterns = ["*.py", "src/*.ts"]
+    with patch('watchdog.observers.Observer'):
+        watcher = FileWatcher(
+            temp_dir,
+            watcher.event_stream,
+            patterns=watch_patterns
+        )
+    
+    test_cases = [
+        ("file.py", True),
+        ("src/app.ts", True),
+        ("src/deep/file.ts", False),  # Not directly in src/
+        ("file.js", False),
+        ("src/file.js", False),
+    ]
+    
+    for rel_path, should_watch in test_cases:
+        abs_path = os.path.join(temp_dir, rel_path)
+        assert watcher._should_watch(abs_path) == should_watch, f"Failed for {rel_path}"
+
+
+@pytest.fixture
+def watcher_with_short_delay(mock_event_stream, temp_dir):
+    """Create a FileWatcher instance with a very short debounce delay for testing."""
+    with patch('watchdog.observers.Observer'):
+        watcher = FileWatcher(temp_dir, mock_event_stream)
+        # Set a very short delay for testing
+        watcher.debounce_delay = 0.01
+        yield watcher
+
+
+def test_debounce_rapid_changes(watcher_with_short_delay, temp_dir):
+    """Test that rapid changes to a file result in a single event."""
+    import time
+    
+    file_path = os.path.join(temp_dir, "test.txt")
+    initial_content = "Initial content"
+    final_content = "Final content"
+    
+    # Create initial file
+    create_test_file(file_path, initial_content)
+    watcher_with_short_delay.file_contents[file_path] = initial_content
+    
+    # Simulate rapid changes
+    for i in range(5):
+        create_test_file(file_path, f"Content version {i}")
+        event = FileModifiedEvent(file_path)
+        watcher_with_short_delay.on_modified(event)
+    
+    # Final change
+    create_test_file(file_path, final_content)
+    event = FileModifiedEvent(file_path)
+    watcher_with_short_delay.on_modified(event)
+    
+    # Wait for debounce timer
+    time.sleep(0.02)  # Slightly longer than debounce_delay
+    
+    # Should only have one event with the final content
+    watcher_with_short_delay.event_stream.add_event.assert_called_once()
+    observation, source = watcher_with_short_delay.event_stream.add_event.call_args[0]
+    
+    assert isinstance(observation, FileEditObservation)
+    assert observation.path == "test.txt"
+    assert observation.old_content == initial_content
+    assert observation.new_content == final_content
+
+
+def test_neovim_sequence(watcher_with_short_delay, temp_dir):
+    """Test handling of neovim's sequence of file operations."""
+    import time
+    
+    file_path = os.path.join(temp_dir, "test.txt")
+    initial_content = "Initial content"
+    final_content = "Final content"
+    
+    # Create initial file
+    create_test_file(file_path, initial_content)
+    watcher_with_short_delay.file_contents[file_path] = initial_content
+    
+    # Simulate neovim's sequence of operations
+    # 1. Create swap file
+    swap_path = os.path.join(temp_dir, "4913")
+    event = FileCreatedEvent(swap_path)
+    watcher_with_short_delay.on_created(event)
+    
+    # 2. Delete swap file
+    event = FileDeletedEvent(swap_path)
+    watcher_with_short_delay.on_deleted(event)
+    
+    # 3. Create backup
+    backup_path = file_path + "~"
+    event = FileCreatedEvent(backup_path)
+    watcher_with_short_delay.on_created(event)
+    
+    # 4. Modify original file
+    create_test_file(file_path, final_content)
+    event = FileModifiedEvent(file_path)
+    watcher_with_short_delay.on_modified(event)
+    
+    # 5. Delete backup
+    event = FileDeletedEvent(backup_path)
+    watcher_with_short_delay.on_deleted(event)
+    
+    # Wait for debounce timer
+    time.sleep(0.02)  # Slightly longer than debounce_delay
+    
+    # Should only have one event with the final content
+    assert watcher_with_short_delay.event_stream.add_event.call_count == 1
+    observation, source = watcher_with_short_delay.event_stream.add_event.call_args[0]
+    
+    assert isinstance(observation, FileEditObservation)
+    assert observation.path == "test.txt"
+    assert observation.old_content == initial_content
+    assert observation.new_content == final_content
+
+
+def test_debounce_timer_cancellation(watcher_with_short_delay, temp_dir):
+    """Test that pending debounce timers are properly cancelled."""
+    import time
+    
+    file_path = os.path.join(temp_dir, "test.txt")
+    initial_content = "Initial content"
+    
+    # Create initial file
+    create_test_file(file_path, initial_content)
+    watcher_with_short_delay.file_contents[file_path] = initial_content
+    
+    # Start a change
+    event = FileModifiedEvent(file_path)
+    watcher_with_short_delay.on_modified(event)
+    
+    # Verify timer is created
+    assert file_path in watcher_with_short_delay.debounce_timers
+    assert file_path in watcher_with_short_delay.pending_changes
+    
+    # Delete the file before timer expires
+    event = FileDeletedEvent(file_path)
+    watcher_with_short_delay.on_deleted(event)
+    
+    # Timer should be cancelled and removed
+    assert file_path not in watcher_with_short_delay.debounce_timers
+    assert file_path not in watcher_with_short_delay.pending_changes
+    
+    # Wait to ensure no extra events
+    time.sleep(0.2)  # Wait longer than rename_window
+    
+    # Should only have the deletion event
+    assert watcher_with_short_delay.event_stream.add_event.call_count == 1
+    observation, source = watcher_with_short_delay.event_stream.add_event.call_args[0]
+    assert observation.new_content == ""  # Deletion event
+
+
+def test_concurrent_delete_handling(watcher_with_short_delay, temp_dir):
+    """Test that concurrent delete operations are handled safely."""
+    import time
+    
+    file_path = os.path.join(temp_dir, "test.txt")
+    content = "File content"
+    
+    # Create initial file
+    create_test_file(file_path, content)
+    watcher_with_short_delay.file_contents[file_path] = content
+    
+    # Simulate a delete
+    event = FileDeletedEvent(file_path)
+    watcher_with_short_delay.on_deleted(event)
+    
+    # Simulate another delete before the first one is processed
+    watcher_with_short_delay.on_deleted(event)
+    
+    # Wait for both timers
+    time.sleep(0.2)  # Longer than rename_window
+    
+    # Should only have one deletion event
+    assert watcher_with_short_delay.event_stream.add_event.call_count == 1
+    observation, source = watcher_with_short_delay.event_stream.add_event.call_args[0]
+    assert observation.path == "test.txt"
+    assert observation.old_content == content
+    assert observation.new_content == ""
+
+
+def test_atomic_rename_handling(watcher_with_short_delay, temp_dir):
+    """Test that atomic renames (delete+create with same content) are handled correctly."""
+    import time
+    
+    old_path = os.path.join(temp_dir, "old.txt")
+    new_path = os.path.join(temp_dir, "new.txt")
+    content = "File content"
+    
+    # Create initial file
+    create_test_file(old_path, content)
+    watcher_with_short_delay.file_contents[old_path] = content
+    
+    # Simulate atomic rename (delete + create with same content)
+    event = FileDeletedEvent(old_path)
+    watcher_with_short_delay.on_deleted(event)
+    
+    # Create the new file with the same content
+    create_test_file(new_path, content)
+    event = FileCreatedEvent(new_path)
+    watcher_with_short_delay.on_created(event)
+    
+    # Wait a bit to ensure any delayed events are processed
+    time.sleep(0.02)
+    
+    # Should have no events since it was just a rename
+    assert watcher_with_short_delay.event_stream.add_event.call_count == 0
+    assert new_path in watcher_with_short_delay.file_contents
+    assert watcher_with_short_delay.file_contents[new_path] == content
+    
+    # Now modify the file
+    new_content = "Modified content"
+    create_test_file(new_path, new_content)
+    event = FileModifiedEvent(new_path)
+    watcher_with_short_delay.on_modified(event)
+    
+    # Wait for debounce timer
+    time.sleep(0.02)
+    
+    # Should now have one event for the modification
+    assert watcher_with_short_delay.event_stream.add_event.call_count == 1
+    observation, source = watcher_with_short_delay.event_stream.add_event.call_args[0]
+    assert observation.path == "new.txt"
+    assert observation.old_content == content
+    assert observation.new_content == new_content
--- a/tests/unit/linters/conftest.py
+++ b/tests/unit/linters/conftest.py
@@ -0,0 +1,75 @@
+import pytest
+
+
+@pytest.fixture
+def syntax_error_py_file(tmp_path):
+    file_content = """
+    def foo():
+        print("Hello, World!")
+    print("Wrong indent")
+    foo(
+    """
+    file_path = tmp_path / 'test_file.py'
+    file_path.write_text(file_content)
+    return str(file_path)
+
+
+@pytest.fixture
+def wrongly_indented_py_file(tmp_path):
+    file_content = """
+    def foo():
+            print("Hello, World!")
+    """
+    file_path = tmp_path / 'test_file.py'
+    file_path.write_text(file_content)
+    return str(file_path)
+
+
+@pytest.fixture
+def simple_correct_py_file(tmp_path):
+    file_content = 'print("Hello, World!")\n'
+    file_path = tmp_path / 'test_file.py'
+    file_path.write_text(file_content)
+    return str(file_path)
+
+
+@pytest.fixture
+def simple_correct_py_func_def(tmp_path):
+    file_content = """def foo():
+    print("Hello, World!")
+foo()
+"""
+    file_path = tmp_path / 'test_file.py'
+    file_path.write_text(file_content)
+    return str(file_path)
+
+
+@pytest.fixture
+def simple_correct_ruby_file(tmp_path):
+    file_content = """def foo
+  puts "Hello, World!"
+end
+foo
+"""
+    file_path = tmp_path / 'test_file.rb'
+    file_path.write_text(file_content)
+    return str(file_path)
+
+
+@pytest.fixture
+def simple_incorrect_ruby_file(tmp_path):
+    file_content = """def foo():
+    print("Hello, World!")
+foo()
+"""
+    file_path = tmp_path / 'test_file.rb'
+    file_path.write_text(file_content)
+    return str(file_path)
+
+
+@pytest.fixture
+def parenthesis_incorrect_ruby_file(tmp_path):
+    file_content = """def print_hello_world()\n    puts 'Hello World'\n"""
+    file_path = tmp_path / 'test_file.rb'
+    file_path.write_text(file_content)
+    return str(file_path)
--- a/tests/unit/linters/test_lint_diff.py
+++ b/tests/unit/linters/test_lint_diff.py
@@ -0,0 +1,417 @@
+from openhands.linter import DefaultLinter, LintResult
+from openhands.utils.diff import get_diff, parse_diff
+
+OLD_CONTENT = """
+def foo():
+    print("Hello, World!")
+    x = UNDEFINED_VARIABLE
+foo()
+"""
+
+NEW_CONTENT_V1 = (
+    OLD_CONTENT
+    + """
+def new_function_that_causes_error():
+    y = ANOTHER_UNDEFINED_VARIABLE
+"""
+)
+
+NEW_CONTENT_V2 = """
+def foo():
+    print("Hello, World!")
+    x = UNDEFINED_VARIABLE
+    y = ANOTHER_UNDEFINED_VARIABLE
+foo()
+"""
+
+
+def test_get_and_parse_diff(tmp_path):
+    diff = get_diff(OLD_CONTENT, NEW_CONTENT_V1, 'test.py')
+    print(diff)
+    assert (
+        diff
+        == """
+--- test.py
+++ test.py
+@@ -6,0 +7,3 @@
+def new_function_that_causes_error():
+    y = ANOTHER_UNDEFINED_VARIABLE
+
+""".strip()
+    )
+
+    print(
+        '\n'.join(
+            [f'{i+1}|{line}' for i, line in enumerate(NEW_CONTENT_V1.splitlines())]
+        )
+    )
+    changes = parse_diff(diff)
+    assert len(changes) == 3
+    assert (
+        changes[0].old is None
+        and changes[0].new == 7
+        and changes[0].line == 'def new_function_that_causes_error():'
+    )
+    assert (
+        changes[1].old is None
+        and changes[1].new == 8
+        and changes[1].line == '    y = ANOTHER_UNDEFINED_VARIABLE'
+    )
+    assert changes[2].old is None and changes[2].new == 9 and changes[2].line == ''
+
+
+def test_lint_with_diff_append(tmp_path):
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(OLD_CONTENT)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(NEW_CONTENT_V1)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    print(result)
+    assert len(result) == 1
+    assert (
+        result[0].line == 8
+        and result[0].column == 9
+        and result[0].message == "F821 undefined name 'ANOTHER_UNDEFINED_VARIABLE'"
+    )
+
+
+def test_lint_with_diff_insert(tmp_path):
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(OLD_CONTENT)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(NEW_CONTENT_V2)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    assert len(result) == 1
+    assert (
+        result[0].line == 5
+        and result[0].column == 9
+        and result[0].message == "F821 undefined name 'ANOTHER_UNDEFINED_VARIABLE'"
+    )
+
+
+def test_lint_with_multiple_changes_and_errors(tmp_path):
+    old_content = """
+def foo():
+    print("Hello, World!")
+    x = 10
+foo()
+"""
+    new_content = """
+def foo():
+    print("Hello, World!")
+    x = UNDEFINED_VARIABLE
+    y = 20
+
+def bar():
+    z = ANOTHER_UNDEFINED_VARIABLE
+    return z + 1
+
+foo()
+bar()
+"""
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(old_content)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    assert len(result) == 2
+    assert (
+        result[0].line == 4
+        and result[0].column == 9
+        and result[0].message == "F821 undefined name 'UNDEFINED_VARIABLE'"
+    )
+    assert (
+        result[1].line == 8
+        and result[1].column == 9
+        and result[1].message == "F821 undefined name 'ANOTHER_UNDEFINED_VARIABLE'"
+    )
+
+
+def test_lint_with_introduced_and_fixed_errors(tmp_path):
+    old_content = """
+x = UNDEFINED_VARIABLE
+y = 10
+"""
+    new_content = """
+x = 5
+y = ANOTHER_UNDEFINED_VARIABLE
+z = UNDEFINED_VARIABLE
+"""
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(old_content)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    assert len(result) == 2
+    assert (
+        result[0].line == 3
+        and result[0].column == 5
+        and result[0].message == "F821 undefined name 'ANOTHER_UNDEFINED_VARIABLE'"
+    )
+    assert (
+        result[1].line == 4
+        and result[1].column == 5
+        and result[1].message == "F821 undefined name 'UNDEFINED_VARIABLE'"
+    )
+
+
+def test_lint_with_multiline_changes(tmp_path):
+    old_content = """
+def complex_function(a, b, c):
+    return (a +
+            b +
+            c)
+"""
+    new_content = """
+def complex_function(a, b, c):
+    return (a +
+            UNDEFINED_VARIABLE +
+            b +
+            c)
+"""
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(old_content)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    assert len(result) == 1
+    assert (
+        result[0].line == 4
+        and result[0].column == 13
+        and result[0].message == "F821 undefined name 'UNDEFINED_VARIABLE'"
+    )
+
+
+def test_lint_with_syntax_error(tmp_path):
+    old_content = """
+def foo():
+    print("Hello, World!")
+"""
+    new_content = """
+def foo():
+    print("Hello, World!"
+"""
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(old_content)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    assert len(result) == 1
+    assert (
+        result[0].line == 3
+        and result[0].column == 11
+        and result[0].message == "E999 SyntaxError: '(' was never closed"
+    )
+
+
+def test_lint_with_docstring_changes(tmp_path):
+    old_content = '''
+def foo():
+    """This is a function."""
+    print("Hello, World!")
+'''
+    new_content = '''
+def foo():
+    """
+    This is a function.
+    It now has a multi-line docstring with an UNDEFINED_VARIABLE.
+    """
+    print("Hello, World!")
+'''
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(old_content)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    assert len(result) == 0  # Linter should ignore changes in docstrings
+
+
+def test_lint_with_multiple_errors_on_same_line(tmp_path):
+    old_content = """
+def foo():
+    print("Hello, World!")
+    x = 10
+foo()
+"""
+    new_content = """
+def foo():
+    print("Hello, World!")
+    x = UNDEFINED_VARIABLE + ANOTHER_UNDEFINED_VARIABLE
+foo()
+"""
+    with open(tmp_path / 'old.py', 'w') as f:
+        f.write(old_content)
+    with open(tmp_path / 'new.py', 'w') as f:
+        f.write(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(tmp_path / 'old.py'),
+        str(tmp_path / 'new.py'),
+    )
+    print(result)
+    assert len(result) == 2
+    assert (
+        result[0].line == 4
+        and result[0].column == 9
+        and result[0].message == "F821 undefined name 'UNDEFINED_VARIABLE'"
+    )
+    assert (
+        result[1].line == 4
+        and result[1].column == 30
+        and result[1].message == "F821 undefined name 'ANOTHER_UNDEFINED_VARIABLE'"
+    )
+
+
+def test_parse_diff_with_empty_patch():
+    diff_patch = ''
+    changes = parse_diff(diff_patch)
+    assert len(changes) == 0
+
+
+def test_lint_file_diff_ignore_existing_errors(tmp_path):
+    """
+    Make sure we allow edits as long as it does not introduce new errors. In other
+    words, we don't care about existing linting errors. Although they might be
+    real syntax issues, sometimes they are just false positives, or errors that
+    we don't care about.
+    """
+    content = """def some_valid_but_weird_function():
+    # this function is legitimate, yet static analysis tools like flake8
+    # reports 'F821 undefined name'
+    if 'variable' in locals():
+        print(variable)
+def some_wrong_but_unused_function():
+    # this function has a linting error, but it is not modified by us, and
+    # who knows, this function might be completely dead code
+    x = 1
+def sum(a, b):
+    return a - b
+"""
+    new_content = content.replace('    return a - b', '    return a + b')
+    temp_file_old_path = tmp_path / 'problematic-file-test.py'
+    temp_file_old_path.write_text(content)
+    temp_file_new_path = tmp_path / 'problematic-file-test-new.py'
+    temp_file_new_path.write_text(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(temp_file_old_path),
+        str(temp_file_new_path),
+    )
+    assert len(result) == 0  # no new errors introduced
+
+
+def test_lint_file_diff_catch_new_errors_in_edits(tmp_path):
+    """
+    Make sure we catch new linting errors in our edit chunk, and at the same
+    time, ignore old linting errors (in this case, the old linting error is
+    a false positive)
+    """
+    content = """def some_valid_but_weird_function():
+    # this function is legitimate, yet static analysis tools like flake8
+    # reports 'F821 undefined name'
+    if 'variable' in locals():
+        print(variable)
+def sum(a, b):
+    return a - b
+"""
+
+    temp_file_old_path = tmp_path / 'problematic-file-test.py'
+    temp_file_old_path.write_text(content)
+    new_content = content.replace('    return a - b', '    return a + variable')
+    temp_file_new_path = tmp_path / 'problematic-file-test-new.py'
+    temp_file_new_path.write_text(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(temp_file_old_path),
+        str(temp_file_new_path),
+    )
+    print(result)
+    assert len(result) == 1
+    assert (
+        result[0].line == 7
+        and result[0].column == 16
+        and result[0].message == "F821 undefined name 'variable'"
+    )
+
+
+def test_lint_file_diff_catch_new_errors_outside_edits(tmp_path):
+    """
+    Make sure we catch new linting errors induced by our edits, even
+    though the error itself is not in the edit chunk
+    """
+    content = """def valid_func1():
+    print(my_sum(1, 2))
+def my_sum(a, b):
+    return a - b
+def valid_func2():
+    print(my_sum(0, 0))
+"""
+    # Add 100 lines of invalid code, which linter shall ignore
+    # because they are not being edited. For testing purpose, we
+    # must add these existing linting errors, otherwise the pre-edit
+    # linting would pass, and thus there won't be any comparison
+    # between pre-edit and post-edit linting.
+    for _ in range(100):
+        content += '\ninvalid_func()'
+
+    temp_file_old_path = tmp_path / 'problematic-file-test.py'
+    temp_file_old_path.write_text(content)
+
+    new_content = content.replace('def my_sum(a, b):', 'def my_sum2(a, b):')
+    temp_file_new_path = tmp_path / 'problematic-file-test-new.py'
+    temp_file_new_path.write_text(new_content)
+
+    linter = DefaultLinter()
+    result: list[LintResult] = linter.lint_file_diff(
+        str(temp_file_old_path),
+        str(temp_file_new_path),
+    )
+    assert len(result) == 2
+    assert (
+        result[0].line == 2
+        and result[0].column == 11
+        and result[0].message == "F821 undefined name 'my_sum'"
+    )
+    assert (
+        result[1].line == 6
+        and result[1].column == 11
+        and result[1].message == "F821 undefined name 'my_sum'"
+    )
--- a/tests/unit/linters/test_python_linter.py
+++ b/tests/unit/linters/test_python_linter.py
@@ -0,0 +1,84 @@
+from openhands.linter import DefaultLinter, LintResult
+from openhands.linter.languages.python import (
+    PythonLinter,
+    flake_lint,
+    python_compile_lint,
+)
+
+
+def test_wrongly_indented_py_file(wrongly_indented_py_file):
+    # Test Python linter
+    linter = PythonLinter()
+    assert '.py' in linter.supported_extensions
+    result = linter.lint(wrongly_indented_py_file)
+    print(result)
+    assert isinstance(result, list) and len(result) == 1
+    assert result[0] == LintResult(
+        file=wrongly_indented_py_file,
+        line=2,
+        column=5,
+        message='E999 IndentationError: unexpected indent',
+    )
+    print(result[0].visualize())
+    assert result[0].visualize() == (
+        '1|\n'
+        '\033[91m2|    def foo():\033[0m\n'
+        '      ^ ERROR HERE: E999 IndentationError: unexpected indent\n'
+        '3|            print("Hello, World!")\n'
+        '4|'
+    )
+
+    # General linter should have same result as Python linter
+    # bc it uses PythonLinter under the hood
+    general_linter = DefaultLinter()
+    assert '.py' in general_linter.supported_extensions
+    result = general_linter.lint(wrongly_indented_py_file)
+    assert result == linter.lint(wrongly_indented_py_file)
+
+    # Test flake8_lint
+    assert result == flake_lint(wrongly_indented_py_file)
+
+    # Test python_compile_lint
+    compile_result = python_compile_lint(wrongly_indented_py_file)
+    assert isinstance(compile_result, list) and len(compile_result) == 1
+    assert compile_result[0] == LintResult(
+        file=wrongly_indented_py_file, line=2, column=4, message='unexpected indent'
+    )
+
+
+def test_simple_correct_py_file(simple_correct_py_file):
+    linter = PythonLinter()
+    assert '.py' in linter.supported_extensions
+    result = linter.lint(simple_correct_py_file)
+    assert result == []
+
+    general_linter = DefaultLinter()
+    assert '.py' in general_linter.supported_extensions
+    result = general_linter.lint(simple_correct_py_file)
+    assert result == linter.lint(simple_correct_py_file)
+
+    # Test python_compile_lint
+    compile_result = python_compile_lint(simple_correct_py_file)
+    assert compile_result == []
+
+    # Test flake_lint
+    flake_result = flake_lint(simple_correct_py_file)
+    assert flake_result == []
+
+
+def test_simple_correct_py_func_def(simple_correct_py_func_def):
+    linter = PythonLinter()
+    result = linter.lint(simple_correct_py_func_def)
+    assert result == []
+
+    general_linter = DefaultLinter()
+    assert '.py' in general_linter.supported_extensions
+    result = general_linter.lint(simple_correct_py_func_def)
+    assert result == linter.lint(simple_correct_py_func_def)
+
+    # Test flake_lint
+    assert result == flake_lint(simple_correct_py_func_def)
+
+    # Test python_compile_lint
+    compile_result = python_compile_lint(simple_correct_py_func_def)
+    assert compile_result == []
--- a/tests/unit/linters/test_treesitter_linter.py
+++ b/tests/unit/linters/test_treesitter_linter.py
@@ -0,0 +1,113 @@
+from openhands.linter import DefaultLinter, LintResult
+from openhands.linter.languages.treesitter import TreesitterBasicLinter
+
+
+def test_syntax_error_py_file(syntax_error_py_file):
+    linter = TreesitterBasicLinter()
+    result = linter.lint(syntax_error_py_file)
+    print(result)
+    assert isinstance(result, list) and len(result) == 1
+    assert result[0] == LintResult(
+        file=syntax_error_py_file,
+        line=5,
+        column=5,
+        message='Syntax error',
+    )
+
+    assert (
+        result[0].visualize()
+        == (
+            '2|    def foo():\n'
+            '3|        print("Hello, World!")\n'
+            '4|    print("Wrong indent")\n'
+            '\033[91m5|    foo(\033[0m\n'  # color red
+            '      ^ ERROR HERE: Syntax error\n'
+            '6|'
+        )
+    )
+    print(result[0].visualize())
+
+    general_linter = DefaultLinter()
+    general_result = general_linter.lint(syntax_error_py_file)
+    # NOTE: general linter returns different result
+    # because it uses flake8 first, which is different from treesitter
+    assert general_result != result
+
+
+def test_simple_correct_ruby_file(simple_correct_ruby_file):
+    linter = TreesitterBasicLinter()
+    result = linter.lint(simple_correct_ruby_file)
+    assert isinstance(result, list) and len(result) == 0
+
+    # Test that the general linter also returns the same result
+    general_linter = DefaultLinter()
+    general_result = general_linter.lint(simple_correct_ruby_file)
+    assert general_result == result
+
+
+def test_simple_incorrect_ruby_file(simple_incorrect_ruby_file):
+    linter = TreesitterBasicLinter()
+    result = linter.lint(simple_incorrect_ruby_file)
+    print(result)
+    assert isinstance(result, list) and len(result) == 2
+    assert result[0] == LintResult(
+        file=simple_incorrect_ruby_file,
+        line=1,
+        column=1,
+        message='Syntax error',
+    )
+    print(result[0].visualize())
+    assert (
+        result[0].visualize()
+        == (
+            '\033[91m1|def foo():\033[0m\n'  # color red
+            '  ^ ERROR HERE: Syntax error\n'
+            '2|    print("Hello, World!")\n'
+            '3|foo()'
+        )
+    )
+    assert result[1] == LintResult(
+        file=simple_incorrect_ruby_file,
+        line=1,
+        column=10,
+        message='Syntax error',
+    )
+    print(result[1].visualize())
+    assert (
+        result[1].visualize()
+        == (
+            '\033[91m1|def foo():\033[0m\n'  # color red
+            '           ^ ERROR HERE: Syntax error\n'
+            '2|    print("Hello, World!")\n'
+            '3|foo()'
+        )
+    )
+
+    # Test that the general linter also returns the same result
+    general_linter = DefaultLinter()
+    general_result = general_linter.lint(simple_incorrect_ruby_file)
+    assert general_result == result
+
+
+def test_parenthesis_incorrect_ruby_file(parenthesis_incorrect_ruby_file):
+    linter = TreesitterBasicLinter()
+    result = linter.lint(parenthesis_incorrect_ruby_file)
+    print(result)
+    assert isinstance(result, list) and len(result) == 1
+    assert result[0] == LintResult(
+        file=parenthesis_incorrect_ruby_file,
+        line=1,
+        column=1,
+        message='Syntax error',
+    )
+    print(result[0].visualize())
+    assert result[0].visualize() == (
+        '\033[91m1|def print_hello_world()\033[0m\n'
+        '  ^ ERROR HERE: Syntax error\n'
+        "2|    puts 'Hello World'"
+    )
+
+    # Test that the general linter also returns the same result
+    general_linter = DefaultLinter()
+    general_result = general_linter.lint(parenthesis_incorrect_ruby_file)
+    assert general_result == result
--- a/tests/unit/linters/test_visualize.py
+++ b/tests/unit/linters/test_visualize.py
@@ -0,0 +1,86 @@
+from unittest.mock import mock_open, patch
+
+import pytest
+
+from openhands.linter.base import LintResult
+
+
+@pytest.fixture
+def mock_file_content():
+    return '\n'.join([f'Line {i}' for i in range(1, 21)])
+
+
+def test_visualize_standard_case(mock_file_content):
+    lint_result = LintResult(
+        file='test_file.py', line=10, column=5, message='Test error message'
+    )
+
+    with patch('builtins.open', mock_open(read_data=mock_file_content)):
+        result = lint_result.visualize(half_window=3)
+
+    expected_output = (
+        " 7|Line 7\n"
+        " 8|Line 8\n"
+        " 9|Line 9\n"
+        "\033[91m10|Line 10\033[0m\n"
+        f"  {' ' * lint_result.column}^ ERROR HERE: Test error message\n"
+        "11|Line 11\n"
+        "12|Line 12\n"
+        "13|Line 13"
+    )
+
+    assert result == expected_output
+
+
+def test_visualize_small_window(mock_file_content):
+    lint_result = LintResult(
+        file='test_file.py', line=10, column=5, message='Test error message'
+    )
+
+    with patch('builtins.open', mock_open(read_data=mock_file_content)):
+        result = lint_result.visualize(half_window=1)
+
+    expected_output = (
+        " 9|Line 9\n"
+        "\033[91m10|Line 10\033[0m\n"
+        f"  {' ' * lint_result.column}^ ERROR HERE: Test error message\n"
+        "11|Line 11"
+    )
+
+    assert result == expected_output
+
+
+def test_visualize_error_at_start(mock_file_content):
+    lint_result = LintResult(
+        file='test_file.py', line=1, column=3, message='Start error'
+    )
+
+    with patch('builtins.open', mock_open(read_data=mock_file_content)):
+        result = lint_result.visualize(half_window=2)
+
+    expected_output = (
+        "\033[91m 1|Line 1\033[0m\n"
+        f"  {' ' * lint_result.column}^ ERROR HERE: Start error\n"
+        " 2|Line 2\n"
+        " 3|Line 3"
+    )
+
+    assert result == expected_output
+
+
+def test_visualize_error_at_end(mock_file_content):
+    lint_result = LintResult(
+        file='test_file.py', line=20, column=1, message='End error'
+    )
+
+    with patch('builtins.open', mock_open(read_data=mock_file_content)):
+        result = lint_result.visualize(half_window=2)
+
+    expected_output = (
+        "18|Line 18\n"
+        "19|Line 19\n"
+        "\033[91m20|Line 20\033[0m\n"
+        f"  {' ' * lint_result.column}^ ERROR HERE: End error"
+    )
+
+    assert result == expected_output
--- a/tests/unit/resolver/test_issue_handler.py
+++ b/tests/unit/resolver/test_issue_handler.py
@@ -1,18 +1,17 @@
-from unittest.mock import MagicMock, patch
-
-from openhands.core.config import LLMConfig
-from openhands.events.action.message import MessageAction
-from openhands.resolver.github_issue import GithubIssue, ReviewThread
+from unittest.mock import patch, MagicMock
 from openhands.resolver.issue_definitions import IssueHandler, PRHandler
+from openhands.resolver.github_issue import GithubIssue, ReviewThread
+from openhands.events.action.message import MessageAction
+from openhands.core.config import LLMConfig


 def test_get_converted_issues_initializes_review_comments():
    # Mock the necessary dependencies
-    with patch('requests.get') as mock_get:
+    with patch("requests.get") as mock_get:
        # Mock the response for issues
        mock_issues_response = MagicMock()
        mock_issues_response.json.return_value = [
-            {'number': 1, 'title': 'Test Issue', 'body': 'Test Body'}
+            {"number": 1, "title": "Test Issue", "body": "Test Body"}
        ]
        # Mock the response for comments
        mock_comments_response = MagicMock()
@@ -27,10 +26,10 @@ def test_get_converted_issues_initializes_review_comments():
        ]  # Need two comment responses because we make two API calls

        # Create an instance of IssueHandler
-        handler = IssueHandler('test-owner', 'test-repo', 'test-token')
+        handler = IssueHandler("test-owner", "test-repo", "test-token")

        # Get converted issues
-        issues = handler.get_converted_issues(issue_numbers=[1])
+        issues = handler.get_converted_issues()

        # Verify that we got exactly one issue
        assert len(issues) == 1
@@ -40,35 +39,35 @@ def test_get_converted_issues_initializes_review_comments():

        # Verify other fields are set correctly
        assert issues[0].number == 1
-        assert issues[0].title == 'Test Issue'
-        assert issues[0].body == 'Test Body'
-        assert issues[0].owner == 'test-owner'
-        assert issues[0].repo == 'test-repo'
+        assert issues[0].title == "Test Issue"
+        assert issues[0].body == "Test Body"
+        assert issues[0].owner == "test-owner"
+        assert issues[0].repo == "test-repo"


 def test_pr_handler_guess_success_with_thread_comments():
    # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create a mock issue with thread comments but no review comments
    issue = GithubIssue(
-        owner='test-owner',
-        repo='test-repo',
+        owner="test-owner",
+        repo="test-repo",
        number=1,
-        title='Test PR',
-        body='Test Body',
-        thread_comments=['First comment', 'Second comment'],
-        closing_issues=['Issue description'],
+        title="Test PR",
+        body="Test Body",
+        thread_comments=["First comment", "Second comment"],
+        closing_issues=["Issue description"],
        review_comments=None,
        thread_ids=None,
-        head_branch='test-branch',
+        head_branch="test-branch",
    )

    # Create mock history
-    history = [MessageAction(content='Fixed the issue by implementing X and Y')]
+    history = [MessageAction(content="Fixed the issue by implementing X and Y")]

    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
+    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
@@ -85,7 +84,7 @@ The changes successfully address the feedback."""
    ]

    # Test the guess_success method
-    with patch('litellm.completion', return_value=mock_response):
+    with patch("litellm.completion", return_value=mock_response):
        success, success_list, explanation = handler.guess_success(
            issue, history, llm_config
        )
@@ -93,39 +92,39 @@ The changes successfully address the feedback."""
        # Verify the results
        assert success is True
        assert success_list == [True]
-        assert 'successfully address' in explanation
+        assert "successfully address" in explanation


 def test_pr_handler_get_converted_issues_with_comments():
    # Mock the necessary dependencies
-    with patch('requests.get') as mock_get:
+    with patch("requests.get") as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body fixes #1',
-                'head': {'ref': 'test-branch'},
+                "number": 1,
+                "title": "Test PR",
+                "body": "Test Body fixes #1",
+                "head": {"ref": "test-branch"},
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
-            {'body': 'First comment'},
-            {'body': 'Second comment'},
+            {"body": "First comment"},
+            {"body": "Second comment"},
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {'edges': []},
+            "data": {
+                "repository": {
+                    "pullRequest": {
+                        "closingIssuesReferences": {"edges": []},
+                        "reviews": {"nodes": []},
+                        "reviewThreads": {"edges": []},
                    }
                }
            }
@@ -139,7 +138,7 @@ def test_pr_handler_get_converted_issues_with_comments():
        # Mock the response for fetching the external issue referenced in PR body
        mock_external_issue_response = MagicMock()
        mock_external_issue_response.json.return_value = {
-            'body': 'This is additional context from an externally referenced issue.'
+            "body": "This is additional context from an externally referenced issue."
        }

        mock_get.side_effect = [
@@ -151,56 +150,56 @@ def test_pr_handler_get_converted_issues_with_comments():
        ]

        # Mock the post request for GraphQL
-        with patch('requests.post') as mock_post:
+        with patch("requests.post") as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
-            handler = PRHandler('test-owner', 'test-repo', 'test-token')
+            handler = PRHandler("test-owner", "test-repo", "test-token")

            # Get converted issues
-            prs = handler.get_converted_issues(issue_numbers=[1])
+            prs = handler.get_converted_issues()

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
-            assert prs[0].thread_comments == ['First comment', 'Second comment']
+            assert prs[0].thread_comments == ["First comment", "Second comment"]

            # Verify other fields are set correctly
            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR'
-            assert prs[0].body == 'Test Body fixes #1'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
+            assert prs[0].title == "Test PR"
+            assert prs[0].body == "Test Body fixes #1"
+            assert prs[0].owner == "test-owner"
+            assert prs[0].repo == "test-repo"
+            assert prs[0].head_branch == "test-branch"
            assert prs[0].closing_issues == [
-                'This is additional context from an externally referenced issue.'
+                "This is additional context from an externally referenced issue."
            ]


 def test_pr_handler_guess_success_only_review_comments():
    # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create a mock issue with only review comments
    issue = GithubIssue(
-        owner='test-owner',
-        repo='test-repo',
+        owner="test-owner",
+        repo="test-repo",
        number=1,
-        title='Test PR',
-        body='Test Body',
+        title="Test PR",
+        body="Test Body",
        thread_comments=None,
-        closing_issues=['Issue description'],
-        review_comments=['Please fix the formatting', 'Add more tests'],
+        closing_issues=["Issue description"],
+        review_comments=["Please fix the formatting", "Add more tests"],
        thread_ids=None,
-        head_branch='test-branch',
+        head_branch="test-branch",
    )

    # Create mock history
-    history = [MessageAction(content='Fixed the formatting and added more tests')]
+    history = [MessageAction(content="Fixed the formatting and added more tests")]

    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
+    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
@@ -217,7 +216,7 @@ The changes successfully address the review comments."""
    ]

    # Test the guess_success method
-    with patch('litellm.completion', return_value=mock_response):
+    with patch("litellm.completion", return_value=mock_response):
        success, success_list, explanation = handler.guess_success(
            issue, history, llm_config
        )
@@ -225,32 +224,32 @@ The changes successfully address the review comments."""
        # Verify the results
        assert success is True
        assert success_list == [True]
-        assert 'successfully address' in explanation
+        assert "successfully address" in explanation


 def test_pr_handler_guess_success_no_comments():
    # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create a mock issue with no comments
    issue = GithubIssue(
-        owner='test-owner',
-        repo='test-repo',
+        owner="test-owner",
+        repo="test-repo",
        number=1,
-        title='Test PR',
-        body='Test Body',
+        title="Test PR",
+        body="Test Body",
        thread_comments=None,
-        closing_issues=['Issue description'],
+        closing_issues=["Issue description"],
        review_comments=None,
        thread_ids=None,
-        head_branch='test-branch',
+        head_branch="test-branch",
    )

    # Create mock history
-    history = [MessageAction(content='Fixed the issue')]
+    history = [MessageAction(content="Fixed the issue")]

    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
+    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Test that it returns appropriate message when no comments are present
    success, success_list, explanation = handler.guess_success(
@@ -258,29 +257,29 @@ def test_pr_handler_guess_success_no_comments():
    )
    assert success is False
    assert success_list is None
-    assert explanation == 'No feedback was found to process'
+    assert explanation == "No feedback was found to process"


 def test_get_issue_comments_with_specific_comment_id():
    # Mock the necessary dependencies
-    with patch('requests.get') as mock_get:
+    with patch("requests.get") as mock_get:
        # Mock the response for comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
-            {'id': 123, 'body': 'First comment'},
-            {'id': 456, 'body': 'Second comment'},
+            {"id": 123, "body": "First comment"},
+            {"id": 456, "body": "Second comment"},
        ]

        mock_get.return_value = mock_comments_response

        # Create an instance of IssueHandler
-        handler = IssueHandler('test-owner', 'test-repo', 'test-token')
+        handler = IssueHandler("test-owner", "test-repo", "test-token")

        # Get comments with a specific comment_id
        specific_comment = handler._get_issue_comments(issue_number=1, comment_id=123)

        # Verify only the specific comment is returned
-        assert specific_comment == ['First comment']
+        assert specific_comment == ["First comment"]


 def test_pr_handler_get_converted_issues_with_specific_thread_comment():
@@ -288,50 +287,50 @@ def test_pr_handler_get_converted_issues_with_specific_thread_comment():
    specific_comment_id = 123

    # Mock GraphQL response for review threads
-    with patch('requests.get') as mock_get:
+    with patch("requests.get") as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body',
-                'head': {'ref': 'test-branch'},
+                "number": 1,
+                "title": "Test PR",
+                "body": "Test Body",
+                "head": {"ref": "test-branch"},
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
-            {'body': 'First comment', 'id': 123},
-            {'body': 'Second comment', 'id': 124},
+            {"body": "First comment", "id": 123},
+            {"body": "Second comment", "id": 124},
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {
-                            'edges': [
+            "data": {
+                "repository": {
+                    "pullRequest": {
+                        "closingIssuesReferences": {"edges": []},
+                        "reviews": {"nodes": []},
+                        "reviewThreads": {
+                            "edges": [
                                {
-                                    'node': {
-                                        'id': 'review-thread-1',
-                                        'isResolved': False,
-                                        'comments': {
-                                            'nodes': [
+                                    "node": {
+                                        "id": "review-thread-1",
+                                        "isResolved": False,
+                                        "comments": {
+                                            "nodes": [
                                                {
-                                                    'fullDatabaseId': 121,
-                                                    'body': 'Specific review comment',
-                                                    'path': 'file1.txt',
+                                                    "fullDatabaseId": 121,
+                                                    "body": "Specific review comment",
+                                                    "path": "file1.txt",
                                                },
                                                {
-                                                    'fullDatabaseId': 456,
-                                                    'body': 'Another review comment',
-                                                    'path': 'file2.txt',
+                                                    "fullDatabaseId": 456,
+                                                    "body": "Another review comment",
+                                                    "path": "file2.txt",
                                                },
                                            ]
                                        },
@@ -357,32 +356,30 @@ def test_pr_handler_get_converted_issues_with_specific_thread_comment():
        ]

        # Mock the post request for GraphQL
-        with patch('requests.post') as mock_post:
+        with patch("requests.post") as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
-            handler = PRHandler('test-owner', 'test-repo', 'test-token')
+            handler = PRHandler("test-owner", "test-repo", "test-token")

            # Get converted issues
-            prs = handler.get_converted_issues(
-                issue_numbers=[1], comment_id=specific_comment_id
-            )
+            prs = handler.get_converted_issues(comment_id=specific_comment_id)

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
-            assert prs[0].thread_comments == ['First comment']
+            assert prs[0].thread_comments == ["First comment"]
            assert prs[0].review_comments == []
            assert prs[0].review_threads == []

            # Verify other fields are set correctly
            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR'
-            assert prs[0].body == 'Test Body'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
+            assert prs[0].title == "Test PR"
+            assert prs[0].body == "Test Body"
+            assert prs[0].owner == "test-owner"
+            assert prs[0].repo == "test-repo"
+            assert prs[0].head_branch == "test-branch"


 def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
@@ -390,50 +387,50 @@ def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
    specific_comment_id = 123

    # Mock GraphQL response for review threads
-    with patch('requests.get') as mock_get:
+    with patch("requests.get") as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body',
-                'head': {'ref': 'test-branch'},
+                "number": 1,
+                "title": "Test PR",
+                "body": "Test Body",
+                "head": {"ref": "test-branch"},
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
-            {'body': 'First comment', 'id': 120},
-            {'body': 'Second comment', 'id': 124},
+            {"body": "First comment", "id": 120},
+            {"body": "Second comment", "id": 124},
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {
-                            'edges': [
+            "data": {
+                "repository": {
+                    "pullRequest": {
+                        "closingIssuesReferences": {"edges": []},
+                        "reviews": {"nodes": []},
+                        "reviewThreads": {
+                            "edges": [
                                {
-                                    'node': {
-                                        'id': 'review-thread-1',
-                                        'isResolved': False,
-                                        'comments': {
-                                            'nodes': [
+                                    "node": {
+                                        "id": "review-thread-1",
+                                        "isResolved": False,
+                                        "comments": {
+                                            "nodes": [
                                                {
-                                                    'fullDatabaseId': specific_comment_id,
-                                                    'body': 'Specific review comment',
-                                                    'path': 'file1.txt',
+                                                    "fullDatabaseId": specific_comment_id,
+                                                    "body": "Specific review comment",
+                                                    "path": "file1.txt",
                                                },
                                                {
-                                                    'fullDatabaseId': 456,
-                                                    'body': 'Another review comment',
-                                                    'path': 'file1.txt',
+                                                    "fullDatabaseId": 456,
+                                                    "body": "Another review comment",
+                                                    "path": "file1.txt",
                                                },
                                            ]
                                        },
@@ -459,16 +456,14 @@ def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
        ]

        # Mock the post request for GraphQL
-        with patch('requests.post') as mock_post:
+        with patch("requests.post") as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
-            handler = PRHandler('test-owner', 'test-repo', 'test-token')
+            handler = PRHandler("test-owner", "test-repo", "test-token")

            # Get converted issues
-            prs = handler.get_converted_issues(
-                issue_numbers=[1], comment_id=specific_comment_id
-            )
+            prs = handler.get_converted_issues(comment_id=specific_comment_id)

            # Verify that we got exactly one PR
            assert len(prs) == 1
@@ -480,17 +475,17 @@ def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
            assert isinstance(prs[0].review_threads[0], ReviewThread)
            assert (
                prs[0].review_threads[0].comment
-                == 'Specific review comment\n---\nlatest feedback:\nAnother review comment\n'
+                == "Specific review comment\n---\nlatest feedback:\nAnother review comment\n"
            )
-            assert prs[0].review_threads[0].files == ['file1.txt']
+            assert prs[0].review_threads[0].files == ["file1.txt"]

            # Verify other fields are set correctly
            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR'
-            assert prs[0].body == 'Test Body'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
+            assert prs[0].title == "Test PR"
+            assert prs[0].body == "Test Body"
+            assert prs[0].owner == "test-owner"
+            assert prs[0].repo == "test-repo"
+            assert prs[0].head_branch == "test-branch"


 def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
@@ -498,50 +493,50 @@ def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
    specific_comment_id = 123

    # Mock GraphQL response for review threads
-    with patch('requests.get') as mock_get:
+    with patch("requests.get") as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
-                'number': 1,
-                'title': 'Test PR fixes #3',
-                'body': 'Test Body',
-                'head': {'ref': 'test-branch'},
+                "number": 1,
+                "title": "Test PR fixes #3",
+                "body": "Test Body",
+                "head": {"ref": "test-branch"},
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
-            {'body': 'First comment', 'id': 120},
-            {'body': 'Second comment', 'id': 124},
+            {"body": "First comment", "id": 120},
+            {"body": "Second comment", "id": 124},
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {
-                            'edges': [
+            "data": {
+                "repository": {
+                    "pullRequest": {
+                        "closingIssuesReferences": {"edges": []},
+                        "reviews": {"nodes": []},
+                        "reviewThreads": {
+                            "edges": [
                                {
-                                    'node': {
-                                        'id': 'review-thread-1',
-                                        'isResolved': False,
-                                        'comments': {
-                                            'nodes': [
+                                    "node": {
+                                        "id": "review-thread-1",
+                                        "isResolved": False,
+                                        "comments": {
+                                            "nodes": [
                                                {
-                                                    'fullDatabaseId': specific_comment_id,
-                                                    'body': 'Specific review comment that references #6',
-                                                    'path': 'file1.txt',
+                                                    "fullDatabaseId": specific_comment_id,
+                                                    "body": "Specific review comment that references #6",
+                                                    "path": "file1.txt",
                                                },
                                                {
-                                                    'fullDatabaseId': 456,
-                                                    'body': 'Another review comment referencing #7',
-                                                    'path': 'file2.txt',
+                                                    "fullDatabaseId": 456,
+                                                    "body": "Another review comment referencing #7",
+                                                    "path": "file2.txt",
                                                },
                                            ]
                                        },
@@ -562,13 +557,13 @@ def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
        # Mock the response for fetching the external issue referenced in PR body
        mock_external_issue_response_in_body = MagicMock()
        mock_external_issue_response_in_body.json.return_value = {
-            'body': 'External context #1.'
+            "body": "External context #1."
        }

        # Mock the response for fetching the external issue referenced in review thread
        mock_external_issue_response_review_thread = MagicMock()
        mock_external_issue_response_review_thread.json.return_value = {
-            'body': 'External context #2.'
+            "body": "External context #2."
        }

        mock_get.side_effect = [
@@ -581,16 +576,14 @@ def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
        ]

        # Mock the post request for GraphQL
-        with patch('requests.post') as mock_post:
+        with patch("requests.post") as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
-            handler = PRHandler('test-owner', 'test-repo', 'test-token')
+            handler = PRHandler("test-owner", "test-repo", "test-token")

            # Get converted issues
-            prs = handler.get_converted_issues(
-                issue_numbers=[1], comment_id=specific_comment_id
-            )
+            prs = handler.get_converted_issues(comment_id=specific_comment_id)

            # Verify that we got exactly one PR
            assert len(prs) == 1
@@ -602,52 +595,52 @@ def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
            assert isinstance(prs[0].review_threads[0], ReviewThread)
            assert (
                prs[0].review_threads[0].comment
-                == 'Specific review comment that references #6\n---\nlatest feedback:\nAnother review comment referencing #7\n'
+                == "Specific review comment that references #6\n---\nlatest feedback:\nAnother review comment referencing #7\n"
            )
            assert prs[0].closing_issues == [
-                'External context #1.',
-                'External context #2.',
+                "External context #1.",
+                "External context #2.",
            ]  # Only includes references inside comment ID and body PR

            # Verify other fields are set correctly
            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR fixes #3'
-            assert prs[0].body == 'Test Body'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
+            assert prs[0].title == "Test PR fixes #3"
+            assert prs[0].body == "Test Body"
+            assert prs[0].owner == "test-owner"
+            assert prs[0].repo == "test-repo"
+            assert prs[0].head_branch == "test-branch"


 def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
    # Mock the necessary dependencies
-    with patch('requests.get') as mock_get:
+    with patch("requests.get") as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body fixes #1',
-                'head': {'ref': 'test-branch'},
+                "number": 1,
+                "title": "Test PR",
+                "body": "Test Body fixes #1",
+                "head": {"ref": "test-branch"},
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
-            {'body': 'First comment addressing #1'},
-            {'body': 'Second comment addressing #2'},
+            {"body": "First comment addressing #1"},
+            {"body": "Second comment addressing #2"},
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {'edges': []},
+            "data": {
+                "repository": {
+                    "pullRequest": {
+                        "closingIssuesReferences": {"edges": []},
+                        "reviews": {"nodes": []},
+                        "reviewThreads": {"edges": []},
                    }
                }
            }
@@ -661,13 +654,13 @@ def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
        # Mock the response for fetching the external issue referenced in PR body
        mock_external_issue_response_in_body = MagicMock()
        mock_external_issue_response_in_body.json.return_value = {
-            'body': 'External context #1.'
+            "body": "External context #1."
        }

        # Mock the response for fetching the external issue referenced in review thread
        mock_external_issue_response_in_comment = MagicMock()
        mock_external_issue_response_in_comment.json.return_value = {
-            'body': 'External context #2.'
+            "body": "External context #2."
        }

        mock_get.side_effect = [
@@ -680,32 +673,32 @@ def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
        ]

        # Mock the post request for GraphQL
-        with patch('requests.post') as mock_post:
+        with patch("requests.post") as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
-            handler = PRHandler('test-owner', 'test-repo', 'test-token')
+            handler = PRHandler("test-owner", "test-repo", "test-token")

            # Get converted issues
-            prs = handler.get_converted_issues(issue_numbers=[1])
+            prs = handler.get_converted_issues()

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
            assert prs[0].thread_comments == [
-                'First comment addressing #1',
-                'Second comment addressing #2',
+                "First comment addressing #1",
+                "Second comment addressing #2",
            ]

            # Verify other fields are set correctly
            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR'
-            assert prs[0].body == 'Test Body fixes #1'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
+            assert prs[0].title == "Test PR"
+            assert prs[0].body == "Test Body fixes #1"
+            assert prs[0].owner == "test-owner"
+            assert prs[0].repo == "test-repo"
+            assert prs[0].head_branch == "test-branch"
            assert prs[0].closing_issues == [
-                'External context #1.',
-                'External context #2.',
+                "External context #1.",
+                "External context #2.",
            ]
--- a/tests/unit/resolver/test_issue_handler_error_handling.py
+++ b/tests/unit/resolver/test_issue_handler_error_handling.py
@@ -1,94 +0,0 @@
-import pytest
-import requests
-from unittest.mock import patch, MagicMock
-
-from openhands.resolver.issue_definitions import PRHandler
-from openhands.resolver.github_issue import ReviewThread
-
-
-def test_handle_nonexistent_issue_reference():
-    handler = PRHandler("test-owner", "test-repo", "test-token")
-    
-    # Mock the requests.get to simulate a 404 error
-    mock_response = MagicMock()
-    mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error: Not Found")
-    
-    with patch('requests.get', return_value=mock_response):
-        # Call the method with a non-existent issue reference
-        result = handler._PRHandler__get_context_from_external_issues_references(
-            closing_issues=[],
-            closing_issue_numbers=[],
-            issue_body="This references #999999",  # Non-existent issue
-            review_comments=[],
-            review_threads=[],
-            thread_comments=None
-        )
-        
-        # The method should return an empty list since the referenced issue couldn't be fetched
-        assert result == []
-
-
-def test_handle_rate_limit_error():
-    handler = PRHandler("test-owner", "test-repo", "test-token")
-    
-    # Mock the requests.get to simulate a rate limit error
-    mock_response = MagicMock()
-    mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
-        "403 Client Error: Rate Limit Exceeded"
-    )
-    
-    with patch('requests.get', return_value=mock_response):
-        # Call the method with an issue reference
-        result = handler._PRHandler__get_context_from_external_issues_references(
-            closing_issues=[],
-            closing_issue_numbers=[],
-            issue_body="This references #123",
-            review_comments=[],
-            review_threads=[],
-            thread_comments=None
-        )
-        
-        # The method should return an empty list since the request was rate limited
-        assert result == []
-
-
-def test_handle_network_error():
-    handler = PRHandler("test-owner", "test-repo", "test-token")
-    
-    # Mock the requests.get to simulate a network error
-    with patch('requests.get', side_effect=requests.exceptions.ConnectionError("Network Error")):
-        # Call the method with an issue reference
-        result = handler._PRHandler__get_context_from_external_issues_references(
-            closing_issues=[],
-            closing_issue_numbers=[],
-            issue_body="This references #123",
-            review_comments=[],
-            review_threads=[],
-            thread_comments=None
-        )
-        
-        # The method should return an empty list since the network request failed
-        assert result == []
-
-
-def test_successful_issue_reference():
-    handler = PRHandler("test-owner", "test-repo", "test-token")
-    
-    # Mock a successful response
-    mock_response = MagicMock()
-    mock_response.raise_for_status.return_value = None
-    mock_response.json.return_value = {"body": "This is the referenced issue body"}
-    
-    with patch('requests.get', return_value=mock_response):
-        # Call the method with an issue reference
-        result = handler._PRHandler__get_context_from_external_issues_references(
-            closing_issues=[],
-            closing_issue_numbers=[],
-            issue_body="This references #123",
-            review_comments=[],
-            review_threads=[],
-            thread_comments=None
-        )
-        
-        # The method should return a list with the referenced issue body
-        assert result == ["This is the referenced issue body"]
--- a/tests/unit/resolver/test_issue_references.py
+++ b/tests/unit/resolver/test_issue_references.py
@@ -1,34 +0,0 @@
-from openhands.resolver.issue_definitions import IssueHandler
-
-
-def test_extract_issue_references():
-    handler = IssueHandler("test-owner", "test-repo", "test-token")
-
-    # Test basic issue reference
-    assert handler._extract_issue_references("Fixes #123") == [123]
-
-    # Test multiple issue references
-    assert handler._extract_issue_references("Fixes #123, #456") == [123, 456]
-
-    # Test issue references in code blocks should be ignored
-    assert handler._extract_issue_references("""
-    Here's a code block:
-    ```python
-    # This is a comment with #123
-    def func():
-        pass  # Another #456
-    ```
-    But this #789 should be extracted
-    """) == [789]
-
-    # Test issue references in inline code should be ignored
-    assert handler._extract_issue_references("This `#123` should be ignored but #456 should be extracted") == [456]
-
-    # Test issue references in URLs should be ignored
-    assert handler._extract_issue_references("Check http://example.com/#123 but #456 should be extracted") == [456]
-
-    # Test issue references in markdown links should be extracted
-    assert handler._extract_issue_references("[Link to #123](http://example.com) and #456") == [123, 456]
-
-    # Test issue references with text around them
-    assert handler._extract_issue_references("Issue #123 is fixed and #456 is pending") == [123, 456]
--- a/tests/unit/resolver/test_resolve_issues.py
+++ b/tests/unit/resolver/test_resolve_issues.py
--- a/tests/unit/resolver/test_send_pull_request.py
+++ b/tests/unit/resolver/test_send_pull_request.py
@@ -322,17 +322,7 @@ def test_update_existing_pull_request(
    )


-@pytest.mark.parametrize(
-    'pr_type,target_branch',
-    [
-        ('branch', None),
-        ('draft', None),
-        ('ready', None),
-        ('branch', 'feature'),
-        ('draft', 'develop'),
-        ('ready', 'staging'),
-    ],
-)
+@pytest.mark.parametrize('pr_type', ['branch', 'draft', 'ready'])
@patch('subprocess.run')
@patch('requests.post')
@patch('requests.get')
@@ -344,22 +334,14 @@ def test_send_pull_request(
    mock_output_dir,
    mock_llm_config,
    pr_type,
-    target_branch,
 ):
    repo_path = os.path.join(mock_output_dir, 'repo')

-    # Mock API responses based on whether target_branch is specified
-    if target_branch:
-        mock_get.side_effect = [
-            MagicMock(status_code=404),  # Branch doesn't exist
-            MagicMock(status_code=200),  # Target branch exists
-        ]
-    else:
-        mock_get.side_effect = [
-            MagicMock(status_code=404),  # Branch doesn't exist
-            MagicMock(json=lambda: {'default_branch': 'main'}),  # Get default branch
-        ]
-
+    # Mock API responses
+    mock_get.side_effect = [
+        MagicMock(status_code=404),  # Branch doesn't exist
+        MagicMock(json=lambda: {'default_branch': 'main'}),
+    ]
    mock_post.return_value.json.return_value = {
        'html_url': 'https://github.com/test-owner/test-repo/pull/1'
    }
@@ -378,12 +360,10 @@ def test_send_pull_request(
        patch_dir=repo_path,
        pr_type=pr_type,
        llm_config=mock_llm_config,
-        target_branch=target_branch,
    )

    # Assert API calls
-    expected_get_calls = 2
-    assert mock_get.call_count == expected_get_calls
+    assert mock_get.call_count == 2

    # Check branch creation and push
    assert mock_run.call_count == 2
@@ -421,41 +401,10 @@ def test_send_pull_request(
        assert post_data['title'] == 'Fix issue #42: Test Issue'
        assert post_data['body'].startswith('This pull request fixes #42.')
        assert post_data['head'] == 'openhands-fix-issue-42'
-        assert post_data['base'] == (target_branch if target_branch else 'main')
+        assert post_data['base'] == 'main'
        assert post_data['draft'] == (pr_type == 'draft')


-@patch('requests.get')
-def test_send_pull_request_invalid_target_branch(
-    mock_get, mock_github_issue, mock_output_dir, mock_llm_config
-):
-    """Test that an error is raised when specifying a non-existent target branch"""
-    repo_path = os.path.join(mock_output_dir, 'repo')
-
-    # Mock API response for non-existent branch
-    mock_get.side_effect = [
-        MagicMock(status_code=404),  # Branch doesn't exist
-        MagicMock(status_code=404),  # Target branch doesn't exist
-    ]
-
-    # Test that ValueError is raised when target branch doesn't exist
-    with pytest.raises(
-        ValueError, match='Target branch nonexistent-branch does not exist'
-    ):
-        send_pull_request(
-            github_issue=mock_github_issue,
-            github_token='test-token',
-            github_username='test-user',
-            patch_dir=repo_path,
-            pr_type='ready',
-            llm_config=mock_llm_config,
-            target_branch='nonexistent-branch',
-        )
-
-    # Verify API calls
-    assert mock_get.call_count == 2
-
-
@patch('subprocess.run')
@patch('requests.post')
@patch('requests.get')
@@ -667,7 +616,6 @@ def test_process_single_pr_update(
        mock_llm_config,
        None,
        False,
-        None,
    )

    mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'pr', 'branch 1')
@@ -740,7 +688,6 @@ def test_process_single_issue(
        mock_llm_config,
        None,
        False,
-        None,
    )

    # Assert that the mocked functions were called with correct arguments
@@ -757,10 +704,9 @@ def test_process_single_issue(
        github_username=github_username,
        patch_dir=f'{mock_output_dir}/patches/issue_1',
        pr_type=pr_type,
-        llm_config=mock_llm_config,
        fork_owner=None,
        additional_message=resolver_output.success_explanation,
-        target_branch=None,
+        llm_config=mock_llm_config,
    )


@@ -811,7 +757,6 @@ def test_process_single_issue_unsuccessful(
        mock_llm_config,
        None,
        False,
-        None,
    )

    # Assert that none of the mocked functions were called
@@ -918,7 +863,6 @@ def test_process_all_successful_issues(
                mock_llm_config,
                None,
                False,
-                None,
            ),
            call(
                'output_dir',
@@ -929,7 +873,6 @@ def test_process_all_successful_issues(
                mock_llm_config,
                None,
                False,
-                None,
            ),
        ]
    )
@@ -1028,7 +971,6 @@ def test_main(
    mock_args.llm_model = 'mock_model'
    mock_args.llm_base_url = 'mock_url'
    mock_args.llm_api_key = 'mock_key'
-    mock_args.target_branch = None
    mock_parser.return_value.parse_args.return_value = mock_args

    # Setup environment variables
@@ -1052,8 +994,12 @@ def test_main(
        api_key=mock_args.llm_api_key,
    )

-    # Use any_call instead of assert_called_with for more flexible matching
-    assert mock_process_single_issue.call_args == call(
+    # Assert function calls
+    mock_parser.assert_called_once()
+    mock_getenv.assert_any_call('GITHUB_TOKEN')
+    mock_path_exists.assert_called_with('/mock/output')
+    mock_load_single_resolver_output.assert_called_with('/mock/output/output.jsonl', 42)
+    mock_process_single_issue.assert_called_with(
        '/mock/output',
        mock_resolver_output,
        'mock_token',
@@ -1062,15 +1008,8 @@ def test_main(
        llm_config,
        None,
        False,
-        mock_args.target_branch,
    )

-    # Other assertions
-    mock_parser.assert_called_once()
-    mock_getenv.assert_any_call('GITHUB_TOKEN')
-    mock_path_exists.assert_called_with('/mock/output')
-    mock_load_single_resolver_output.assert_called_with('/mock/output/output.jsonl', 42)
-
    # Test for 'all_successful' issue number
    mock_args.issue_number = 'all_successful'
    main()
Author	SHA1	Message	Date
Robert Brennan	2bec240015	update sys prompt	2024-11-15 11:50:09 -05:00
Robert Brennan	a68ac2f5af	Merge branch 'main' into rb/dev-intent	2024-11-15 11:49:04 -05:00
Robert Brennan	61036b5bd1	fix empty msg	2024-11-02 20:12:39 -04:00
Robert Brennan	798f280f5f	Merge branch 'rb/dockerfile-fix' into rb/dev-intent	2024-11-02 19:27:19 -04:00
Robert Brennan	a847a11e6e	chmod	2024-11-02 19:25:23 -04:00
openhands	23cd526f09	fix: handle concurrent delete operations safely - Only schedule one delete timer per file - Add test for concurrent delete operations - Fix KeyError when multiple timers try to handle the same deletion	2024-11-02 22:45:28 +00:00
Robert Brennan	0b3b23df58	better logging	2024-11-02 18:42:16 -04:00
Robert Brennan	c480507332	Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent	2024-11-02 18:33:47 -04:00
Robert Brennan	c422f3670b	add agent configs	2024-11-02 18:33:41 -04:00
openhands	c86078654c	test: update file watcher tests to expect EventSource.USER	2024-11-02 22:31:42 +00:00
Robert Brennan	f7b2f20e85	change env	2024-11-02 18:27:42 -04:00
Robert Brennan	0481dc0b41	Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent	2024-11-02 18:27:34 -04:00
openhands	c231b9c348	fix: improve handling of atomic renames and neovim operations - Add detection of atomic renames (delete+create with same content) - Add delayed deletion handling to avoid spurious events - Fix handling of file deletions with debouncing disabled - Add test for atomic rename handling	2024-11-02 22:26:15 +00:00
Robert Brennan	0bb9cdc0a9	set env to user	2024-11-02 18:22:43 -04:00
openhands	0851ad87f6	fix: improve filesystem event handling and add tests - Add use_debouncing flag to control debouncing behavior - Fix event source to use EventSource.ENVIRONMENT consistently - Add proper handling of neovim temporary files - Add comprehensive tests for file operations and debouncing	2024-11-02 22:15:13 +00:00
openhands	7914d6ae76	fix: debounce filesystem events to handle neovim's file operations	2024-11-02 22:06:50 +00:00
Robert Brennan	40afe4bd9c	Revert "fix: handle neovim's delete-create cycle as edit operation" This reverts commit `a44b1a6408`.	2024-11-02 18:01:49 -04:00
Robert Brennan	607952f2b4	Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent	2024-11-02 18:01:38 -04:00
Robert Brennan	6867043ff2	add logs	2024-11-02 18:01:32 -04:00
openhands	a44b1a6408	fix: handle neovim's delete-create cycle as edit operation - Added buffer to track recently deleted files - Added time window to detect quick delete-create cycles - Modified file creation handler to detect and convert to edit events - Added delayed cleanup for unmatched delete events	2024-11-02 21:38:40 +00:00
Robert Brennan	eab6580dc7	fix logs	2024-11-02 17:28:33 -04:00
Robert Brennan	555c8b5135	fix display in cli	2024-11-02 17:28:18 -04:00
Robert Brennan	3ba0d157fa	update codeact	2024-11-02 17:07:31 -04:00
Robert Brennan	a96c61ed55	log spam	2024-11-02 16:41:23 -04:00
Robert Brennan	afe8254456	fix waiting user input	2024-11-02 16:38:43 -04:00
openhands	fb330c9b59	Make CLI input non-blocking using asyncio thread executor	2024-11-02 20:25:59 +00:00
Robert Brennan	c001eb70ab	fix lint	2024-11-02 16:21:16 -04:00
Robert Brennan	a9d7479d47	fix lint	2024-11-02 16:21:04 -04:00
Robert Brennan	e5eaec9682	add obs checking	2024-11-02 16:20:43 -04:00
Robert Brennan	53061b7d8d	update prompt	2024-11-02 16:20:09 -04:00
Robert Brennan	71df9c6f13	fix event source	2024-11-02 16:16:01 -04:00
openhands	8d93bf81f3	Add test for .git directory ignoring	2024-11-02 19:27:59 +00:00
Robert Brennan	b3911fd44f	Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent	2024-11-02 15:25:17 -04:00
openhands	4c0e5e7820	Improve .git directory ignoring to handle nested paths	2024-11-02 19:23:40 +00:00
Robert Brennan	e02237716f	lock	2024-11-02 15:22:50 -04:00
Robert Brennan	70feb228e8	Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent	2024-11-02 15:19:57 -04:00
openhands	5248c835ab	Fix diff generation to remove @@ line number headers	2024-11-02 19:18:43 +00:00
Robert Brennan	27c1c9d310	new event loop	2024-11-02 15:18:24 -04:00
Robert Brennan	d91f915f89	Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent	2024-11-02 15:12:33 -04:00
Robert Brennan	ce5a5fdfc2	revert plugins	2024-11-02 15:10:25 -04:00
openhands	b9df421ce5	Add comprehensive tests for FileWatcher	2024-11-02 19:09:44 +00:00
Robert Brennan	6e7f3b0499	Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent	2024-11-02 15:08:33 -04:00
openhands	527945cb96	Fix gitignore pattern matching for directories like node_modules	2024-11-02 19:07:28 +00:00
Robert Brennan	693ea45092	move watch	2024-11-02 15:00:51 -04:00
openhands	d8bdfa99e2	Add watchdog dependency for file monitoring	2024-11-02 19:00:28 +00:00
openhands	a4342023ba	Update FileWatcher to respect .gitignore in watched directory	2024-11-02 18:57:04 +00:00
openhands	c3c59bad9c	Add diff generation to FileWatcher's FileEditObservations	2024-11-02 17:54:51 +00:00
openhands	ebfba98f1b	Implement --watch functionality in CLI with FileEditObservation logging	2024-11-02 17:42:33 +00:00
openhands	c1e215c343	Update FileWatcher to use FileEditObservation and track file contents	2024-11-02 17:39:57 +00:00
openhands	110c1ad5dc	Add FileWatcher class for directory monitoring	2024-11-02 17:35:05 +00:00
openhands	f03fcbfc59	Add --watch option to CLI for directory monitoring	2024-11-02 16:56:09 +00:00
				`@@ -0,0 +1 @@`
				`"""Intent detection and processing for OpenHands."""`