Compare commits

...

51 Commits

Author SHA1 Message Date
Robert Brennan
2bec240015 update sys prompt 2024-11-15 11:50:09 -05:00
Robert Brennan
a68ac2f5af Merge branch 'main' into rb/dev-intent 2024-11-15 11:49:04 -05:00
Robert Brennan
61036b5bd1 fix empty msg 2024-11-02 20:12:39 -04:00
Robert Brennan
798f280f5f Merge branch 'rb/dockerfile-fix' into rb/dev-intent 2024-11-02 19:27:19 -04:00
Robert Brennan
a847a11e6e chmod 2024-11-02 19:25:23 -04:00
openhands
23cd526f09 fix: handle concurrent delete operations safely
- Only schedule one delete timer per file
- Add test for concurrent delete operations
- Fix KeyError when multiple timers try to handle the same deletion
2024-11-02 22:45:28 +00:00
Robert Brennan
0b3b23df58 better logging 2024-11-02 18:42:16 -04:00
Robert Brennan
c480507332 Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent 2024-11-02 18:33:47 -04:00
Robert Brennan
c422f3670b add agent configs 2024-11-02 18:33:41 -04:00
openhands
c86078654c test: update file watcher tests to expect EventSource.USER 2024-11-02 22:31:42 +00:00
Robert Brennan
f7b2f20e85 change env 2024-11-02 18:27:42 -04:00
Robert Brennan
0481dc0b41 Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent 2024-11-02 18:27:34 -04:00
openhands
c231b9c348 fix: improve handling of atomic renames and neovim operations
- Add detection of atomic renames (delete+create with same content)
- Add delayed deletion handling to avoid spurious events
- Fix handling of file deletions with debouncing disabled
- Add test for atomic rename handling
2024-11-02 22:26:15 +00:00
Robert Brennan
0bb9cdc0a9 set env to user 2024-11-02 18:22:43 -04:00
openhands
0851ad87f6 fix: improve filesystem event handling and add tests
- Add use_debouncing flag to control debouncing behavior
- Fix event source to use EventSource.ENVIRONMENT consistently
- Add proper handling of neovim temporary files
- Add comprehensive tests for file operations and debouncing
2024-11-02 22:15:13 +00:00
openhands
7914d6ae76 fix: debounce filesystem events to handle neovim's file operations 2024-11-02 22:06:50 +00:00
Robert Brennan
40afe4bd9c Revert "fix: handle neovim's delete-create cycle as edit operation"
This reverts commit a44b1a6408.
2024-11-02 18:01:49 -04:00
Robert Brennan
607952f2b4 Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent 2024-11-02 18:01:38 -04:00
Robert Brennan
6867043ff2 add logs 2024-11-02 18:01:32 -04:00
openhands
a44b1a6408 fix: handle neovim's delete-create cycle as edit operation
- Added buffer to track recently deleted files
- Added time window to detect quick delete-create cycles
- Modified file creation handler to detect and convert to edit events
- Added delayed cleanup for unmatched delete events
2024-11-02 21:38:40 +00:00
Robert Brennan
eab6580dc7 fix logs 2024-11-02 17:28:33 -04:00
Robert Brennan
555c8b5135 fix display in cli 2024-11-02 17:28:18 -04:00
Robert Brennan
3ba0d157fa update codeact 2024-11-02 17:07:31 -04:00
Robert Brennan
a96c61ed55 log spam 2024-11-02 16:41:23 -04:00
Robert Brennan
afe8254456 fix waiting user input 2024-11-02 16:38:43 -04:00
openhands
fb330c9b59 Make CLI input non-blocking using asyncio thread executor 2024-11-02 20:25:59 +00:00
Robert Brennan
c001eb70ab fix lint 2024-11-02 16:21:16 -04:00
Robert Brennan
a9d7479d47 fix lint 2024-11-02 16:21:04 -04:00
Robert Brennan
e5eaec9682 add obs checking 2024-11-02 16:20:43 -04:00
Robert Brennan
53061b7d8d update prompt 2024-11-02 16:20:09 -04:00
Robert Brennan
71df9c6f13 fix event source 2024-11-02 16:16:01 -04:00
openhands
8d93bf81f3 Add test for .git directory ignoring 2024-11-02 19:27:59 +00:00
Robert Brennan
b3911fd44f Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent 2024-11-02 15:25:17 -04:00
openhands
4c0e5e7820 Improve .git directory ignoring to handle nested paths 2024-11-02 19:23:40 +00:00
Robert Brennan
e02237716f lock 2024-11-02 15:22:50 -04:00
Robert Brennan
70feb228e8 Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent 2024-11-02 15:19:57 -04:00
openhands
5248c835ab Fix diff generation to remove @@ line number headers 2024-11-02 19:18:43 +00:00
Robert Brennan
27c1c9d310 new event loop 2024-11-02 15:18:24 -04:00
Robert Brennan
d91f915f89 Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent 2024-11-02 15:12:33 -04:00
Robert Brennan
ce5a5fdfc2 revert plugins 2024-11-02 15:10:25 -04:00
openhands
b9df421ce5 Add comprehensive tests for FileWatcher 2024-11-02 19:09:44 +00:00
Robert Brennan
6e7f3b0499 Merge branch 'rb/dev-intent' of ssh://github.com/all-hands-ai/openhands into rb/dev-intent 2024-11-02 15:08:33 -04:00
openhands
527945cb96 Fix gitignore pattern matching for directories like node_modules 2024-11-02 19:07:28 +00:00
Robert Brennan
693ea45092 move watch 2024-11-02 15:00:51 -04:00
openhands
d8bdfa99e2 Add watchdog dependency for file monitoring 2024-11-02 19:00:28 +00:00
openhands
a4342023ba Update FileWatcher to respect .gitignore in watched directory 2024-11-02 18:57:04 +00:00
openhands
c3c59bad9c Add diff generation to FileWatcher's FileEditObservations 2024-11-02 17:54:51 +00:00
openhands
ebfba98f1b Implement --watch functionality in CLI with FileEditObservation logging 2024-11-02 17:42:33 +00:00
openhands
c1e215c343 Update FileWatcher to use FileEditObservation and track file contents 2024-11-02 17:39:57 +00:00
openhands
110c1ad5dc Add FileWatcher class for directory monitoring 2024-11-02 17:35:05 +00:00
openhands
f03fcbfc59 Add --watch option to CLI for directory monitoring 2024-11-02 16:56:09 +00:00
10 changed files with 1490 additions and 8 deletions

View File

@@ -20,6 +20,7 @@ from openhands.events.action import (
IPythonRunCellAction,
MessageAction,
)
from openhands.events.event import EventSource
from openhands.events.observation import (
AgentDelegateObservation,
BrowserOutputObservation,
@@ -187,7 +188,9 @@ class CodeActAgent(Agent):
)
]
elif isinstance(action, CmdRunAction) and action.source == 'user':
content = [TextContent(text=f'User executed the command:\n{action.command}')]
content = [
TextContent(text=f'User executed the command:\n{action.command}')
]
return [
Message(
role='user',
@@ -255,6 +258,8 @@ class CodeActAgent(Agent):
message = Message(role='user', content=[TextContent(text=text)])
elif isinstance(obs, FileEditObservation):
text = truncate_content(str(obs), max_message_chars)
if obs.source == EventSource.USER:
text = '[User has edited a file]\n' + text
message = Message(role='user', content=[TextContent(text=text)])
elif isinstance(obs, BrowserOutputObservation):
text = obs.get_agent_obs_text()

View File

@@ -1,4 +1,9 @@
You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
You also observe user actions, like "User has edited a file", and
infer the user's long-term intentions based on these edits.
If you think you can help the user finish the task at hand,
you should offer a suggestion as to how you can
help, and wait for the user to confirm.
<IMPORTANT>
* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.

View File

@@ -0,0 +1,176 @@
{% set MINIMAL_SYSTEM_PREFIX %}
A chat between a curious user and an artificial intelligence assistant.
The assistant gives helpful, detailed answers to the user's questions.
It also observes user actions, like "User has edited a file", and
infers the user's long-term intentions based on these edits. If the assistant thinks
it can help the user finish the task at hand, it offers a suggestion as to how it can
help, and waits for the user to confirm.
[1] The assistant can use a Python environment with <execute_ipython>, e.g.:
<execute_ipython>
print("Hello World!")
</execute_ipython>
[2] The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
If a bash command returns exit code `-1`, this means the process is not yet finished.
The assistant must then send a second <execute_bash>. The second <execute_bash> can be empty
(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
or it can contain the text `ctrl+c` to interrupt the process.
For commands that may run indefinitely, the output should be redirected to a file and the command run
in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
If a command execution result says "Command timed out. Sending SIGINT to the process",
the assistant should retry running the command in the background.
[3] The assistant can edit files using <file_edit> by setting the file path and providing a draft of the new file content. The draft file content does not need to be exactly the same as the existing file content; the assistant may skip some lines and only include the parts that need to be changed.
IMPORTANT: When editing a large file (e.g., > 300 lines), the assistant MUST SPECIFY the range of lines to be edited by setting `start` and `end` (1-indexed, both inclusive). For example, `<file_edit path="/path/to/file.txt" start=1 end=-1>` means the assistant will edit the whole file (from line 1 to the end of the file). `start=1` and `end=-1` are the default values, so the assistant can omit them if they are the same as the default values.
BEFORE you start editing, you MUST view the ENTIRE body of the part you want to edit and get the correct begin and end line numbers.
When editing files, the assistant should include comments indicating where the code will not change. For example, use comments like `# no changes before` or `# no changes here` to clearly mark sections of the code that remain unchanged. This helps to provide context and ensure clarity in the edits being made.
Possible cases:
- File too long: When the file to be edited is too long, the assistant should set `start` and `end` (1-indexed, both inclusive) to specify the range of lines to be edited. For example, `<file_edit path="/path/to/file.txt" start=100 end=200>` means the assistant will only edit lines 100 to 200 of `/path/to/file.txt`.
- Append to file: If the assistant wants to append to a file, it should set both `start` and `end` to `-1`.
- File does not exist: If `<file_edit>` is pointing to a file that does not exist, a new file with the exact content will be created.
Important: because line numbers are useful, the assistant should always use the provided functions to search (e.g., `search_dir`) or view the file content (e.g., `open_file`) along with the line numbers. DO NOT use other methods (e.g., `cat`) to view the file content.
**Example 1 (general edit for short files)**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the beginning of the file)
1|class MyClass:
2| def __init__(self):
3| self.x = 1
4| self.y = 2
5| self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)
The assistant wants to edit the file to look like this:
(this is the beginning of the file)
1|class MyClass:
2| def __init__(self):
3| self.x = 1
4| self.y = 2
5|
6|print(MyClass().y)
(this is the end of the file)
The assistant may produce an edit action like this:
<file_edit path="/path/to/file.py" start=1 end=-1>
class MyClass:
def __init__(self):
# no changes before
self.y = 2
# self.z is removed
# MyClass().z is removed
print(MyClass().y)
</file_edit>
**Example 2 (append to file for short files)**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the beginning of the file)
1|class MyClass:
2| def __init__(self):
3| self.x = 1
4| self.y = 2
5| self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)
To append the following lines to the file:
```python
print(MyClass().y)
```
The assistant may produce an edit action like this:
<file_edit path="/path/to/file.py" start=-1 end=-1>
print(MyClass().y)
</file_edit>
**Example 3 (edit for long files)**
Given an existing file `/path/to/file.py` that looks like this:
(1000 more lines above)
1001|class MyClass:
1002| def __init__(self):
1003| self.x = 1
1004| self.y = 2
1005| self.z = 3
1006|
1007|print(MyClass().z)
1008|print(MyClass().x)
(2000 more lines below)
The assistant wants to edit the file to look like this:
(1000 more lines above)
1001|class MyClass:
1002| def __init__(self):
1003| self.x = 1
1004| self.y = 2
1005|
1006|print(MyClass().y)
(2000 more lines below)
The assistant may produce an edit action like this:
<file_edit path="/path/to/file.py" start=1001 end=1008>
class MyClass:
def __init__(self):
# no changes before
self.y = 2
# self.z is removed
# MyClass().z is removed
print(MyClass().y)
</file_edit>
{% endset %}
{% set BROWSING_PREFIX %}
The assistant can browse the Internet with <execute_browse> and </execute_browse>.
For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
{% endset %}
{% set PIP_INSTALL_PREFIX %}
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
{% endset %}
{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %}
{% set COMMAND_DOCS %}
Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
{{ agent_skills_docs }}
IMPORTANT:
- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
- Any code issued should be less than 50 lines to avoid context being cut off!
{% endset %}
{% set SYSTEM_SUFFIX %}
Responses should be concise.
The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
If the assistant is finished with the task you MUST include <finish></finish> in your response.
IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
The assistant MUST NOT apologize to the user or thank the user after running commands or editing files. It should only address the user in response to an explicit message from the user, or to ask for more information.
The assistant MUST NOT push any changes to GitHub unless explicitly requested to do so.
{% endset %}
{# Combine all parts without newlines between them #}
{{ SYSTEM_PREFIX -}}
{{- COMMAND_DOCS -}}
{{- SYSTEM_SUFFIX }}

View File

@@ -0,0 +1,278 @@
{% set DEFAULT_EXAMPLE %}
--- START OF EXAMPLE ---
USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
ASSISTANT:
Sure! Let me first check the current directory:
<execute_bash>
ls
</execute_bash>
USER:
OBSERVATION:
openhands@runtime:~/workspace$
ASSISTANT:
There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
<file_edit path="/workspace/app.py" start=-1 end=-1>
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
</file_edit>
USER:
OBSERVATION:
[New file /workspace/app.py is created.]
(begin of changes)
--- /workspace/app.py
+++ /workspace/app.py
@@ -1 +1,10 @@
+from flask import Flask
+app = Flask(__name__)
+@app.route('/')
+def index():
+ numbers = list(range(1, 11))
+ return str(numbers)
+
+if __name__ == '__main__':
+ app.run(port=5000)
(end of changes)
ASSISTANT:
I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
OBSERVATION:
[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
<execute_bash>
cat server.log
</execute_bash>
USER:
OBSERVATION:
Traceback (most recent call last):
File "/workspace/app.py", line 2, in <module>
from flask import Flask
ModuleNotFoundError: No module named 'flask'
ASSISTANT:
It seems that Flask is not installed. Let me install Flask for you:
<execute_bash>
pip install flask
</execute_bash>
USER:
OBSERVATION:
Defaulting to user installation because normal site-packages is not writeable
Collecting flask
Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Collecting blinker>=1.6.2
Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
Collecting Werkzeug>=3.0.0
Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
Collecting click>=8.1.3
Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting itsdangerous>=2.1.2
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3)
Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
ASSISTANT:
Now that Flask is installed, let me run the Python file again:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
OBSERVATION:
[1] 124
ASSISTANT:
Let me check the server log again:
<execute_bash>
cat server.log
</execute_bash>
USER:
OBSERVATION:
* Serving Flask app 'app'
* Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on http://127.0.0.1:5000
Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
USER: Now browse the newly started server's homepage and show me the content.
ASSISTANT:
Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
<execute_browse>
Get the content on "http://127.0.0.1:5000"
</execute_browse>
USER:
OBSERVATION:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
ASSISTANT:
The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
Sure! Let me stop the server first:
<execute_bash>
kill 124
</execute_bash>
USER:
OBSERVATION:
[1]+ Terminated python3 app.py > server.log 2>&1
ASSISTANT:
The server has been stopped. Let me open the Python file and modify it.
<execute_ipython>
open_file('app.py')
</execute_ipython>
USER:
[File: /workspace/app.py (10 lines total)]
(this is the beginning of the file)
1|from flask import Flask
2|app = Flask(__name__)
3|
4|@app.route('/')
5|def index():
6| numbers = list(range(1, 11))
7| return str(numbers)
8|
9|if __name__ == '__main__':
10| app.run(port=5000)
(this is the end of the file)
ASSISTANT:
I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
<file_edit path="/workspace/app.py" start=4 end=7>
@app.route('/')
def index():
numbers = list(range(1, 11))
ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
return ret
</file_edit>
USER:
Observation:
[Edited existing file /workspace/app.py]
[File: /workspace/app.py (10 lines total after edit)]
(this is the beginning of the file)
1|from flask import Flask
2|app = Flask(__name__)
3|
4|@app.route('/')
5|def index():
6| numbers = list(range(1, 11))
7| ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
8| return ret
9|
10|if __name__ == '__main__':
11| app.run(port=5000)
(this is the end of the file)
[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
USER:
[User has edited a file]
OBSERVATION:
[Existing file app.py is edited with 1 changes.]
[begin of edit 1 / 1]
(content before edit)
1|from flask import Flask
2|app = Flask(__name__)
3|
4|@app.route('/')
5|def index():
6| numbers = list(range(1, 11))
7| ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
8| return ret
9|
10|if __name__ == '__main__':
11| app.run(port=5000)
(content after edit)
1|from flask import Flask
2|app = Flask(__name__)
3|
4|@app.route('/')
5|def index():
6| numbers = list(range(1, 11))
7| ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
8| return ret
9|
+10|@app.route('/reversed')
+11|
12|if __name__ == '__main__':
13| app.run(port=5000)
[end of edit 1 / 1]
ASSISTANT:
I see you've started to add a new route '/reversed'. Would you like me to
implement the logic for this route by reversing the numbers?
USER:
continue
ASSISTANT:
I should edit the file to display the reversed numbers in a table format. I should include correct indentation. Let me update the file:
<file_edit path="/workspace/app.py" start=10 end=12>
@app.route('/reversed')
def reversed_index():
numbers = reversed(list(range(1, 11)))
ret = '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
return ret
</file_edit>
ASSISTANT:
Running the updated file:
<execute_bash>
python3 app.py > server.log 2>&1 &
</execute_bash>
USER:
Observation:
[1] 126
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests!
<finish></finish>
--- END OF EXAMPLE ---
{% endset %}
Here is an example of how you can interact with the environment for task solving:
{{ DEFAULT_EXAMPLE }}
{% if micro_agent %}
--- BEGIN OF GUIDELINE ---
The following information may assist you in completing your task:
{{ micro_agent }}
--- END OF GUIDELINE ---
{% endif %}
NOW, LET'S START!

View File

@@ -282,6 +282,11 @@ class AgentController:
if self.state.agent_state == AgentState.USER_REJECTED:
await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
return
if observation.source == EventSource.USER:
if self.state.agent_state == AgentState.AWAITING_USER_INPUT:
await self.set_agent_state_to(AgentState.RUNNING)
elif isinstance(observation, ErrorObservation):
if self.state.agent_state == AgentState.ERROR:
self.state.metrics.merge(self.state.local_metrics)

View File

@@ -1,5 +1,6 @@
import asyncio
import logging
import os
import sys
from typing import Type
from uuid import uuid4
@@ -38,6 +39,8 @@ from openhands.storage import get_file_store
def display_message(message: str):
if not message:
return
print(colored('🤖 ' + message + '\n', 'yellow'))
@@ -56,7 +59,8 @@ def display_command_output(output: str):
def display_file_edit(event: FileEditAction | FileEditObservation):
print(colored(str(event), 'green'))
# print(colored(str(event), 'green'))
pass
def display_event(event: Event):
@@ -66,14 +70,24 @@ def display_event(event: Event):
if isinstance(event, MessageAction):
if event.source == EventSource.AGENT:
display_message(event.content)
if isinstance(event, CmdRunAction):
elif isinstance(event, CmdRunAction):
display_command(event.command)
if isinstance(event, CmdOutputObservation):
elif isinstance(event, CmdOutputObservation):
display_command_output(event.content)
if isinstance(event, FileEditAction):
display_file_edit(event)
if isinstance(event, FileEditObservation):
elif isinstance(event, FileEditAction):
display_file_edit(event)
elif isinstance(event, FileEditObservation):
if event.source == EventSource.ENVIRONMENT:
# For file watcher events, use a different color and format
if not event.prev_exist:
print(colored(f'📝 File created: {event.path}', 'cyan'))
elif event.new_content == '':
print(colored(f'🗑️ File deleted: {event.path}', 'red'))
else:
print(colored(f'✏️ File modified: {event.path}', 'yellow'))
else:
# For regular file edits, use the standard display
display_file_edit(event)
async def main():
@@ -89,6 +103,15 @@ async def main():
help='Show the version number and exit',
default=None,
)
# Add the watch directory argument
parser.add_argument(
'-w',
'--watch',
type=str,
help='Directory to watch for changes',
metavar='DIR',
default=None,
)
args = parser.parse_args()
if args.version:
@@ -110,6 +133,19 @@ async def main():
file_store = get_file_store(config.file_store, config.file_store_path)
event_stream = EventStream(sid, file_store)
if args.watch:
from openhands.intent.watch import FileWatcher
watch_dir = os.path.abspath(args.watch)
if not os.path.isdir(watch_dir):
print(
f"Error: Watch directory '{args.watch}' does not exist or is not a directory"
)
return
print(f'Starting file watcher for directory: {watch_dir}')
file_watcher = FileWatcher(directory=watch_dir, event_stream=event_stream)
file_watcher.start()
runtime_cls = get_runtime_cls(config.runtime)
runtime: Runtime = runtime_cls( # noqa: F841
config=config,
@@ -124,11 +160,12 @@ async def main():
max_iterations=config.max_iterations,
max_budget_per_task=config.max_budget_per_task,
agent_to_llm_config=config.get_agent_to_llm_config_map(),
agent_configs=config.get_agent_configs(),
event_stream=event_stream,
)
async def prompt_for_next_task():
# Run input() in a thread pool to avoid blocking the event loop
await controller.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
loop = asyncio.get_event_loop()
next_message = await loop.run_in_executor(
None, lambda: input('How can I help? >> ')
@@ -162,6 +199,11 @@ async def main():
controller, runtime, [AgentState.STOPPED, AgentState.ERROR]
)
# Stop file watcher if it was started
if args.watch and 'file_watcher' in locals():
print('Stopping file watcher...')
file_watcher.stop()
if __name__ == '__main__':
loop = asyncio.new_event_loop()

View File

@@ -0,0 +1 @@
"""Intent detection and processing for OpenHands."""

464
openhands/intent/watch.py Normal file
View File

@@ -0,0 +1,464 @@
import os
import time
from difflib import unified_diff
from pathlib import Path
from threading import Timer
from typing import Dict, Optional, Set
import pathspec
from watchdog.events import FileSystemEvent, FileSystemEventHandler
from watchdog.observers import Observer
from openhands.events import EventSource, EventStream
from openhands.events.observation import FileEditObservation
class FileWatcher(FileSystemEventHandler):
"""Watches a directory for filesystem changes and emits events to the EventStream.
Args:
directory (str): The directory path to watch for changes
event_stream (EventStream): The event stream to emit events to
recursive (bool, optional): Whether to watch subdirectories recursively. Defaults to True.
patterns (list[str], optional): List of glob patterns to match files against. Defaults to None.
ignore_patterns (list[str], optional): List of glob patterns to ignore. Defaults to None.
"""
def __init__(
    self,
    directory: str,
    event_stream: EventStream,
    recursive: bool = True,
    patterns: Optional[list[str]] = None,
    ignore_patterns: Optional[list[str]] = None,
):
    """Configure the watcher and snapshot the files already in ``directory``.

    The observer is created here but not started; call ``start()`` to begin
    receiving filesystem events.
    """
    super().__init__()

    # --- What to watch and where to report it ---
    self.directory = os.path.abspath(directory)
    self.event_stream = event_stream
    self.recursive = recursive
    self.patterns = patterns

    # The .git directory and its contents are always excluded; any
    # caller-supplied ignore patterns are added on top of that baseline.
    self.ignore_patterns = {'.git', '.git/*'} | set(ignore_patterns or ())

    # Rules taken from the watched directory's own .gitignore.
    self.gitignore_spec = self._load_gitignore()

    self.observer = Observer()

    # --- Change-tracking state ---
    # Last known text of each tracked file, keyed by absolute path.
    self.file_contents: Dict[str, str] = {}
    # Paths with a change waiting on its debounce timer.
    self.pending_changes: Set[str] = set()
    # One debounce timer per path.
    self.debounce_timers: Dict[str, Timer] = {}
    # Debounce delay, in seconds.
    self.debounce_delay = 0.1
    # Debouncing can be switched off (the tests do so).
    self.use_debouncing = True
    # Recently deleted files, kept so atomic renames can be recognised.
    self.recent_deletes: Dict[str, tuple[str, float]] = {}
    # Window (seconds) in which a delete followed by a create counts as a rename.
    self.rename_window = 0.1

    # Populate the content cache from what is on disk right now.
    self._initialize_file_contents()
def _load_gitignore(self) -> pathspec.PathSpec:
    """Build a PathSpec from the ``.gitignore`` at the root of the watched directory.

    Only ``<self.directory>/.gitignore`` is consulted; ``.gitignore`` files in
    subdirectories are not read. Empty lines and lines starting with ``#``
    are dropped before the patterns are compiled.

    Returns:
        pathspec.PathSpec: The compiled spec. It contains no patterns when
        the file is missing, unreadable, or not valid UTF-8.
    """
    gitignore_path = os.path.join(self.directory, '.gitignore')
    gitignore_patterns: list[str] = []
    try:
        if os.path.isfile(gitignore_path):
            # Decode explicitly as UTF-8, like the other file reads in this
            # class, so the result does not depend on the process locale.
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                gitignore_patterns = [
                    line
                    for line in f.read().splitlines()
                    if line and not line.startswith('#')
                ]
    except (IOError, UnicodeDecodeError):
        # Best effort: an unreadable or undecodable .gitignore simply means
        # no extra ignore rules, rather than a failure to construct the watcher.
        pass
    return pathspec.PathSpec.from_lines(
        pathspec.patterns.GitWildMatchPattern, gitignore_patterns
    )
def _initialize_file_contents(self):
"""Initialize the content cache for existing files in the watched directory."""
for root, dirs, files in os.walk(self.directory, topdown=True):
# Filter out ignored directories to prevent walking into them
dirs[:] = [
d for d in dirs if not self._should_ignore(os.path.join(root, d))
]
# Process files in non-ignored directories
for file in files:
abs_path = os.path.join(root, file)
if not self._should_ignore(abs_path) and self._should_watch(abs_path):
try:
with open(abs_path, 'r', encoding='utf-8') as f:
self.file_contents[abs_path] = f.read()
except (IOError, UnicodeDecodeError):
# Skip files that can't be read or aren't text files
pass
def start(self):
    """Register this handler for ``self.directory`` and start the observer."""
    watcher = self.observer
    # This object is itself the event handler that gets scheduled.
    watcher.schedule(self, self.directory, recursive=self.recursive)
    watcher.start()
def stop(self):
    """Cancel pending debounce timers, then stop the observer and wait for it."""
    # Iterate over a snapshot: a timer that fires concurrently removes its
    # own entry from self.debounce_timers (see _handle_debounced_change), and
    # iterating the live view would then raise
    # "RuntimeError: dictionary changed size during iteration".
    for timer in list(self.debounce_timers.values()):
        timer.cancel()
    self.observer.stop()
    self.observer.join()
def _handle_debounced_change(self, path: str):
"""Handle a debounced file change event."""
if path not in self.pending_changes:
return
self.pending_changes.remove(path)
self.debounce_timers.pop(path, None)
# Skip if file should be ignored
if self._should_ignore(path) or not self._should_watch(path):
return
# Skip if this is a neovim swap file or backup file
if (
path.endswith('.swp')
or path.endswith('.swo')
or path.endswith('~')
or os.path.basename(path).startswith('4913')
):
return
rel_path = os.path.relpath(path, self.directory)
old_content = self.file_contents.get(path, '')
new_content = self._read_file_content(path)
# Only emit event if content actually changed
if old_content != new_content:
diff = self._generate_diff(old_content, new_content, rel_path)
self.file_contents[path] = new_content
observation = FileEditObservation(
path=rel_path,
prev_exist=True,
old_content=old_content,
new_content=new_content,
content=diff,
)
self.event_stream.add_event(observation, EventSource.USER)
def _schedule_debounced_change(self, path: str):
"""Schedule a debounced change event for a file."""
# Cancel existing timer if any
if path in self.debounce_timers:
self.debounce_timers[path].cancel()
# Create new timer
timer = Timer(self.debounce_delay, self._handle_debounced_change, args=[path])
timer.start()
self.debounce_timers[path] = timer
self.pending_changes.add(path)
def _should_ignore(self, path: str) -> bool:
    """Tell whether ``path`` is excluded from watching.

    A path is ignored when any of its components is ``.git``, when it matches
    one of ``self.ignore_patterns``, or when ``self.gitignore_spec`` matches
    it.

    Args:
        path: Path to test; it is made relative to ``self.directory``.
    """
    # Work on a '/'-separated path relative to the watched directory so the
    # checks behave the same with Windows separators.
    relative = os.path.relpath(path, self.directory).replace(os.sep, '/')

    # Anything inside a .git directory (at any depth) is always ignored.
    if '.git' in relative.split('/'):
        return True

    # Explicit ignore patterns come next.
    candidate = Path(relative)
    for pattern in self.ignore_patterns:
        if candidate.match(pattern):
            return True

    spec = self.gitignore_spec
    if not os.path.isdir(path):
        # Plain files are matched as-is.
        return spec.match_file(relative)
    # Directories are tried both with and without the trailing slash.
    return spec.match_file(relative) or spec.match_file(relative + '/')
def _should_watch(self, path: str) -> bool:
    """Tell whether ``path`` matches the configured watch patterns.

    With no patterns configured (``self.patterns is None``) every path is
    watched; otherwise the path, relative to ``self.directory``, must match
    at least one pattern.
    """
    wanted = self.patterns
    if wanted is None:
        return True

    candidate = Path(os.path.relpath(path, self.directory))
    for pattern in wanted:
        if candidate.match(pattern):
            return True
    return False
def _read_file_content(self, path: str) -> str:
    """Return the UTF-8 text of ``path``, or ``''`` when it cannot be read.

    Both I/O failures (missing file, permission problems, ...) and content
    that is not valid UTF-8 yield the empty string.
    """
    text = ''
    try:
        with open(path, encoding='utf-8') as handle:
            text = handle.read()
    except (OSError, UnicodeDecodeError):
        # Best effort: unreadable or non-text files are treated as empty.
        pass
    return text
def _generate_diff(self, old_content: str, new_content: str, path: str) -> str:
    """Generate a unified diff between old and new content without context lines.

    The ``---``/``+++`` file headers and the ``@@`` hunk headers are removed,
    leaving only the ``-``/``+`` lines. Every returned line is terminated by
    a newline, including a final line whose content had none.

    Args:
        old_content: Previous text of the file.
        new_content: Current text of the file.
        path: Label passed to ``unified_diff`` as both file names.

    Returns:
        The changed lines, or ``''`` when the contents are identical.
    """
    old_lines = old_content.splitlines(keepends=True)
    new_lines = new_content.splitlines(keepends=True)

    # Generate diff with no context lines (n=0)
    diff_lines = list(
        unified_diff(
            old_lines, new_lines, fromfile=path, tofile=path, n=0, lineterm=''
        )
    )

    # The first two lines are the file name headers; hunk headers start with
    # '@@'. Content lines always start with '+', '-' or ' ', so the prefix
    # test cannot drop real content.
    body = [line for line in diff_lines[2:] if not line.startswith('@@')]

    # A last line without a trailing newline would otherwise be glued to the
    # next diff line (e.g. '-Old content+New content').
    return ''.join(line if line.endswith('\n') else line + '\n' for line in body)
def on_created(self, event: FileSystemEvent):
    """Handle file creation event.

    Directory events, editor swap/backup files and ignored/unwatched paths
    are skipped. If the new file's content equals that of a file deleted
    within ``self.rename_window`` seconds, the pair is treated as a rename
    and no event is emitted. Otherwise the creation is either debounced or
    reported immediately as a ``FileEditObservation`` with
    ``prev_exist=False``.

    Args:
        event: The filesystem event delivered for the created path.
    """
    if event.is_directory:
        return

    path = event.src_path

    # If this is a neovim swap file or backup file, ignore it
    if path.endswith(('.swp', '.swo', '~')) or os.path.basename(path).startswith(
        '4913'
    ):
        return

    if self._should_ignore(path) or not self._should_watch(path):
        return

    # Check if this is part of an atomic rename operation
    now = time.time()
    new_content = None
    for old_path, (old_content, timestamp) in list(self.recent_deletes.items()):
        if now - timestamp > self.rename_window:
            continue
        # Read the new file once, and only when there is a recent delete to
        # compare against.
        if new_content is None:
            new_content = self._read_file_content(path)
        if new_content == old_content:
            # This is definitely a rename, don't emit any events
            self.file_contents[path] = new_content
            # The delayed-delete timer may have removed the entry in the
            # meantime, so tolerate a missing key.
            self.recent_deletes.pop(old_path, None)
            return

    if self.use_debouncing:
        self._schedule_debounced_change(path)
        return

    if new_content is None:
        new_content = self._read_file_content(path)
    self.file_contents[path] = new_content

    # For new files, the diff will be all additions
    rel_path = os.path.relpath(path, self.directory)
    diff = self._generate_diff('', new_content, rel_path)
    observation = FileEditObservation(
        path=rel_path,
        prev_exist=False,
        old_content='',
        new_content=new_content,
        content=diff,
    )
    self.event_stream.add_event(observation, EventSource.USER)
def on_modified(self, event: FileSystemEvent):
    """React to a modification of a watched file.

    Directory events, editor swap/backup files and ignored/unwatched paths
    are skipped. With debouncing enabled the change is deferred to the
    debounce timer; otherwise the file is re-read right away and a
    ``FileEditObservation`` is emitted if its content differs from the
    cached copy.

    Args:
        event: The filesystem event delivered for the modified path.
    """
    if event.is_directory:
        return

    changed_path = event.src_path

    # neovim swap/backup files never produce events.
    is_editor_temp = changed_path.endswith(
        ('.swp', '.swo', '~')
    ) or os.path.basename(changed_path).startswith('4913')
    if is_editor_temp:
        return

    if self._should_ignore(changed_path) or not self._should_watch(changed_path):
        return

    if self.use_debouncing:
        self._schedule_debounced_change(changed_path)
        return

    relative = os.path.relpath(changed_path, self.directory)
    before = self.file_contents.get(changed_path, '')
    after = self._read_file_content(changed_path)

    # Identical content means there is nothing to report.
    if before == after:
        return

    patch_text = self._generate_diff(before, after, relative)
    self.file_contents[changed_path] = after
    self.event_stream.add_event(
        FileEditObservation(
            path=relative,
            prev_exist=True,
            old_content=before,
            new_content=after,
            content=patch_text,
        ),
        EventSource.USER,
    )
def on_deleted(self, event: FileSystemEvent):
    """Handle file deletion event.

    Directory events and editor swap/backup files are skipped. Any pending
    debounced change for the path is cancelled. For watched paths the cached
    content is dropped; with debouncing enabled the deletion is held back for
    ``self.rename_window`` seconds in case a matching creation turns it into
    a rename, otherwise a deletion ``FileEditObservation`` is emitted at
    once.

    Args:
        event: The filesystem event delivered for the deleted path.
    """
    if event.is_directory:
        return

    path = event.src_path

    # If this is a neovim swap file or backup file, ignore it
    if path.endswith(('.swp', '.swo', '~')) or os.path.basename(path).startswith(
        '4913'
    ):
        return

    # Cancel any pending changes for this file. A single pop() with a default
    # is used instead of "in" + index + pop: the debounce callback removes
    # its own entry from another thread, so the key can vanish between those
    # steps and raise KeyError.
    pending_timer = self.debounce_timers.pop(path, None)
    if pending_timer is not None:
        pending_timer.cancel()
    self.pending_changes.discard(path)

    if self._should_ignore(path) or not self._should_watch(path):
        return

    # Take the deleted file's content out of the cache
    old_content = self.file_contents.pop(path, '')

    if self.use_debouncing:
        # Only schedule a delete timer if we haven't already scheduled one
        if path not in self.recent_deletes:
            # Store the content temporarily in case this is a rename
            self.recent_deletes[path] = (old_content, time.time())
            # Report (or drop) the deletion after the rename window
            timer = Timer(
                self.rename_window,
                self._handle_delayed_delete,
                args=[path, old_content],
            )
            timer.start()
        return

    # Emit deletion event immediately
    rel_path = os.path.relpath(path, self.directory)
    diff = self._generate_diff(old_content, '', rel_path)
    observation = FileEditObservation(
        path=rel_path,
        prev_exist=True,
        old_content=old_content,
        new_content='',
        content=diff,
    )
    self.event_stream.add_event(observation, EventSource.USER)
def _handle_delayed_delete(self, path: str, old_content: str):
    """Handle a deletion after waiting to see if it's part of a rename.

    The deletion is reported only if ``path`` is still recorded in
    ``self.recent_deletes`` with the content this timer was started for.

    Args:
        path: Path of the deleted file.
        old_content: Content the file had when this timer was scheduled.
    """
    stored = self.recent_deletes.get(path)
    if stored is None or stored[0] != old_content:
        # Either a rename already consumed the entry, or the entry belongs to
        # a newer deletion of the same path. In the latter case it must be
        # left in place for that deletion's own timer; removing it here would
        # make the newer deletion go unreported.
        return

    # Use pop with a default value to avoid KeyError
    self.recent_deletes.pop(path, None)

    # This was a real deletion, not part of a rename
    rel_path = os.path.relpath(path, self.directory)
    diff = self._generate_diff(old_content, '', rel_path)
    observation = FileEditObservation(
        path=rel_path,
        prev_exist=True,
        old_content=old_content,
        new_content='',
        content=diff,
    )
    self.event_stream.add_event(observation, EventSource.USER)
def on_moved(self, event: FileSystemEvent):
    """Handle file move/rename event.

    A move is reported as up to two ``FileEditObservation`` events: a
    deletion of the source when the source is watched, and a creation of the
    destination when the destination is watched. Directory moves and moves
    whose source or destination is an editor swap/backup file are skipped.

    Args:
        event: The filesystem event carrying ``src_path`` and ``dest_path``.
    """
    if event.is_directory:
        return

    src_path, dest_path = event.src_path, event.dest_path

    # Cancel any pending changes for the source file. pop() with a default
    # avoids a KeyError if the debounce callback removed the entry meanwhile.
    pending_timer = self.debounce_timers.pop(src_path, None)
    if pending_timer is not None:
        pending_timer.cancel()
    self.pending_changes.discard(src_path)

    # If either end is a neovim swap file or backup file, ignore the move
    for candidate in (src_path, dest_path):
        if candidate.endswith(('.swp', '.swo', '~')) or os.path.basename(
            candidate
        ).startswith('4913'):
            return

    src_watched = not self._should_ignore(src_path) and self._should_watch(src_path)
    dest_watched = not self._should_ignore(dest_path) and self._should_watch(
        dest_path
    )

    if src_watched:
        # Handle source file deletion
        src_rel_path = os.path.relpath(src_path, self.directory)
        content = self.file_contents.pop(src_path, '')
        src_diff = self._generate_diff(content, '', src_rel_path)
        observation = FileEditObservation(
            path=src_rel_path,
            prev_exist=True,
            old_content=content,
            new_content='',
            content=src_diff,
        )
        self.event_stream.add_event(observation, EventSource.USER)
    elif dest_watched:
        # The file came from an ignored/unwatched location, so there is no
        # cached content for it; read what actually landed at the
        # destination instead of dropping the event.
        content = self._read_file_content(dest_path)
    else:
        return

    # Handle destination file creation
    if dest_watched:
        dest_rel_path = os.path.relpath(dest_path, self.directory)
        self.file_contents[dest_path] = content
        dest_diff = self._generate_diff('', content, dest_rel_path)
        observation = FileEditObservation(
            path=dest_rel_path,
            prev_exist=False,
            old_content='',
            new_content=content,
            content=dest_diff,
        )
        self.event_stream.add_event(observation, EventSource.USER)

View File

@@ -41,6 +41,7 @@ pyarrow = "17.0.0" # transitive dependency, pinned here to avoid conflicts
tenacity = "^8.5.0"
zope-interface = "7.1.1"
pathspec = "^0.12.1"
watchdog = "^3.0.0"
google-cloud-aiplatform = "*"
anthropic = {extras = ["vertex"], version = "*"}
grep-ast = "0.3.3"
@@ -95,6 +96,7 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]
[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
@@ -125,6 +127,7 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"

503
tests/test_file_watcher.py Normal file
View File

@@ -0,0 +1,503 @@
import os
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from watchdog.events import FileCreatedEvent, FileDeletedEvent, FileModifiedEvent, FileMovedEvent
from openhands.events import EventSource
from openhands.events.observation import FileEditObservation
from openhands.intent.watch import FileWatcher
@pytest.fixture
def mock_event_stream():
    """Provide a stand-in event stream whose ``add_event`` calls can be inspected."""
    event_stream = MagicMock()
    event_stream.add_event = MagicMock()
    return event_stream
@pytest.fixture
def temp_dir():
    """Yield the path of a scratch directory that is removed after the test."""
    with tempfile.TemporaryDirectory() as scratch:
        yield scratch
@pytest.fixture
def watcher(mock_event_stream, temp_dir):
    """Yield a ``FileWatcher`` on ``temp_dir`` with debouncing switched off.

    ``watchdog.observers.Observer`` stays patched for the lifetime of the
    fixture.
    """
    # NOTE(review): this patch only affects lookups made through the
    # ``watchdog.observers`` module at call time -- confirm that is how the
    # watcher module obtains ``Observer``.
    with patch('watchdog.observers.Observer'):
        instance = FileWatcher(temp_dir, mock_event_stream)
        # The basic tests expect handlers to emit events synchronously.
        instance.use_debouncing = False
        yield instance
def create_test_file(path: str, content: str = ""):
    """Create (or overwrite) a test file with the given content.

    Missing parent directories are created. The file is written as UTF-8,
    the encoding the watcher uses when reading files back.

    Args:
        path: Location of the file to write.
        content: Text to store in the file.
    """
    parent = os.path.dirname(path)
    # A bare file name has no directory part, and os.makedirs('') raises.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)
def test_file_creation(watcher, temp_dir):
    """A created file yields one USER observation describing the new content."""
    text = "Hello, World!"
    target = os.path.join(temp_dir, "test.txt")
    create_test_file(target, text)

    # Feed the watcher the event watchdog would deliver.
    watcher.on_created(FileCreatedEvent(target))

    add_event = watcher.event_stream.add_event
    add_event.assert_called_once()
    observation, source = add_event.call_args[0]

    assert isinstance(observation, FileEditObservation)
    assert observation.path == "test.txt"  # relative to the watched directory
    assert observation.prev_exist is False
    assert observation.old_content == ""
    assert observation.new_content == text
    assert observation.content.startswith("+Hello, World!")
    assert source == EventSource.USER
def test_file_modification(watcher, temp_dir):
    """A modified file yields one USER observation with old and new content."""
    before, after = "Old content", "New content"
    target = os.path.join(temp_dir, "test.txt")

    # Seed both the file and the watcher's cache with the old text, then
    # rewrite the file on disk.
    create_test_file(target, before)
    watcher.file_contents[target] = before
    create_test_file(target, after)

    # Feed the watcher the event watchdog would deliver.
    watcher.on_modified(FileModifiedEvent(target))

    add_event = watcher.event_stream.add_event
    add_event.assert_called_once()
    observation, source = add_event.call_args[0]

    assert isinstance(observation, FileEditObservation)
    assert observation.path == "test.txt"
    assert observation.prev_exist is True
    assert observation.old_content == before
    assert observation.new_content == after
    assert "-Old content" in observation.content
    assert "+New content" in observation.content
    assert source == EventSource.USER
def test_file_deletion(watcher, temp_dir):
    """A deleted file yields one USER observation with empty new content."""
    text = "Content to delete"
    target = os.path.join(temp_dir, "test.txt")

    # Create and register the file, then remove it from disk.
    create_test_file(target, text)
    watcher.file_contents[target] = text
    os.unlink(target)

    # Feed the watcher the event watchdog would deliver.
    watcher.on_deleted(FileDeletedEvent(target))

    add_event = watcher.event_stream.add_event
    add_event.assert_called_once()
    observation, source = add_event.call_args[0]

    assert isinstance(observation, FileEditObservation)
    assert observation.path == "test.txt"
    assert observation.prev_exist is True
    assert observation.old_content == text
    assert observation.new_content == ""
    assert "-Content to delete" in observation.content
    assert source == EventSource.USER
def test_file_move(watcher, temp_dir):
    """A move is reported as a deletion of the source plus a creation of the target."""
    text = "Content to move"
    source_path = os.path.join(temp_dir, "old.txt")
    target_path = os.path.join(temp_dir, "new.txt")

    # Create and register the source file, then rename it on disk.
    create_test_file(source_path, text)
    watcher.file_contents[source_path] = text
    os.rename(source_path, target_path)

    # Feed the watcher the event watchdog would deliver.
    watcher.on_moved(FileMovedEvent(source_path, target_path))

    add_event = watcher.event_stream.add_event
    assert add_event.call_count == 2
    recorded = add_event.call_args_list

    # First call: deletion of the old path.
    removed, removed_source = recorded[0][0]
    assert isinstance(removed, FileEditObservation)
    assert removed.path == "old.txt"
    assert removed.prev_exist is True
    assert removed.old_content == text
    assert removed.new_content == ""
    assert "-Content to move" in removed.content
    assert removed_source == EventSource.USER

    # Second call: creation of the new path.
    added, added_source = recorded[1][0]
    assert isinstance(added, FileEditObservation)
    assert added.path == "new.txt"
    assert added.prev_exist is False
    assert added.old_content == ""
    assert added.new_content == text
    assert "+Content to move" in added.content
    assert added_source == EventSource.USER
def test_gitignore_handling(watcher, temp_dir):
    """Paths matched by the directory's .gitignore are ignored; others are not."""
    gitignore_content = """
# Node modules
**/node_modules/
# Python
*.pyc
__pycache__/
# Custom
/ignored/
*.log
"""
    create_test_file(os.path.join(temp_dir, ".gitignore"), gitignore_content)

    # The watcher loaded its spec before the file existed, so reload it.
    watcher.gitignore_spec = watcher._load_gitignore()

    expectations = {
        "node_modules/file.txt": True,
        "frontend/node_modules/package.json": True,
        "deep/path/node_modules/file.js": True,
        "file.pyc": True,
        "dir/__pycache__/module.pyc": True,
        "ignored/file.txt": True,
        "debug.log": True,
        "src/app.js": False,
        "frontend/src/components/Button.tsx": False,
        "README.md": False,
    }
    for rel_path, should_ignore in expectations.items():
        candidate = os.path.join(temp_dir, rel_path)
        assert watcher._should_ignore(candidate) == should_ignore, f"Failed for {rel_path}"
def test_git_directory_ignored(watcher, temp_dir):
    """Everything under a .git directory is ignored, whatever .gitignore says."""
    git_files = [
        ".git/HEAD",
        ".git/config",
        ".git/refs/heads/main",
        ".git/objects/ab/cdef1234567890",
        "subdir/.git/HEAD",  # nested repositories count too
        "subdir/.git/config",
    ]
    # Ordinary files used as the negative control.
    normal_files = [
        "src/file.txt",
        "subdir/file.txt",
    ]

    # Materialise every file on disk first.
    for rel_path in git_files + normal_files:
        create_test_file(os.path.join(temp_dir, rel_path), "test content")

    for rel_path in git_files:
        abs_path = os.path.join(temp_dir, rel_path)
        assert watcher._should_ignore(abs_path), f".git file not ignored: {rel_path}"

        # For direct children of a .git directory, check the directory too.
        parent_dir = os.path.dirname(abs_path)
        if '.git' in os.path.basename(parent_dir):
            assert watcher._should_ignore(parent_dir), f".git directory not ignored: {os.path.dirname(rel_path)}"

    for rel_path in normal_files:
        abs_path = os.path.join(temp_dir, rel_path)
        assert not watcher._should_ignore(abs_path), f"Non-.git file incorrectly ignored: {rel_path}"
def test_explicit_ignore_patterns(watcher, temp_dir):
    """Ignore patterns passed to the constructor are honoured."""
    # Build a second watcher that shares the fixture's event stream but
    # carries custom ignore patterns.
    event_stream = watcher.event_stream
    with patch('watchdog.observers.Observer'):
        watcher = FileWatcher(
            temp_dir,
            event_stream,
            ignore_patterns=["*.txt", "temp/*"],
        )

    expectations = {
        "file.txt": True,
        "path/to/doc.txt": True,
        "temp/any.js": True,
        "temp/file.py": True,
        "file.js": False,
        "docs/file.md": False,
    }
    for rel_path, should_ignore in expectations.items():
        candidate = os.path.join(temp_dir, rel_path)
        assert watcher._should_ignore(candidate) == should_ignore, f"Failed for {rel_path}"
def test_watch_patterns(watcher, temp_dir):
    """Only paths matching the constructor's watch patterns are watched."""
    # Build a second watcher that shares the fixture's event stream but is
    # restricted to a set of watch patterns.
    event_stream = watcher.event_stream
    with patch('watchdog.observers.Observer'):
        watcher = FileWatcher(
            temp_dir,
            event_stream,
            patterns=["*.py", "src/*.ts"],
        )

    expectations = {
        "file.py": True,
        "src/app.ts": True,
        "src/deep/file.ts": False,  # not directly inside src/
        "file.js": False,
        "src/file.js": False,
    }
    for rel_path, should_watch in expectations.items():
        candidate = os.path.join(temp_dir, rel_path)
        assert watcher._should_watch(candidate) == should_watch, f"Failed for {rel_path}"
@pytest.fixture
def watcher_with_short_delay(mock_event_stream, temp_dir):
    """Yield a ``FileWatcher`` on ``temp_dir`` whose debounce delay is 10 ms.

    ``watchdog.observers.Observer`` stays patched for the lifetime of the
    fixture.
    """
    with patch('watchdog.observers.Observer'):
        instance = FileWatcher(temp_dir, mock_event_stream)
        # Keep the debounce tests fast.
        instance.debounce_delay = 0.01
        yield instance
def test_debounce_rapid_changes(watcher_with_short_delay, temp_dir):
    """Test that rapid changes to a file result in a single event."""
    import time

    watcher = watcher_with_short_delay
    file_path = os.path.join(temp_dir, "test.txt")
    initial_content = "Initial content"
    final_content = "Final content"

    # Create initial file
    create_test_file(file_path, initial_content)
    watcher.file_contents[file_path] = initial_content

    # Simulate rapid changes
    for i in range(5):
        create_test_file(file_path, f"Content version {i}")
        watcher.on_modified(FileModifiedEvent(file_path))

    # Final change
    create_test_file(file_path, final_content)
    watcher.on_modified(FileModifiedEvent(file_path))

    # Wait for the debounce callback. A fixed sleep barely longer than the
    # delay is flaky when the timer thread is scheduled late, so poll with a
    # generous upper bound instead.
    add_event = watcher.event_stream.add_event
    deadline = time.monotonic() + 2.0
    while not add_event.called and time.monotonic() < deadline:
        time.sleep(0.005)

    # Should only have one event with the final content
    add_event.assert_called_once()
    observation, source = add_event.call_args[0]
    assert isinstance(observation, FileEditObservation)
    assert observation.path == "test.txt"
    assert observation.old_content == initial_content
    assert observation.new_content == final_content
def test_neovim_sequence(watcher_with_short_delay, temp_dir):
    """Test handling of neovim's sequence of file operations."""
    import time

    watcher = watcher_with_short_delay
    file_path = os.path.join(temp_dir, "test.txt")
    initial_content = "Initial content"
    final_content = "Final content"

    # Create initial file
    create_test_file(file_path, initial_content)
    watcher.file_contents[file_path] = initial_content

    # Simulate neovim's sequence of operations
    # 1. Create swap file
    swap_path = os.path.join(temp_dir, "4913")
    watcher.on_created(FileCreatedEvent(swap_path))

    # 2. Delete swap file
    watcher.on_deleted(FileDeletedEvent(swap_path))

    # 3. Create backup
    backup_path = file_path + "~"
    watcher.on_created(FileCreatedEvent(backup_path))

    # 4. Modify original file
    create_test_file(file_path, final_content)
    watcher.on_modified(FileModifiedEvent(file_path))

    # 5. Delete backup
    watcher.on_deleted(FileDeletedEvent(backup_path))

    # Wait for the debounce callback. A fixed sleep barely longer than the
    # delay is flaky when the timer thread is scheduled late, so poll with a
    # generous upper bound instead.
    add_event = watcher.event_stream.add_event
    deadline = time.monotonic() + 2.0
    while not add_event.called and time.monotonic() < deadline:
        time.sleep(0.005)

    # Should only have one event with the final content
    assert add_event.call_count == 1
    observation, source = add_event.call_args[0]
    assert isinstance(observation, FileEditObservation)
    assert observation.path == "test.txt"
    assert observation.old_content == initial_content
    assert observation.new_content == final_content
def test_debounce_timer_cancellation(watcher_with_short_delay, temp_dir):
    """Deleting a file cancels its pending debounced change."""
    import time

    subject = watcher_with_short_delay
    target = os.path.join(temp_dir, "test.txt")
    text = "Initial content"

    # Create and register the file.
    create_test_file(target, text)
    subject.file_contents[target] = text

    # A modification schedules a debounce timer for the path...
    subject.on_modified(FileModifiedEvent(target))
    assert target in subject.debounce_timers
    assert target in subject.pending_changes

    # ...and a deletion arriving before it fires removes it again.
    subject.on_deleted(FileDeletedEvent(target))
    assert target not in subject.debounce_timers
    assert target not in subject.pending_changes

    # Give any stray timer time to fire (longer than rename_window).
    time.sleep(0.2)

    # Only the deletion itself may have been reported.
    add_event = subject.event_stream.add_event
    assert add_event.call_count == 1
    observation, source = add_event.call_args[0]
    assert observation.new_content == ""  # Deletion event
def test_concurrent_delete_handling(watcher_with_short_delay, temp_dir):
    """Two delete events for the same file produce a single deletion observation."""
    import time

    subject = watcher_with_short_delay
    target = os.path.join(temp_dir, "test.txt")
    text = "File content"

    # Create and register the file.
    create_test_file(target, text)
    subject.file_contents[target] = text

    # Deliver the same delete twice, the second before the first is processed.
    delete_event = FileDeletedEvent(target)
    subject.on_deleted(delete_event)
    subject.on_deleted(delete_event)

    # Wait past rename_window so every delayed handler has run.
    time.sleep(0.2)

    add_event = subject.event_stream.add_event
    assert add_event.call_count == 1
    observation, source = add_event.call_args[0]
    assert observation.path == "test.txt"
    assert observation.old_content == text
    assert observation.new_content == ""
def test_atomic_rename_handling(watcher_with_short_delay, temp_dir):
    """Test that atomic renames (delete+create with same content) are handled correctly."""
    import time

    watcher = watcher_with_short_delay
    add_event = watcher.event_stream.add_event
    old_path = os.path.join(temp_dir, "old.txt")
    new_path = os.path.join(temp_dir, "new.txt")
    content = "File content"

    # Create initial file
    create_test_file(old_path, content)
    watcher.file_contents[old_path] = content

    # Simulate atomic rename (delete + create with same content)
    watcher.on_deleted(FileDeletedEvent(old_path))

    # Create the new file with the same content
    create_test_file(new_path, content)
    watcher.on_created(FileCreatedEvent(new_path))

    # Wait a bit to ensure any delayed events are processed
    time.sleep(0.02)

    # Should have no events since it was just a rename
    assert add_event.call_count == 0
    assert new_path in watcher.file_contents
    assert watcher.file_contents[new_path] == content

    # Now modify the file
    new_content = "Modified content"
    create_test_file(new_path, new_content)
    watcher.on_modified(FileModifiedEvent(new_path))

    # Wait for the debounce callback. A fixed sleep barely longer than the
    # delay is flaky when the timer thread is scheduled late, so poll with a
    # generous upper bound instead.
    deadline = time.monotonic() + 2.0
    while not add_event.called and time.monotonic() < deadline:
        time.sleep(0.005)

    # Should now have one event for the modification
    assert add_event.call_count == 1
    observation, source = add_event.call_args[0]
    assert observation.path == "new.txt"
    assert observation.old_content == content
    assert observation.new_content == new_content