From 13ca75c8cbd72686b1a3bf4c6f325571aa5d4a1b Mon Sep 17 00:00:00 2001 From: Boxuan Li Date: Wed, 7 May 2025 16:51:08 +0800 Subject: [PATCH] Add windows local runtime support with PowerShell (#7410) Co-authored-by: Boxuan Li (from Dev Box) Co-authored-by: Graham Neubig Co-authored-by: Engel Nyst --- .github/workflows/py-unit-tests.yml | 25 + .../agenthub/codeact_agent/codeact_agent.py | 8 +- .../agenthub/codeact_agent/tools/bash.py | 24 +- openhands/llm/fn_call_converter.py | 15 +- openhands/runtime/action_execution_server.py | 111 +- .../action_execution_client.py | 57 +- openhands/runtime/impl/local/local_runtime.py | 123 +- openhands/runtime/plugins/jupyter/__init__.py | 115 +- openhands/runtime/utils/runtime_init.py | 12 + openhands/runtime/utils/windows_bash.py | 1413 +++++++++++++++++ poetry.lock | 38 +- pyproject.toml | 5 +- tests/runtime/test_bash.py | 987 ++++++++---- tests/runtime/test_ipython.py | 46 + tests/unit/test_windows_bash.py | 594 +++++++ 15 files changed, 3116 insertions(+), 457 deletions(-) create mode 100644 openhands/runtime/utils/windows_bash.py create mode 100644 tests/unit/test_windows_bash.py diff --git a/.github/workflows/py-unit-tests.yml b/.github/workflows/py-unit-tests.yml index 96927ba666..3e63abe9eb 100644 --- a/.github/workflows/py-unit-tests.yml +++ b/.github/workflows/py-unit-tests.yml @@ -53,3 +53,28 @@ jobs: uses: codecov/codecov-action@v5 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + + # Run specific Windows python tests + test-on-windows: + name: Python Tests on Windows + runs-on: windows-latest + strategy: + matrix: + python-version: ['3.12'] + steps: + - uses: actions/checkout@v4 + - name: Install pipx + run: pip install pipx + - name: Install poetry via pipx + run: pipx install poetry + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + - name: Install Python dependencies using Poetry + run: poetry install --without evaluation + - name: 
Run Windows unit tests + run: poetry run pytest -svv tests/unit/test_windows_bash.py + - name: Run Windows runtime tests + run: $env:TEST_RUNTIME="local"; poetry run pytest -svv tests/runtime/test_bash.py diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 612e976c2d..2262b107f2 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -1,5 +1,6 @@ import copy import os +import sys from collections import deque from typing import TYPE_CHECKING @@ -119,8 +120,11 @@ class CodeActAgent(Agent): if self.config.enable_finish: tools.append(FinishTool) if self.config.enable_browsing: - tools.append(WebReadTool) - tools.append(BrowserTool) + if sys.platform == 'win32': + logger.warning('Windows runtime does not support browsing yet') + else: + tools.append(WebReadTool) + tools.append(BrowserTool) if self.config.enable_jupyter: tools.append(IPythonTool) if self.config.enable_llm_editor: diff --git a/openhands/agenthub/codeact_agent/tools/bash.py b/openhands/agenthub/codeact_agent/tools/bash.py index 9adbeaa716..cd2d2388e1 100644 --- a/openhands/agenthub/codeact_agent/tools/bash.py +++ b/openhands/agenthub/codeact_agent/tools/bash.py @@ -1,3 +1,5 @@ +import sys + from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk _DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session. @@ -28,27 +30,35 @@ _SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal. * One command at a time: You can only execute one bash command at a time. 
If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.""" +def refine_prompt(prompt: str): + if sys.platform == 'win32': + return prompt.replace('bash', 'powershell') + return prompt + + def create_cmd_run_tool( use_short_description: bool = False, ) -> ChatCompletionToolParam: - description = ( - _SHORT_BASH_DESCRIPTION if use_short_description else _DETAILED_BASH_DESCRIPTION - ) + description = _SHORT_BASH_DESCRIPTION if use_short_description else _DETAILED_BASH_DESCRIPTION return ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( - name='execute_bash', - description=description, + name=refine_prompt('execute_bash'), + description=refine_prompt(description), parameters={ 'type': 'object', 'properties': { 'command': { 'type': 'string', - 'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.', + 'description': refine_prompt( + 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.' + ), }, 'is_input': { 'type': 'string', - 'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.', + 'description': refine_prompt( + 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.' 
+ ), 'enum': ['true', 'false'], }, }, diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py index 81ea4b106d..6cb73389d4 100644 --- a/openhands/llm/fn_call_converter.py +++ b/openhands/llm/fn_call_converter.py @@ -9,6 +9,7 @@ We follow format from: https://docs.litellm.ai/docs/completion/function_call import copy import json import re +import sys from typing import Iterable from litellm import ChatCompletionToolParam @@ -47,8 +48,15 @@ Reminder: STOP_WORDS = [' reset Jupyter PWD' ) - reset_jupyter_cwd_code = ( - f'import os; os.chdir("{self.bash_session.cwd}")' - ) + # escape windows paths + cwd = self.bash_session.cwd.replace('\\', '/') + reset_jupyter_cwd_code = f'import os; os.chdir("{cwd}")' _aux_action = IPythonRunCellAction(code=reset_jupyter_cwd_code) _reset_obs: IPythonRunCellObservation = await _jupyter_plugin.run( _aux_action @@ -527,10 +583,18 @@ class ActionExecutor: ) async def browse(self, action: BrowseURLAction) -> Observation: + if self.browser is None: + return ErrorObservation( + 'Browser functionality is not supported on Windows.' + ) await self._ensure_browser_ready() return await browse(action, self.browser) async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation: + if self.browser is None: + return ErrorObservation( + 'Browser functionality is not supported on Windows.' 
+ ) await self._ensure_browser_ready() return await browse(action, self.browser) @@ -726,7 +790,6 @@ if __name__ == '__main__': if not isinstance(action, Action): raise HTTPException(status_code=400, detail='Invalid action type') client.last_execution_time = time.time() - observation = await client.run_action(action) return event_to_dict(observation) except Exception as e: @@ -897,7 +960,7 @@ if __name__ == '__main__': To list files: ```sh - curl http://localhost:3000/api/list-files + curl -X POST -d '{"path": "/"}' http://localhost:3000/list_files ``` Args: diff --git a/openhands/runtime/impl/action_execution/action_execution_client.py b/openhands/runtime/impl/action_execution/action_execution_client.py index 95dc8eac77..c6282f5073 100644 --- a/openhands/runtime/impl/action_execution/action_execution_client.py +++ b/openhands/runtime/impl/action_execution/action_execution_client.py @@ -158,7 +158,6 @@ class ActionExecutionClient(Runtime): def copy_from(self, path: str) -> Path: """Zip all files in the sandbox and return as a stream of bytes.""" - try: params = {'path': path} with self.session.stream( @@ -183,25 +182,44 @@ class ActionExecutionClient(Runtime): if not os.path.exists(host_src): raise FileNotFoundError(f'Source file {host_src} does not exist') + temp_zip_path: str | None = None # Define temp_zip_path outside the try block + try: + params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()} + file_to_upload = None + upload_data = {} + if recursive: + # Create and write the zip file inside the try block with tempfile.NamedTemporaryFile( suffix='.zip', delete=False ) as temp_zip: temp_zip_path = temp_zip.name - with ZipFile(temp_zip_path, 'w') as zipf: - for root, _, files in os.walk(host_src): - for file in files: - file_path = os.path.join(root, file) - arcname = os.path.relpath( - file_path, os.path.dirname(host_src) - ) - zipf.write(file_path, arcname) + try: + with ZipFile(temp_zip_path, 'w') as zipf: + for root, _, files in 
os.walk(host_src): + for file in files: + file_path = os.path.join(root, file) + arcname = os.path.relpath( + file_path, os.path.dirname(host_src) + ) + zipf.write(file_path, arcname) - upload_data = {'file': open(temp_zip_path, 'rb')} + self.log( + 'debug', + f'Opening temporary zip file for upload: {temp_zip_path}', + ) + file_to_upload = open(temp_zip_path, 'rb') + upload_data = {'file': file_to_upload} + except Exception as e: + # Ensure temp file is cleaned up if zipping fails + if temp_zip_path and os.path.exists(temp_zip_path): + os.unlink(temp_zip_path) + raise e # Re-raise the exception after cleanup attempt else: - upload_data = {'file': open(host_src, 'rb')} + file_to_upload = open(host_src, 'rb') + upload_data = {'file': file_to_upload} params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()} @@ -217,11 +235,18 @@ class ActionExecutionClient(Runtime): f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}', ) finally: - if recursive: - os.unlink(temp_zip_path) - self.log( - 'debug', f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}' - ) + if file_to_upload: + file_to_upload.close() + + # Cleanup the temporary zip file if it was created + if temp_zip_path and os.path.exists(temp_zip_path): + try: + os.unlink(temp_zip_path) + except Exception as e: + self.log( + 'error', + f'Failed to delete temporary zip file {temp_zip_path}: {e}', + ) def get_vscode_token(self) -> str: if self.vscode_enabled and self.runtime_initialized: diff --git a/openhands/runtime/impl/local/local_runtime.py b/openhands/runtime/impl/local/local_runtime.py index 10a71c3d6c..b54c9cf19c 100644 --- a/openhands/runtime/impl/local/local_runtime.py +++ b/openhands/runtime/impl/local/local_runtime.py @@ -41,6 +41,18 @@ from openhands.utils.async_utils import call_sync_from_async from openhands.utils.tenacity_stop import stop_if_should_exit +def get_user_info(): + """Get user ID and username in a cross-platform way.""" + username = 
os.getenv('USER') + if sys.platform == 'win32': + # On Windows, we don't use user IDs the same way + # Return a default value that won't cause issues + return 1000, username + else: + # On Unix systems, use os.getuid() + return os.getuid(), username + + def check_dependencies(code_repo_path: str, poetry_venvs_path: str): ERROR_MESSAGE = 'Please follow the instructions in https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md to install OpenHands.' if not os.path.exists(code_repo_path): @@ -63,28 +75,33 @@ def check_dependencies(code_repo_path: str, poetry_venvs_path: str): if 'jupyter' not in output.lower(): raise ValueError('Jupyter is not properly installed. ' + ERROR_MESSAGE) - # Check libtmux is installed - logger.debug('Checking dependencies: libtmux') - import libtmux + # Check libtmux is installed (skip on Windows) - server = libtmux.Server() - try: - session = server.new_session(session_name='test-session') - except Exception: - raise ValueError('tmux is not properly installed or available on the path.') - pane = session.attached_pane - pane.send_keys('echo "test"') - pane_output = '\n'.join(pane.cmd('capture-pane', '-p').stdout) - session.kill_session() - if 'test' not in pane_output: - raise ValueError('libtmux is not properly installed. ' + ERROR_MESSAGE) + if sys.platform != 'win32': + logger.debug('Checking dependencies: libtmux') + import libtmux - # Check browser works - logger.debug('Checking dependencies: browser') - from openhands.runtime.browser.browser_env import BrowserEnv + server = libtmux.Server() + try: + session = server.new_session(session_name='test-session') + except Exception: + raise ValueError('tmux is not properly installed or available on the path.') + pane = session.attached_pane + pane.send_keys('echo "test"') + pane_output = '\n'.join(pane.cmd('capture-pane', '-p').stdout) + session.kill_session() + if 'test' not in pane_output: + raise ValueError('libtmux is not properly installed. 
' + ERROR_MESSAGE) - browser = BrowserEnv() - browser.close() + # Skip browser environment check on Windows + if sys.platform != 'win32': + logger.debug('Checking dependencies: browser') + from openhands.runtime.browser.browser_env import BrowserEnv + + browser = BrowserEnv() + browser.close() + else: + logger.warning('Running on Windows - browser environment check skipped.') class LocalRuntime(ActionExecutionClient): @@ -110,9 +127,15 @@ class LocalRuntime(ActionExecutionClient): attach_to_existing: bool = False, headless_mode: bool = True, ): + self.is_windows = sys.platform == 'win32' + if self.is_windows: + logger.warning( + 'Running on Windows - some features that require tmux will be limited. ' + 'For full functionality, please consider using WSL or Docker runtime.' + ) + self.config = config - self._user_id = os.getuid() - self._username = os.getenv('USER') + self._user_id, self._username = get_user_info() if self.config.workspace_base is not None: logger.warning( @@ -161,6 +184,7 @@ class LocalRuntime(ActionExecutionClient): self.status_callback = status_callback self.server_process: subprocess.Popen[str] | None = None self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time + self._log_thread_exit_event = threading.Event() # Add exit event # Update env vars if self.config.sandbox.runtime_startup_env_vars: @@ -199,7 +223,7 @@ class LocalRuntime(ActionExecutionClient): server_port=self._host_port, plugins=self.plugins, app_config=self.config, - python_prefix=[], + python_prefix=['poetry', 'run'], override_user_id=self._user_id, override_username=self._username, ) @@ -208,7 +232,7 @@ class LocalRuntime(ActionExecutionClient): env = os.environ.copy() # Get the code repo path code_repo_path = os.path.dirname(os.path.dirname(openhands.__file__)) - env['PYTHONPATH'] = f'{code_repo_path}{os.pathsep}{env.get("PYTHONPATH", "")}' + env['PYTHONPATH'] = os.pathsep.join([code_repo_path, env.get('PYTHONPATH', '')]) env['OPENHANDS_REPO_PATH'] = 
code_repo_path env['LOCAL_RUNTIME_MODE'] = '1' @@ -230,19 +254,50 @@ class LocalRuntime(ActionExecutionClient): universal_newlines=True, bufsize=1, env=env, + cwd=code_repo_path, # Explicitly set the working directory ) # Start a thread to read and log server output def log_output(): - while ( - self.server_process - and self.server_process.poll() - and self.server_process.stdout - ): - line = self.server_process.stdout.readline() - if not line: - break - self.log('debug', f'Server: {line.strip()}') + if not self.server_process or not self.server_process.stdout: + self.log('error', 'Server process or stdout not available for logging.') + return + + try: + # Read lines while the process is running and stdout is available + while self.server_process.poll() is None: + if self._log_thread_exit_event.is_set(): # Check exit event + self.log('info', 'Log thread received exit signal.') + break # Exit loop if signaled + line = self.server_process.stdout.readline() + if not line: + # Process might have exited between poll() and readline() + break + self.log('info', f'Server: {line.strip()}') + + # Capture any remaining output after the process exits OR if signaled + if ( + not self._log_thread_exit_event.is_set() + ): # Check again before reading remaining + self.log('info', 'Server process exited, reading remaining output.') + for line in self.server_process.stdout: + if ( + self._log_thread_exit_event.is_set() + ): # Check inside loop too + self.log( + 'info', + 'Log thread received exit signal while reading remaining output.', + ) + break + self.log('info', f'Server (remaining): {line.strip()}') + + except Exception as e: + # Log the error, but don't prevent the thread from potentially exiting + self.log('error', f'Error reading server output: {e}') + finally: + self.log( + 'info', 'Log output thread finished.' 
+ ) # Add log for thread exit self._log_thread = threading.Thread(target=log_output, daemon=True) self._log_thread.start() @@ -312,6 +367,8 @@ class LocalRuntime(ActionExecutionClient): def close(self): """Stop the server process.""" + self._log_thread_exit_event.set() # Signal the log thread to exit + if self.server_process: self.server_process.terminate() try: @@ -319,7 +376,7 @@ class LocalRuntime(ActionExecutionClient): except subprocess.TimeoutExpired: self.server_process.kill() self.server_process = None - self._log_thread.join() + self._log_thread.join(timeout=5) # Add timeout to join if self._temp_workspace: shutil.rmtree(self._temp_workspace) diff --git a/openhands/runtime/plugins/jupyter/__init__.py b/openhands/runtime/plugins/jupyter/__init__.py index 34ac5691bf..35ea8fc654 100644 --- a/openhands/runtime/plugins/jupyter/__init__.py +++ b/openhands/runtime/plugins/jupyter/__init__.py @@ -1,5 +1,8 @@ import asyncio import os +import subprocess +import sys +import time from dataclasses import dataclass from openhands.core.logger import openhands_logger as logger @@ -20,7 +23,7 @@ class JupyterPlugin(Plugin): name: str = 'jupyter' kernel_gateway_port: int kernel_id: str - gateway_process: asyncio.subprocess.Process + gateway_process: asyncio.subprocess.Process | subprocess.Popen python_interpreter_path: str async def initialize( @@ -28,7 +31,10 @@ class JupyterPlugin(Plugin): ) -> None: self.kernel_gateway_port = find_available_tcp_port(40000, 49999) self.kernel_id = kernel_id - if username in ['root', 'openhands']: + is_local_runtime = os.environ.get('LOCAL_RUNTIME_MODE') == '1' + is_windows = sys.platform == 'win32' + + if not is_local_runtime: # Non-LocalRuntime prefix = f'su - {username} -s ' # cd to code repo, setup all env vars and run micromamba @@ -50,37 +56,84 @@ class JupyterPlugin(Plugin): ) # The correct environment is ensured by the PATH in LocalRuntime. 
poetry_prefix = f'cd {code_repo_path}\n' - jupyter_launch_command = ( - f"{prefix}/bin/bash << 'EOF'\n" - f'{poetry_prefix}' - 'poetry run jupyter kernelgateway ' - '--KernelGatewayApp.ip=0.0.0.0 ' - f'--KernelGatewayApp.port={self.kernel_gateway_port}\n' - 'EOF' - ) - logger.debug(f'Jupyter launch command: {jupyter_launch_command}') - # Using asyncio.create_subprocess_shell instead of subprocess.Popen - # to avoid ASYNC101 linting error - self.gateway_process = await asyncio.create_subprocess_shell( - jupyter_launch_command, - stderr=asyncio.subprocess.STDOUT, - stdout=asyncio.subprocess.PIPE, - ) - # read stdout until the kernel gateway is ready - output = '' - while should_continue() and self.gateway_process.stdout is not None: - line_bytes = await self.gateway_process.stdout.readline() - line = line_bytes.decode('utf-8') - output += line - if 'at' in line: - break - await asyncio.sleep(1) - logger.debug('Waiting for jupyter kernel gateway to start...') + if is_windows: + # Windows-specific command format + jupyter_launch_command = ( + f'cd /d "{code_repo_path}" && ' + 'poetry run jupyter kernelgateway ' + '--KernelGatewayApp.ip=0.0.0.0 ' + f'--KernelGatewayApp.port={self.kernel_gateway_port}' + ) + logger.debug(f'Jupyter launch command (Windows): {jupyter_launch_command}') + + # Using synchronous subprocess.Popen for Windows as asyncio.create_subprocess_shell + # has limitations on Windows platforms + self.gateway_process = subprocess.Popen( # type: ignore[ASYNC101] # noqa: ASYNC101 + jupyter_launch_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True, + text=True, + ) + + # Windows-specific stdout handling with synchronous time.sleep + # as asyncio has limitations on Windows for subprocess operations + output = '' + while should_continue(): + if self.gateway_process.stdout is None: + time.sleep(1) # type: ignore[ASYNC101] # noqa: ASYNC101 + continue + + line = self.gateway_process.stdout.readline() + if not line: + time.sleep(1) # type: 
ignore[ASYNC101] # noqa: ASYNC101 + continue + + output += line + if 'at' in line: + break + + time.sleep(1) # type: ignore[ASYNC101] # noqa: ASYNC101 + logger.debug('Waiting for jupyter kernel gateway to start...') + + logger.debug( + f'Jupyter kernel gateway started at port {self.kernel_gateway_port}. Output: {output}' + ) + else: + # Unix systems (Linux/macOS) + jupyter_launch_command = ( + f"{prefix}/bin/bash << 'EOF'\n" + f'{poetry_prefix}' + 'poetry run jupyter kernelgateway ' + '--KernelGatewayApp.ip=0.0.0.0 ' + f'--KernelGatewayApp.port={self.kernel_gateway_port}\n' + 'EOF' + ) + logger.debug(f'Jupyter launch command: {jupyter_launch_command}') + + # Using asyncio.create_subprocess_shell instead of subprocess.Popen + # to avoid ASYNC101 linting error + self.gateway_process = await asyncio.create_subprocess_shell( + jupyter_launch_command, + stderr=asyncio.subprocess.STDOUT, + stdout=asyncio.subprocess.PIPE, + ) + # read stdout until the kernel gateway is ready + output = '' + while should_continue() and self.gateway_process.stdout is not None: + line_bytes = await self.gateway_process.stdout.readline() + line = line_bytes.decode('utf-8') + output += line + if 'at' in line: + break + await asyncio.sleep(1) + logger.debug('Waiting for jupyter kernel gateway to start...') + + logger.debug( + f'Jupyter kernel gateway started at port {self.kernel_gateway_port}. Output: {output}' + ) - logger.debug( - f'Jupyter kernel gateway started at port {self.kernel_gateway_port}. 
Output: {output}' - ) _obs = await self.run( IPythonRunCellAction(code='import sys; print(sys.executable)') ) diff --git a/openhands/runtime/utils/runtime_init.py b/openhands/runtime/utils/runtime_init.py index 511a523831..060e08b47d 100644 --- a/openhands/runtime/utils/runtime_init.py +++ b/openhands/runtime/utils/runtime_init.py @@ -1,5 +1,6 @@ import os import subprocess +import sys from openhands.core.logger import openhands_logger as logger @@ -32,6 +33,17 @@ def init_user_and_working_directory( Returns: int | None: The user ID if it was updated, None otherwise. """ + # If running on Windows, just create the directory and return + if sys.platform == 'win32': + logger.debug('Running on Windows, skipping Unix-specific user setup') + logger.debug(f'Client working directory: {initial_cwd}') + + # Create the working directory if it doesn't exist + os.makedirs(initial_cwd, exist_ok=True) + logger.debug(f'Created working directory: {initial_cwd}') + + return None + # if username is CURRENT_USER, then we don't need to do anything # This is specific to the local runtime if username == os.getenv('USER') and username not in ['root', 'openhands']: diff --git a/openhands/runtime/utils/windows_bash.py b/openhands/runtime/utils/windows_bash.py new file mode 100644 index 0000000000..30a360dfff --- /dev/null +++ b/openhands/runtime/utils/windows_bash.py @@ -0,0 +1,1413 @@ +""" +This module provides a Windows-specific implementation for running commands +in a PowerShell session using the pythonnet library to interact with the .NET +PowerShell SDK directly. This aims to provide a more robust and integrated +way to manage PowerShell processes compared to using temporary script files. 
+""" + +import os +import time +import traceback +from pathlib import Path +from threading import RLock + +import pythonnet + +from openhands.core.logger import openhands_logger as logger +from openhands.events.action import CmdRunAction +from openhands.events.observation import ErrorObservation +from openhands.events.observation.commands import ( + CmdOutputMetadata, + CmdOutputObservation, +) +from openhands.utils.shutdown_listener import should_continue + +pythonnet.load('coreclr') +logger.info("Successfully called pythonnet.load('coreclr')") + +# Now that pythonnet is initialized, import clr and System +try: + import clr + + logger.debug(f'Imported clr module from: {clr.__file__}') + # Load System assembly *after* pythonnet is initialized + clr.AddReference('System') + import System +except Exception as clr_sys_ex: + raise RuntimeError(f'FATAL: Failed to import clr or System. Error: {clr_sys_ex}') + +# Attempt to load the PowerShell SDK assembly only if clr and System loaded +ps_sdk_path = None +try: + # Prioritize PowerShell 7+ if available (adjust path if necessary) + pwsh7_path = ( + Path(os.environ.get('ProgramFiles', 'C:\\Program Files')) + / 'PowerShell' + / '7' + / 'System.Management.Automation.dll' + ) + if pwsh7_path.exists(): + ps_sdk_path = str(pwsh7_path) + clr.AddReference(ps_sdk_path) + logger.info(f'Loaded PowerShell SDK (Core): {ps_sdk_path}') + else: + # Fallback to Windows PowerShell 5.1 bundled with Windows + winps_path = ( + Path(os.environ.get('SystemRoot', 'C:\\Windows')) + / 'System32' + / 'WindowsPowerShell' + / 'v1.0' + / 'System.Management.Automation.dll' + ) + if winps_path.exists(): + ps_sdk_path = str(winps_path) + clr.AddReference(ps_sdk_path) + logger.debug(f'Loaded PowerShell SDK (Desktop): {ps_sdk_path}') + else: + # Last resort: try loading by assembly name (might work if in GAC or path) + clr.AddReference('System.Management.Automation') + logger.info( + 'Attempted to load PowerShell SDK by name (System.Management.Automation)' 
+ ) + + from System.Management.Automation import JobState, PowerShell + from System.Management.Automation.Language import Parser + from System.Management.Automation.Runspaces import ( + RunspaceFactory, + RunspaceState, + ) +except Exception as e: + raise RuntimeError( + f'FATAL: Failed to load PowerShell SDK components. Error: {e}. Check pythonnet installation and .NET Runtime compatibility. Path searched: {ps_sdk_path}' + ) + + +class WindowsPowershellSession: + """ + Manages a persistent PowerShell session using the .NET SDK via pythonnet. + + Allows executing commands within a single runspace, preserving state + (variables, current directory) between calls. + Handles basic timeout and captures output/error streams. + """ + + def __init__( + self, + work_dir: str, + username: str | None = None, + no_change_timeout_seconds: int = 30, + max_memory_mb: int | None = None, + ): + """ + Initializes the PowerShell session. + + Args: + work_dir: The starting working directory for the session. + username: (Currently ignored) Username for execution. PowerShell SDK typically runs as the current user. + no_change_timeout_seconds: Timeout in seconds if no output change is detected (currently NOT fully implemented). + max_memory_mb: (Currently ignored) Maximum memory limit for the process. + """ + # Initialize state flags early to prevent AttributeError in __del__ if init fails + self._closed = False + self._initialized = False + self.runspace = None # Initialize runspace to None + + if PowerShell is None: # Check if SDK loading failed during module import + # Logged critical error during import, just raise here to prevent instantiation + raise RuntimeError( + 'PowerShell SDK (System.Management.Automation.dll) could not be loaded. Cannot initialize WindowsPowershellSession.' 
+ ) + + self.work_dir = os.path.abspath(work_dir) + self.username = username + self._cwd = self.work_dir + self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds + self.max_memory_mb = max_memory_mb # Stored, but not used yet. + + self.active_job = None + self._job_lock = RLock() + self._last_job_output = '' # Stores cumulative output returned in the last observation for the active job + self._last_job_error: list[ + str + ] = [] # Stores cumulative errors returned in the last observation for the active job + + # Create and open the persistent runspace + try: + # Consider InitialSessionState for more control (e.g., execution policy) + # iss = InitialSessionState.CreateDefault() + # iss.ExecutionPolicy = Microsoft.PowerShell.ExecutionPolicy.Unrestricted # Requires importing Microsoft.PowerShell namespace + # self.runspace = RunspaceFactory.CreateRunspace(iss) + self.runspace = RunspaceFactory.CreateRunspace() + self.runspace.Open() + # Set initial working directory within the runspace + self._set_initial_cwd() + self._initialized = True # Set to True only on successful initialization + logger.info(f'PowerShell runspace created. 
Initial CWD set to: {self._cwd}') + except Exception as e: + logger.error(f'Failed to create or open PowerShell runspace: {e}') + logger.error(traceback.format_exc()) + self.close() # Ensure cleanup if init fails partially + raise RuntimeError(f'Failed to initialize PowerShell runspace: {e}') + + def _set_initial_cwd(self): + """Sets the initial working directory in the runspace.""" + ps = None + try: + ps = PowerShell.Create() + ps.Runspace = self.runspace + ps.AddScript(f'Set-Location -Path "{self._cwd}"').Invoke() + if ps.Streams.Error: + errors = '\n'.join([str(err) for err in ps.Streams.Error]) + logger.warning(f"Error setting initial CWD to '{self._cwd}': {errors}") + # Confirm actual CWD if setting failed + self._confirm_cwd() + else: + logger.debug(f'Successfully set initial runspace CWD to {self._cwd}') + # Optional: Confirm CWD even on success for robustness + # self._confirm_cwd() + except Exception as e: + logger.error(f'Exception setting initial CWD: {e}') + logger.error(traceback.format_exc()) + # Attempt to confirm CWD even if setting threw an exception + self._confirm_cwd() + finally: + if ps: + ps.Dispose() + + def _confirm_cwd(self): + """Confirms the actual CWD in the runspace and updates self._cwd.""" + ps_confirm = None + try: + ps_confirm = PowerShell.Create() + ps_confirm.Runspace = self.runspace + ps_confirm.AddScript('Get-Location') + results = ps_confirm.Invoke() + if results and results.Count > 0 and hasattr(results[0], 'Path'): + actual_cwd = str(results[0].Path) + if os.path.isdir(actual_cwd): + if actual_cwd != self._cwd: + logger.warning( + f'Runspace CWD ({actual_cwd}) differs from expected ({self._cwd}). Updating session CWD.' + ) + self._cwd = actual_cwd + else: + logger.debug(f'Confirmed runspace CWD is {self._cwd}') + else: + logger.error( + f'Get-Location returned an invalid path: {actual_cwd}. Session CWD may be inaccurate.' 
+ ) + elif ps_confirm.Streams.Error: + errors = '\n'.join([str(err) for err in ps_confirm.Streams.Error]) + logger.error(f'Error confirming runspace CWD: {errors}') + else: + logger.error('Could not confirm runspace CWD (No result or error).') + except Exception as e: + logger.error(f'Exception confirming CWD: {e}') + finally: + if ps_confirm: + ps_confirm.Dispose() + + @property + def cwd(self) -> str: + """Gets the last known working directory of the session.""" + return self._cwd + + def _run_ps_command( + self, script: str, log_output: bool = True + ) -> list[System.Management.Automation.PSObject]: + """Helper to run a simple synchronous command in the runspace.""" + if log_output: + logger.debug(f"Running PS command: '{script}'") + ps = None + results = [] + try: + ps = PowerShell.Create() + ps.Runspace = self.runspace + ps.AddScript(script) + results = ps.Invoke() + except Exception as e: + logger.error(f'Exception running script: {script}\n{e}') + finally: + if ps: + ps.Dispose() + return results if results else [] + + def _get_job_object( + self, job_id: int | None + ) -> System.Management.Automation.Job | None: + """Retrieves a job object by its ID.""" + script = f'Get-Job -Id {job_id}' + results = self._run_ps_command(script, log_output=False) + if results and len(results) > 0: + potential_job_wrapper = results[0] + try: + underlying_job = potential_job_wrapper.BaseObject + # Basic check for job-like properties before returning + _ = underlying_job.Id + _ = underlying_job.JobStateInfo.State + return underlying_job + except AttributeError: + logger.warning(f'Retrieved object is not a valid job. 
ID: {job_id}') + return None + return None + + def _receive_job_output( + self, job: System.Management.Automation.Job, keep: bool = False + ) -> tuple[str, list[str]]: + """Receives output and errors from a job.""" + if not job: + return '', [] + + output_parts = [] + error_parts = [] + + # Get error stream directly from job object if available + try: + current_job_obj = self._get_job_object(job.Id) + if current_job_obj and current_job_obj.Error: + error_records = current_job_obj.Error.ReadAll() + if error_records: + error_parts.extend([str(e) for e in error_records]) + except Exception as read_err: + logger.error( + f'Failed to read job error stream directly for Job {job.Id}: {read_err}' + ) + error_parts.append(f'[Direct Error Stream Read Exception: {read_err}]') + + # Run Receive-Job for the output stream + keep_switch = '-Keep' if keep else '' + script = f'Receive-Job -Job (Get-Job -Id {job.Id}) {keep_switch}' + + ps_receive = None + try: + ps_receive = PowerShell.Create() + ps_receive.Runspace = self.runspace + ps_receive.AddScript(script) + + # Collect output + results = ps_receive.Invoke() + if results: + output_parts = [str(r) for r in results] + + # Collect errors from the Receive-Job command + if ps_receive.Streams.Error: + receive_job_errors = [str(e) for e in ps_receive.Streams.Error] + logger.warning( + f'Errors during Receive-Job for Job ID {job.Id}: {receive_job_errors}' + ) + error_parts.extend(receive_job_errors) + + except Exception as e: + logger.error(f'Exception during Receive-Job for Job ID {job.Id}: {e}') + error_parts.append(f'[Receive-Job Exception: {e}]') + finally: + if ps_receive: + ps_receive.Dispose() + + final_combined_output = '\n'.join(output_parts) + return final_combined_output, error_parts + + def _stop_active_job(self) -> CmdOutputObservation | ErrorObservation: + """Stops the active job, collects final output, and cleans up.""" + with self._job_lock: + job = self.active_job + if not job: + return ErrorObservation( + 
content='ERROR: No previous running command to interact with.' + ) + + job_id = job.Id # type: ignore[unreachable] + logger.info(f'Attempting to stop job ID: {job_id} via C-c.') + + # Attempt graceful stop + stop_script = f'Stop-Job -Job (Get-Job -Id {job_id})' + self._run_ps_command(stop_script) + + # Allow process time to potentially print shutdown messages + time.sleep(0.5) + + # Get final output and errors + final_output, final_errors = self._receive_job_output(job, keep=False) + + combined_output = final_output + combined_errors = final_errors + + # Check job state after stopping + final_job = self._get_job_object(job_id) + final_state = final_job.JobStateInfo.State if final_job else JobState.Failed + + logger.info(f'Job {job_id} final state after stop attempt: {final_state}') + + # Clean up the job + remove_script = f'Remove-Job -Job (Get-Job -Id {job_id})' + self._run_ps_command(remove_script) + + # Clear the active job reference + self.active_job = None + + # Construct result + output_builder = [combined_output] if combined_output else [] + if combined_errors: + output_builder.append('\n[ERROR STREAM]') + output_builder.extend(combined_errors) + + # Determine exit code - 0 if Stopped/Completed, 1 otherwise + exit_code = ( + 0 if final_state in [JobState.Stopped, JobState.Completed] else 1 + ) + + final_content = '\n'.join(output_builder).strip() + + current_cwd = self._cwd + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata( + exit_code=exit_code, working_dir=python_safe_cwd + ) + metadata.suffix = f'\n[The command completed with exit code {exit_code}. CTRL+C was sent.]' + + return CmdOutputObservation( + content=final_content, + command='C-c', + metadata=metadata, + ) + + def _check_active_job( + self, timeout_seconds: int + ) -> CmdOutputObservation | ErrorObservation: + """ + Checks the active job for new output and status, waiting up to timeout_seconds. 
+ """ + with self._job_lock: + if not self.active_job: + return ErrorObservation( + content='ERROR: No previous running command to retrieve logs from.' + ) + + job_id = self.active_job.Id # type: ignore[unreachable] + logger.info( + f'Checking active job ID: {job_id} for new output (timeout={timeout_seconds}s).' + ) + + start_time = time.monotonic() + monitoring_loop_finished = False + accumulated_new_output_builder = [] + accumulated_new_errors = [] + exit_code = -1 # Assume running + final_state = JobState.Running + latest_cumulative_output = self._last_job_output + latest_cumulative_errors = list(self._last_job_error) + + while not monitoring_loop_finished: + if not should_continue(): + logger.warning('Shutdown signal received during job check.') + monitoring_loop_finished = True + continue + + elapsed_seconds = time.monotonic() - start_time + if elapsed_seconds > timeout_seconds: + logger.warning(f'Job check timed out after {timeout_seconds}s.') + monitoring_loop_finished = True + continue + + current_job_obj = self._get_job_object(job_id) + if not current_job_obj: + logger.error(f'Job {job_id} object disappeared during check.') + accumulated_new_errors.append('[Job object lost during check]') + monitoring_loop_finished = True + exit_code = 1 + final_state = JobState.Failed + if self.active_job and self.active_job.Id == job_id: + self.active_job = None + continue + + # Poll output with keep=True (returns cumulative output/errors) + polled_cumulative_output, polled_cumulative_errors = ( + self._receive_job_output(current_job_obj, keep=True) + ) + + # Detect new output since last poll + new_output_detected = '' + if polled_cumulative_output != latest_cumulative_output: + if polled_cumulative_output.startswith(latest_cumulative_output): + new_output_detected = polled_cumulative_output[ + len(latest_cumulative_output) : + ] + else: + logger.warning( + f'Job {job_id} check: Cumulative output changed unexpectedly' + ) + new_output_detected = 
polled_cumulative_output.removeprefix( + self._last_job_output + ) + + if new_output_detected.strip(): + accumulated_new_output_builder.append( + new_output_detected.strip() + ) + + # Detect new errors + latest_cumulative_errors_set = set(latest_cumulative_errors) + new_errors_detected = [ + e + for e in polled_cumulative_errors + if e not in latest_cumulative_errors_set + ] + if new_errors_detected: + accumulated_new_errors.extend(new_errors_detected) + + latest_cumulative_output = polled_cumulative_output + latest_cumulative_errors = polled_cumulative_errors + + # Check job state + current_state = current_job_obj.JobStateInfo.State + if current_state not in [JobState.Running, JobState.NotStarted]: + logger.info( + f'Job {job_id} finished check loop with state: {current_state}' + ) + monitoring_loop_finished = True + final_state = current_state + continue + + time.sleep(0.1) # Prevent busy-waiting + + # Process results after loop finished + is_finished = final_state not in [JobState.Running, JobState.NotStarted] + final_content = '\n'.join(accumulated_new_output_builder).strip() + final_errors = list(accumulated_new_errors) + + if is_finished: + logger.info(f'Job {job_id} has finished. 
Collecting final output.') + final_job_obj = self._get_job_object(job_id) + if final_job_obj: + # Final receive with keep=False to consume remaining output + final_cumulative_output, final_cumulative_errors = ( + self._receive_job_output(final_job_obj, keep=False) + ) + + # Check for new output in final chunk + final_new_output_chunk = '' + if final_cumulative_output.startswith(latest_cumulative_output): + final_new_output_chunk = final_cumulative_output[ + len(latest_cumulative_output) : + ] + elif final_cumulative_output: + final_new_output_chunk = final_cumulative_output.removeprefix( + self._last_job_output + ) + + if final_new_output_chunk.strip(): + final_content = '\n'.join( + filter( + None, [final_content, final_new_output_chunk.strip()] + ) + ) + + # Check for new errors in final chunk + latest_cumulative_errors_set = set(latest_cumulative_errors) + new_final_errors = [ + e + for e in final_cumulative_errors + if e not in latest_cumulative_errors_set + ] + if new_final_errors: + final_errors.extend(new_final_errors) + + # Determine exit code based on state + exit_code = 0 if final_state == JobState.Completed else 1 + + # Clean up job + remove_script = f'Remove-Job -Job (Get-Job -Id {job_id})' + self._run_ps_command(remove_script) + if self.active_job and self.active_job.Id == job_id: + self.active_job = None + self._last_job_output = '' + self._last_job_error = [] + else: + logger.warning(f'Could not get final job object {job_id}') + exit_code = 1 + if self.active_job and self.active_job.Id == job_id: + self.active_job = None + self._last_job_output = '' + self._last_job_error = [] + else: + # Update persistent state with latest cumulative values + self._last_job_output = latest_cumulative_output + self._last_job_error = list(set(latest_cumulative_errors)) + + # Append errors to final content + if final_errors: + error_stream_text = '\n'.join(final_errors) + if final_content: + final_content += f'\n[ERROR STREAM]\n{error_stream_text}' + else: + 
final_content = f'[ERROR STREAM]\n{error_stream_text}' + # Ensure exit code is non-zero if errors occurred + if exit_code == 0 and final_state != JobState.Completed: + exit_code = 1 + + current_cwd = self._cwd + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata( + exit_code=exit_code, working_dir=python_safe_cwd + ) + metadata.prefix = '[Below is the output of the previous command.]\n' + + if is_finished: + metadata.suffix = ( + f'\n[The command completed with exit code {exit_code}.]' + ) + else: + metadata.suffix = ( + f'\n[The command timed out after {timeout_seconds} seconds. ' + "You may wait longer to see additional output by sending empty command '', " + 'send other commands to interact with the current process, ' + 'or send keys to interrupt/kill the command.]' + ) + + return CmdOutputObservation( + content=final_content, + command='', + metadata=metadata, + ) + + def _get_current_cwd(self) -> str: + """Gets the current working directory from the runspace.""" + # Use helper to run Get-Location + results = self._run_ps_command('Get-Location') + + # --- Add more detailed check logging --- + if results and results.Count > 0: # type: ignore[attr-defined] + first_result = results[0] + has_path_attr = hasattr(first_result, 'Path') + + if has_path_attr: + # Original logic resumes here if hasattr is True + fetched_cwd = str(first_result.Path) + if os.path.isdir(fetched_cwd): + if fetched_cwd != self._cwd: + logger.info( + f"_get_current_cwd: Fetched CWD '{fetched_cwd}' differs from cached '{self._cwd}'. Updating cache." + ) + self._cwd = fetched_cwd + return self._cwd + else: + logger.warning( + f"_get_current_cwd: Path '{fetched_cwd}' is not a valid directory. Returning cached CWD: {self._cwd}" + ) + return self._cwd + else: + # Handle cases where Path attribute is missing (e.g., unexpected object type) + # Maybe the path is in BaseObject? 
+ try: + base_object = first_result.BaseObject + if hasattr(base_object, 'Path'): + fetched_cwd = str(base_object.Path) + if os.path.isdir(fetched_cwd): + if fetched_cwd != self._cwd: + logger.info( + f"_get_current_cwd: Fetched CWD '{fetched_cwd}' (from BaseObject) differs from cached '{self._cwd}'. Updating cache." + ) + self._cwd = fetched_cwd + return self._cwd + else: + logger.warning( + f"_get_current_cwd: Path '{fetched_cwd}' (from BaseObject) is not a valid directory. Returning cached CWD: {self._cwd}" + ) + return self._cwd + else: + logger.error( + f'_get_current_cwd: BaseObject also lacks Path attribute. Cannot determine CWD from result: {first_result}' + ) + return self._cwd # Return cached + except AttributeError as ae: + logger.error( + f'_get_current_cwd: Error accessing BaseObject or its Path: {ae}. Result: {first_result}' + ) + return self._cwd # Return cached + except Exception as ex: + logger.error( + f'_get_current_cwd: Unexpected error checking BaseObject: {ex}. Result: {first_result}' + ) + return self._cwd # Return cached + + # This path is taken if _run_ps_command returned [] or results.Count was 0 + logger.error( + f'_get_current_cwd: No valid results received from Get-Location call. Returning cached CWD: {self._cwd}' + ) + return self._cwd + + def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation: + """ + Executes a command, potentially as a PowerShell background job for long-running tasks. + Aligned with bash.py behavior regarding command execution and messages. + + Args: + action: The command execution action. + + Returns: + CmdOutputObservation or ErrorObservation. + """ + if not self._initialized or self._closed: + return ErrorObservation( + content='PowerShell session is not initialized or has been closed.' 
+ ) + + command = action.command.strip() + timeout_seconds = action.timeout or 60 # Default to 60 seconds hard timeout + is_input = action.is_input # Check if it's intended as input + + # Detect if this is a background command (ending with &) + run_in_background = False + if command.endswith('&'): + run_in_background = True + command = command[:-1].strip() # Remove the & and extra spaces + logger.info(f"Detected background command: '{command}'") + + logger.info( + f"Received command: '{command}', Timeout: {timeout_seconds}s, is_input: {is_input}, background: {run_in_background}" + ) + + # --- Simplified Active Job Handling (aligned with bash.py) --- + with self._job_lock: + if self.active_job: + active_job_obj = self._get_job_object(self.active_job.Id) # type: ignore[unreachable] + job_is_finished = False + final_output = '' # Initialize before conditional assignment + final_errors = [] # Initialize before conditional assignment + current_job_state = None # Initialize + finished_job_id = ( + self.active_job.Id + ) # Store ID before potentially clearing self.active_job + + if active_job_obj: + current_job_state = active_job_obj.JobStateInfo.State + if current_job_state not in [JobState.Running, JobState.NotStarted]: + job_is_finished = True + logger.info( + f'Active job {finished_job_id} was finished ({current_job_state}) before receiving new command. Cleaning up.' + ) + # Assign final output/errors here + final_output, final_errors = self._receive_job_output( + active_job_obj, keep=False + ) # Consume final output + remove_script = ( + f'Remove-Job -Job (Get-Job -Id {finished_job_id})' + ) + self._run_ps_command(remove_script) + # --- Reset persistent state --- + self._last_job_output = '' + self._last_job_error = [] + self.active_job = None + # else: job still running, job_is_finished remains False + else: + # Job object disappeared, consider it finished/gone + logger.warning( + f'Could not retrieve active job object {finished_job_id}. 
Assuming finished and clearing.' + ) + job_is_finished = True + current_job_state = ( + JobState.Failed + ) # Assume failed if object is gone + # Assign final output/errors here + final_output = '' # No output retrievable + final_errors = ['[ERROR: Job object disappeared during check]'] + # --- Reset persistent state --- + self._last_job_output = '' + self._last_job_error = [] + self.active_job = None + + # If the job was found to be finished *during this check*, return its final state now. + if job_is_finished: + # --- Calculate final new output/errors --- + new_output = final_output.removeprefix( + self._last_job_output + ) # final_output was from keep=False + last_error_set = set( + self._last_job_error + ) # Use the state *before* reset + new_errors = [e for e in final_errors if e not in last_error_set] + + # Construct and return the observation for the completed job using the state captured during cleanup + exit_code = 0 if current_job_state == JobState.Completed else 1 + output_builder = [new_output] if new_output else [] + if new_errors: + output_builder.append('\\n[ERROR STREAM]') + output_builder.extend(new_errors) + content_for_return = '\\n'.join(output_builder).strip() + + current_cwd = self._cwd # Use cached CWD as job is gone + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata( + exit_code=exit_code, working_dir=python_safe_cwd + ) + # Indicate this output is from the *previous* command that just finished. + metadata.prefix = ( + '[Below is the output of the previous command.]\\n' + ) + metadata.suffix = ( + f'\\n[The command completed with exit code {exit_code}.]' + ) + logger.info( + f"Returning final output for job {finished_job_id} which finished before command '{command}' was processed." 
+ ) # Use finished_job_id + return CmdOutputObservation( + content=content_for_return, + command=action.command, # The command that triggered this check (e.g., '') + metadata=metadata, + ) + + # If job was NOT finished, check incoming command + # This block only runs if the job is still active (job_is_finished is False) + if not job_is_finished: + if command == '': + logger.info( + 'Received empty command while job running. Checking job status.' + ) + # Pass the timeout from the empty command action to _check_active_job + return self._check_active_job(timeout_seconds) + elif command == 'C-c': + logger.info('Received C-c while job running. Stopping job.') + return self._stop_active_job() + elif is_input: + # PowerShell session doesn't directly support stdin injection like bash.py/tmux + # This requires a different approach (e.g., named pipes, or specific cmdlets). + # For now, return an error indicating this limitation. + logger.warning( + f"Received input command '{command}' while job active, but direct input injection is not supported in this implementation." 
+ ) + # Get *new* output since last observation to provide context + cumulative_output, cumulative_errors = self._receive_job_output( + self.active_job, keep=True + ) + new_output = cumulative_output.removeprefix( + self._last_job_output + ) + last_error_set = set(self._last_job_error) + new_errors = [ + e for e in cumulative_errors if e not in last_error_set + ] + output_builder = [new_output] if new_output else [] + if new_errors: + output_builder.append('\\n[ERROR STREAM]') + output_builder.extend(new_errors) + # --- UPDATE persistent state --- + # Even though input fails, the user saw this output now + self._last_job_output = cumulative_output + self._last_job_error = list(set(cumulative_errors)) + current_cwd = self._cwd + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata( + exit_code=-1, working_dir=python_safe_cwd + ) # Still running + metadata.prefix = ( + '[Below is the output of the previous command.]\\n' + ) + metadata.suffix = ( + f"\\n[Your input command '{command}' was NOT processed. Direct input to running processes (is_input=True) " + 'is not supported by this PowerShell session implementation. You can use C-c to stop the process.]' + ) + return CmdOutputObservation( + content='\\n'.join(output_builder).strip(), + command=action.command, + metadata=metadata, + ) + + else: + # Any other command arrives while a job is running -> Reject it (bash.py behavior) + logger.warning( + f"Received new command '{command}' while job {self.active_job.Id} is active. New command NOT executed." 
+ ) + # Get *new* output since last observation to provide context + cumulative_output, cumulative_errors = self._receive_job_output( + self.active_job, keep=True + ) + new_output = cumulative_output.removeprefix( + self._last_job_output + ) + last_error_set = set(self._last_job_error) + new_errors = [ + e for e in cumulative_errors if e not in last_error_set + ] + output_builder = [new_output] if new_output else [] + if new_errors: + output_builder.append('\\n[ERROR STREAM]') + output_builder.extend(new_errors) + # --- UPDATE persistent state --- + # Even though command fails, the user saw this output now + self._last_job_output = cumulative_output + self._last_job_error = list(set(cumulative_errors)) + + current_cwd = self._cwd # Use cached CWD + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata( + exit_code=-1, working_dir=python_safe_cwd + ) # Exit code -1 indicates still running + metadata.prefix = ( + '[Below is the output of the previous command.]\n' + ) + metadata.suffix = ( + f'\n[Your command "{command}" is NOT executed. ' + f'The previous command is still running - You CANNOT send new commands until the previous command is completed. 
' + 'By setting `is_input` to `true`, you can interact with the current process: ' + "You may wait longer to see additional output of the previous command by sending empty command '', " + 'send other commands to interact with the current process, ' + 'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]' + ) + + return CmdOutputObservation( + content='\\n'.join(output_builder).strip(), + command=action.command, # Return the command that was attempted + metadata=metadata, + ) + # --- End Active Job Handling --- + + # --- If we reach here, there is no active job --- + + # Handle empty command when NO job is active + if command == '': + logger.warning('Received empty command string (no active job).') + current_cwd = self._get_current_cwd() # Update CWD just in case + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata(exit_code=0, working_dir=python_safe_cwd) + # Align error message with bash.py + error_content = 'ERROR: No previous running command to retrieve logs from.' + logger.warning( + f'Returning specific error message for empty command: {error_content}' + ) + # No extra suffix needed + # metadata.suffix = f"\n[Empty command received (no active job). CWD: {metadata.working_dir}]" + return CmdOutputObservation( + content=error_content, command='', metadata=metadata + ) + + # Handle C-* when NO job is active/relevant + if command.startswith('C-') and len(command) == 3: + logger.warning( + f'Received control character command: {command}. Not supported when no job active.' + ) + current_cwd = self._cwd # Use cached CWD + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + # Align error message with bash.py (no running command to interact with) + return ErrorObservation( + content='ERROR: No previous running command to interact with.' 
+ ) + + # --- Validate command structure using PowerShell Parser --- + # (Keep existing validation logic as it's PowerShell specific and useful) + parse_errors = None + statements = None + try: + # Parse the input command string + ast, _, parse_errors = Parser.ParseInput(command, None) + if parse_errors and parse_errors.Length > 0: + error_messages = '\n'.join( + [ + f' - {err.Message} at Line {err.Extent.StartLineNumber}, Column {err.Extent.StartColumnNumber}' + for err in parse_errors + ] + ) + logger.error(f'Command failed PowerShell parsing:\n{error_messages}') + return ErrorObservation( + content=( + f'ERROR: Command could not be parsed by PowerShell.\n' + f'Syntax errors detected:\n{error_messages}' + ) + ) + statements = ast.EndBlock.Statements + if statements.Count > 1: + logger.error( + f'Detected {statements.Count} statements in the command. Only one is allowed.' + ) + # Align error message with bash.py + splited_cmds = [ + str(s.Extent.Text) for s in statements + ] # Try to get text + return ErrorObservation( + content=( + f'ERROR: Cannot execute multiple commands at once.\n' + f'Please run each command separately OR chain them into a single command via PowerShell operators (e.g., ; or |).\n' + f'Detected commands:\n{"\n".join(f"({i+1}) {cmd}" for i, cmd in enumerate(splited_cmds))}' + ) + ) + elif statements.Count == 0 and not command.strip().startswith('#'): + logger.warning( + 'Received command that resulted in zero executable statements (likely whitespace or comment).' 
+ ) + # Treat as empty command if it parses to nothing + return CmdOutputObservation( + content='', + command=command, + metadata=CmdOutputMetadata(exit_code=0, working_dir=self._cwd), + ) + + except Exception as parse_ex: + logger.error(f'Exception during PowerShell command parsing: {parse_ex}') + logger.error(traceback.format_exc()) + return ErrorObservation( + content=f'ERROR: An exception occurred while parsing the command: {parse_ex}' + ) + # --- End validation --- + + # === Synchronous Execution Path (for CWD commands) === + if statements and statements.Count == 1: + statement = statements[0] + try: + from System.Management.Automation.Language import ( + CommandAst, + PipelineAst, + ) + + # Check PipelineAst + if isinstance(statement, PipelineAst): + pipeline_elements = statement.PipelineElements + if ( + pipeline_elements + and pipeline_elements.Count == 1 + and isinstance(pipeline_elements[0], CommandAst) + ): + command_ast = pipeline_elements[0] + command_name = command_ast.GetCommandName() + if command_name and command_name.lower() in [ + 'set-location', + 'cd', + 'push-location', + 'pop-location', + ]: + logger.info( + f'execute: Identified CWD command via PipelineAst: {command_name}' + ) + # Run command and prepare proper CmdOutputObservation + ps_results = self._run_ps_command(command) + # Get current working directory after CWD command + current_cwd = self._get_current_cwd() + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + + # Convert results to string output if any + output = ( + '\n'.join([str(r) for r in ps_results]) + if ps_results + else '' + ) + + return CmdOutputObservation( + content=output, + command=command, + metadata=CmdOutputMetadata( + exit_code=0, working_dir=python_safe_cwd + ), + ) + # Check direct CommandAst + elif isinstance(statement, CommandAst): + command_name = statement.GetCommandName() + if command_name and command_name.lower() in [ + 'set-location', + 'cd', + 'push-location', + 'pop-location', + ]: + logger.info( 
+ f'execute: Identified CWD command via direct CommandAst: {command_name}' + ) + # Run command and prepare proper CmdOutputObservation + ps_results = self._run_ps_command(command) + # Get current working directory after CWD command + current_cwd = self._get_current_cwd() + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + + # Convert results to string output if any + output = ( + '\n'.join([str(r) for r in ps_results]) + if ps_results + else '' + ) + + return CmdOutputObservation( + content=output, + command=command, + metadata=CmdOutputMetadata( + exit_code=0, working_dir=python_safe_cwd + ), + ) + except ImportError as imp_err: + logger.error( + f'execute: Failed to import CommandAst: {imp_err}. Cannot check for CWD commands.' + ) + except Exception as ast_err: + logger.error(f'execute: Error checking command AST: {ast_err}') + + # === Asynchronous Execution Path (for non-CWD commands) === + logger.info( + f"execute: Entering asynchronous execution path for command: '{command}'" + ) + + # --- Start the command as a new asynchronous job --- + # Reset state for the new job + self._last_job_output = '' + self._last_job_error = [] + + ps_start = None + job = None + output_builder = [] + all_errors = [] + exit_code = 1 + timed_out = False + job_start_failed = False + job_id = None + + try: + ps_start = PowerShell.Create() + ps_start.Runspace = self.runspace + escaped_cwd = self._cwd.replace("'", "''") + # Check $? after the command. If it's false, exit 1. + start_job_script = f"Start-Job -ScriptBlock {{ Set-Location '{escaped_cwd}'; {command}; if (-not $?) 
{{ exit 1 }} }}" + + logger.info(f'Starting command as PowerShell job: {command}') + ps_start.AddScript(start_job_script) + start_results = ps_start.Invoke() + + if ps_start.Streams.Error: + errors = [str(e) for e in ps_start.Streams.Error] + logger.error(f'Errors during Start-Job execution: {errors}') + all_errors.extend(errors) + + ps_get = PowerShell.Create() + ps_get.Runspace = self.runspace + get_job_script = 'Get-Job | Sort-Object -Property Id -Descending | Select-Object -First 1' + ps_get.AddScript(get_job_script) + get_results = ps_get.Invoke() + + if ps_get.Streams.Error: + errors = [str(e) for e in ps_get.Streams.Error] + logger.error(f'Errors getting latest job: {errors}') + all_errors.extend(errors) + job_start_failed = True + + if not job_start_failed and get_results and len(get_results) > 0: + potential_job = get_results[0] + try: + underlying_job = potential_job.BaseObject + job_state_test = underlying_job.JobStateInfo.State + job = underlying_job + job_id = job.Id + + # For background commands, don't track the job in the session + if not run_in_background: + with self._job_lock: + self.active_job = job + + logger.info( + f'Job retrieved successfully. 
Job ID: {job.Id}, State: {job_state_test}, Background: {run_in_background}' + ) + + if job_state_test == JobState.Failed: + logger.error(f'Job {job.Id} failed immediately after starting.') + output_chunk, error_chunk = self._receive_job_output( + job, keep=False + ) + if output_chunk: + output_builder.append(output_chunk) + if error_chunk: + all_errors.extend(error_chunk) + job_start_failed = True + remove_script = f'Remove-Job -Job (Get-Job -Id {job.Id})' + self._run_ps_command(remove_script) + with self._job_lock: + self.active_job = None + except AttributeError as e: + logger.error( + f'Get-Job returned an object without expected properties on BaseObject: {e}' + ) + logger.error(traceback.format_exc()) + all_errors.append('Get-Job did not return a valid Job object.') + job_start_failed = True + + elif not job_start_failed: + logger.error('Get-Job did not return any results.') + all_errors.append('Get-Job did not return any results.') + job_start_failed = True + + except Exception as start_ex: + logger.error(f'Exception during job start/retrieval: {start_ex}') + logger.error(traceback.format_exc()) + all_errors.append(f'[Job Start/Get Exception: {start_ex}]') + job_start_failed = True + finally: + if ps_start: + ps_start.Dispose() + if 'ps_get' in locals() and ps_get: + ps_get.Dispose() + + if job_start_failed: + current_cwd = self._get_current_cwd() + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata(exit_code=1, working_dir=python_safe_cwd) + # Use ErrorObservation for critical failures like job start + return ErrorObservation( + content='Failed to start PowerShell job.\n[ERRORS]\n' + + '\n'.join(all_errors) + ) + + # For background commands, return immediately with success + if run_in_background: + current_cwd = self._get_current_cwd() + python_safe_cwd = current_cwd.replace('\\\\', '\\\\\\\\') + metadata = CmdOutputMetadata(exit_code=0, working_dir=python_safe_cwd) + metadata.suffix = f'\n[Command started as background 
job {job_id}.]' + return CmdOutputObservation( + content=f'[Started background job {job_id}]', + command=f'{command} &', + metadata=metadata, + ) + + # --- Monitor the Job --- + start_time = time.monotonic() + monitoring_loop_finished = False + shutdown_requested = False + final_state = JobState.Failed + + latest_cumulative_output = ( + '' # Tracks the absolute latest cumulative output seen in this loop + ) + latest_cumulative_errors = [] # Tracks the absolute latest cumulative errors seen in this loop + + while not monitoring_loop_finished: + if not should_continue(): + logger.warning('Shutdown signal received during job monitoring.') + shutdown_requested = True + monitoring_loop_finished = True + exit_code = -1 + continue + + elapsed_seconds = time.monotonic() - start_time + if elapsed_seconds > timeout_seconds: + logger.warning( + f'Command job monitoring exceeded timeout ({timeout_seconds}s). Leaving job running.' + ) + timed_out = True + monitoring_loop_finished = True + exit_code = -1 + continue + + current_job_obj = self._get_job_object(job_id) + if not current_job_obj: + logger.error(f'Job {job_id} object disappeared during monitoring.') + all_errors.append('[Job object lost during monitoring]') + monitoring_loop_finished = True + exit_code = 1 + final_state = JobState.Failed + # Reset state as job is gone + self._last_job_output = '' + self._last_job_error = [] + continue + + # Poll output (keep=True) -> Returns CUMULATIVE output/errors + polled_cumulative_output, polled_cumulative_errors = ( + self._receive_job_output(current_job_obj, keep=True) + ) + + # Update the latest cumulative state seen in this loop + latest_cumulative_output = polled_cumulative_output + latest_cumulative_errors = polled_cumulative_errors + + # Check job state + current_state = current_job_obj.JobStateInfo.State + if current_state not in [JobState.Running, JobState.NotStarted]: + logger.info( + f'Job {job_id} finished monitoring loop with state: {current_state}' + ) + 
monitoring_loop_finished = True + final_state = current_state + continue + + time.sleep(0.1) + + # --- Monitoring loop finished --- + + job_finished_naturally = ( + not timed_out + and not shutdown_requested + and final_state in [JobState.Completed, JobState.Stopped, JobState.Failed] + ) + + determined_cwd = self._cwd + final_output_content = '' + final_error_content = [] + + if job_finished_naturally: + logger.info( + f'Job {job_id} finished naturally with state: {final_state}. Clearing final output buffer.' + ) + final_cumulative_output = '' + final_cumulative_errors: list[str] = [] + final_job_obj = self._get_job_object(job_id) + if final_job_obj: + # Get final output/errors with keep=False + final_cumulative_output, final_cumulative_errors = ( + self._receive_job_output(final_job_obj, keep=False) + ) + # Always calculate the output relative to the last observation returned + final_output_content = final_cumulative_output.removeprefix( + self._last_job_output + ) + # Also calculate final errors relative to last observation returned + last_error_set = set(self._last_job_error) + final_error_content = [ + e for e in final_cumulative_errors if e not in last_error_set + ] + else: + logger.warning( + f'Could not get final job object {job_id} to clear output buffer.' + ) + # If object is gone, output is what was last seen relative to last observation + final_output_content = latest_cumulative_output.removeprefix( + self._last_job_output + ) + last_error_set = set(self._last_job_error) + final_error_content = [ + e for e in latest_cumulative_errors if e not in last_error_set + ] + + exit_code = 0 if final_state == JobState.Completed else 1 + + if final_state == JobState.Completed: + logger.info(f'Job {job_id} completed successfully. Querying final CWD.') + determined_cwd = self._get_current_cwd() + else: + logger.info( + f'Job {job_id} finished but did not complete successfully ({final_state}). 
Using cached CWD: {self._cwd}' + ) + determined_cwd = self._cwd + + with self._job_lock: # Lock to clear active_job + remove_script = f'Remove-Job -Job (Get-Job -Id {job_id})' + self._run_ps_command(remove_script) + self.active_job = None + logger.info(f'Cleaned up finished job {job_id}') + + else: + logger.info( + f'Job {job_id} did not finish naturally (timeout={timed_out}, shutdown={shutdown_requested}). Using cached CWD: {self._cwd}' + ) + determined_cwd = self._cwd + # Exit code is already -1 from loop exit reason + + # --- Calculate new output/errors relative to last observation (using latest from loop) --- + final_output_content = latest_cumulative_output.removeprefix( + self._last_job_output + ) + final_error_content = [ + e for e in latest_cumulative_errors if e not in self._last_job_error + ] + + # --- Update persistent state --- + self._last_job_output = latest_cumulative_output + self._last_job_error = list( + set(latest_cumulative_errors) + ) # Store unique errors + + python_safe_cwd = determined_cwd.replace('\\\\', '\\\\\\\\') + + # Combine unique output chunks for final observation + # Using a set ensures uniqueness if chunks were identical across polls + # Join accumulated output_builder parts + final_output = final_output_content + if final_error_content: # Use the calculated final *new* errors + error_stream_text = '\n'.join(final_error_content) + if final_output: + final_output += f'\n[ERROR STREAM]\n{error_stream_text}' + else: + final_output = f'[ERROR STREAM]\n{error_stream_text}' + if exit_code == 0: # Only check exit code if job finished naturally + logger.info( + f'Detected errors in stream ({len(final_error_content)} records) but job state was Completed. Forcing exit_code to 1.' 
+ ) + exit_code = 1 + + # Create metadata + metadata = CmdOutputMetadata(exit_code=exit_code, working_dir=python_safe_cwd) + + # Determine Suffix + if timed_out: + # Align suffix with bash.py timeout message + suffix = ( + f'\n[The command timed out after {timeout_seconds} seconds. ' + "You may wait longer to see additional output by sending empty command '', " + 'send other commands to interact with the current process, ' + 'or send keys to interrupt/kill the command.]' + ) + elif shutdown_requested: + # Align suffix with bash.py equivalent (though bash.py might not have specific shutdown message) + suffix = f'\n[Command execution cancelled due to shutdown signal. Exit Code: {exit_code}]' + elif job_finished_naturally: + # Align suffix with bash.py completed message + suffix = f'\n[The command completed with exit code {exit_code}.]' + else: # Should not happen, but defensive fallback + suffix = f'\n[Command execution finished. State: {final_state}, Exit Code: {exit_code}]' + + metadata.suffix = suffix + + return CmdOutputObservation( + content=final_output, command=command, metadata=metadata + ) + + def close(self): + """Closes the PowerShell runspace and releases resources, stopping any active job.""" + if self._closed: + return + + logger.info('Closing PowerShell session runspace.') + + # Stop and remove any active job before closing runspace + with self._job_lock: + if self.active_job: + logger.warning( # type: ignore[unreachable] + f'Session closing with active job {self.active_job.Id}. Attempting to stop and remove.' 
+ ) + job_id = self.active_job.Id + try: + # Ensure job object exists before trying to stop/remove + active_job_obj = self._get_job_object(job_id) + if active_job_obj: + stop_script = f'Stop-Job -Job (Get-Job -Id {job_id})' + self._run_ps_command( + stop_script + ) # Use helper before runspace closes + time.sleep(0.1) + remove_script = f'Remove-Job -Job (Get-Job -Id {job_id})' + self._run_ps_command(remove_script) + logger.info( + f'Stopped and removed active job {job_id} during close.' + ) + else: + logger.warning( + f'Could not find job object {job_id} to stop/remove during close.' + ) + except Exception as e: + logger.error( + f'Error stopping/removing job {job_id} during close: {e}' + ) + # --- Reset state even if stop/remove failed --- + self._last_job_output = '' + self._last_job_error = [] + self.active_job = None + + if hasattr(self, 'runspace') and self.runspace: + try: + # Check state using System.Management.Automation.Runspaces namespace + # Get the state info object first to avoid potential pythonnet issues with nested access + runspace_state_info = self.runspace.RunspaceStateInfo + if runspace_state_info.State == RunspaceState.Opened: + self.runspace.Close() + self.runspace.Dispose() + logger.info('PowerShell runspace closed and disposed.') + except Exception as e: + logger.error(f'Error closing/disposing PowerShell runspace: {e}') + logger.error(traceback.format_exc()) + + self.runspace = None + self._initialized = False + self._closed = True + + def __del__(self): + """Destructor ensures the runspace is closed.""" + self.close() diff --git a/poetry.lock b/poetry.lock index 17cd2800fd..016f1722b5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1457,6 +1457,21 @@ files = [ {file = "cloudpickle-3.1.1.tar.gz", hash = "sha256:b216fa8ae4019d5482a8ac3c95d8f6346115d8835911fd4aefd1a445e4242c64"}, ] +[[package]] +name = "clr-loader" +version = "0.2.7.post0" +description = "Generic pure Python loader for .NET runtimes" +optional = false +python-versions = 
">=3.7" +groups = ["main"] +files = [ + {file = "clr_loader-0.2.7.post0-py3-none-any.whl", hash = "sha256:e0b9fcc107d48347a4311a28ffe3ae78c4968edb216ffb6564cb03f7ace0bb47"}, + {file = "clr_loader-0.2.7.post0.tar.gz", hash = "sha256:b7a8b3f8fbb1bcbbb6382d887e21d1742d4f10b5ea209e4ad95568fe97e1c7c6"}, +] + +[package.dependencies] +cffi = {version = ">=1.17", markers = "python_version >= \"3.8\""} + [[package]] name = "colorama" version = "0.4.6" @@ -2856,7 +2871,7 @@ grpcio = {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_versi grpcio-status = {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} proto-plus = [ {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, + {version = ">=1.22.3,<2.0.0dev"}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -3071,7 +3086,7 @@ google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" grpc-google-iam-v1 = ">=0.14.0,<1.0.0dev" proto-plus = [ {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, - {version = ">=1.22.3,<2.0.0dev"}, + {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" @@ -7663,6 +7678,21 @@ asyncio-client = ["aiohttp (>=3.4)"] client = ["requests (>=2.21.0)", "websocket-client (>=0.54.0)"] docs = ["sphinx"] +[[package]] +name = "pythonnet" +version = "3.0.5" +description = ".NET and Mono integration for Python" +optional = false +python-versions = "<3.14,>=3.7" +groups = ["main"] +files = [ + {file = "pythonnet-3.0.5-py3-none-any.whl", hash = 
"sha256:f6702d694d5d5b163c9f3f5cc34e0bed8d6857150237fae411fefb883a656d20"}, + {file = "pythonnet-3.0.5.tar.gz", hash = "sha256:48e43ca463941b3608b32b4e236db92d8d40db4c58a75ace902985f76dac21cf"}, +] + +[package.dependencies] +clr_loader = ">=0.2.7,<0.3.0" + [[package]] name = "pytz" version = "2025.1" @@ -11117,5 +11147,5 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" -python-versions = "^3.12" -content-hash = "83ef9642a936252ac11e6625a68d833268a09be05a7670020e42068a9fc1f544" +python-versions = "^3.12,<3.14" +content-hash = "ff4b60b92f57d274444459e4376b65b77ca7efb174fea87082abff6cdb7fc6d5" diff --git a/pyproject.toml b/pyproject.toml index 0abc4bbe0a..516fcf083a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,8 +13,8 @@ packages = [ ] [tool.poetry.dependencies] -python = "^3.12" -litellm = "^1.60.0, !=1.64.4" # avoid 1.64.4 (known bug) +python = "^3.12,<3.14" +litellm = "^1.60.0, !=1.64.4, !=1.67.*" # avoid 1.64.4 (known bug) & 1.67.* (known bug #10272) aiohttp = ">=3.9.0,!=3.11.13" # Pin to avoid yanked version 3.11.13 google-generativeai = "*" # To use litellm with Gemini Pro API google-api-python-client = "^2.164.0" # For Google Sheets API @@ -78,6 +78,7 @@ prompt-toolkit = "^3.0.50" mcpm = "1.9.0" poetry = "^2.1.2" anyio = "4.9.0" +pythonnet = "*" [tool.poetry.group.dev.dependencies] ruff = "0.11.8" diff --git a/tests/runtime/test_bash.py b/tests/runtime/test_bash.py index 9fffb9237e..68ac617302 100644 --- a/tests/runtime/test_bash.py +++ b/tests/runtime/test_bash.py @@ -1,6 +1,7 @@ """Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" import os +import sys import time from pathlib import Path @@ -20,6 +21,11 @@ from openhands.runtime.impl.local.local_runtime import LocalRuntime # ============================================================================================================================ +# Helper function to determine if running on Windows +def is_windows(): + return 
sys.platform == 'win32' + + def _run_cmd_action(runtime, custom_command: str): action = CmdRunAction(command=custom_command) logger.info(action, extra={'msg_type': 'ACTION'}) @@ -29,28 +35,22 @@ def _run_cmd_action(runtime, custom_command: str): return obs -def test_bash_command_env(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) - try: - obs = runtime.run_action(CmdRunAction(command='env')) - assert isinstance( - obs, CmdOutputObservation - ), 'The observation should be a CmdOutputObservation.' - assert obs.exit_code == 0, 'The exit code should be 0.' - finally: - _close_test_runtime(runtime) +# Get platform-appropriate command +def get_platform_command(linux_cmd, windows_cmd): + return windows_cmd if is_windows() else linux_cmd def test_bash_server(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: - action = CmdRunAction(command='python3 -m http.server 8080') + # Use python -u for unbuffered output, potentially helping capture initial output on Windows + action = CmdRunAction(command='python -u -m http.server 8081') action.set_hard_timeout(1) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, CmdOutputObservation) assert obs.exit_code == -1 - assert 'Serving HTTP on 0.0.0.0 port 8080' in obs.content + assert 'Serving HTTP on' in obs.content assert ( "[The command timed out after 1.0 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]" in obs.metadata.suffix @@ -62,26 +62,36 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands): logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, CmdOutputObservation) assert obs.exit_code == 0 - assert 'Keyboard interrupt received, exiting.' 
in obs.content - assert config.workspace_mount_path_in_sandbox in obs.metadata.working_dir + if not is_windows(): + # Linux/macOS behavior + assert 'Keyboard interrupt received, exiting.' in obs.content + assert config.workspace_mount_path_in_sandbox in obs.metadata.working_dir + else: + # Windows behavior: Stop-Job might not produce output, but exit code should be 0 + # The working directory check might also be less relevant/predictable here + pass + # Verify the server is actually stopped by trying to start another one + # on the same port (regardless of OS) action = CmdRunAction(command='ls') action.set_hard_timeout(1) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, CmdOutputObservation) assert obs.exit_code == 0 + # Check that the interrupt message is NOT present in subsequent output assert 'Keyboard interrupt received, exiting.' not in obs.content + # Check working directory remains correct after interrupt handling assert config.workspace_mount_path_in_sandbox in obs.metadata.working_dir # run it again! 
- action = CmdRunAction(command='python3 -m http.server 8080') + action = CmdRunAction(command='python -u -m http.server 8081') action.set_hard_timeout(1) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(obs, CmdOutputObservation) assert obs.exit_code == -1 - assert 'Serving HTTP on 0.0.0.0 port 8080' in obs.content + assert 'Serving HTTP on' in obs.content finally: _close_test_runtime(runtime) @@ -102,7 +112,12 @@ def test_bash_background_server(temp_dir, runtime_cls, run_as_openhands): time.sleep(1) # Verify the server is running by curling it - curl_action = CmdRunAction(f'curl http://localhost:{server_port}') + if is_windows(): + curl_action = CmdRunAction( + f'Invoke-WebRequest -Uri http://localhost:{server_port} -UseBasicParsing | Select-Object -ExpandProperty Content' + ) + else: + curl_action = CmdRunAction(f'curl http://localhost:{server_port}') curl_obs = runtime.run_action(curl_action) logger.info(curl_obs, extra={'msg_type': 'OBSERVATION'}) assert isinstance(curl_obs, CmdOutputObservation) @@ -110,6 +125,17 @@ def test_bash_background_server(temp_dir, runtime_cls, run_as_openhands): # Check for content typical of python http.server directory listing assert 'Directory listing for' in curl_obs.content + # Kill the server + if is_windows(): + # Use PowerShell job management commands instead of trying to kill process directly + kill_action = CmdRunAction('Get-Job | Stop-Job') + else: + kill_action = CmdRunAction('pkill -f "http.server"') + kill_obs = runtime.run_action(kill_action) + logger.info(kill_obs, extra={'msg_type': 'OBSERVATION'}) + assert isinstance(kill_obs, CmdOutputObservation) + assert kill_obs.exit_code == 0 + finally: _close_test_runtime(runtime) @@ -117,69 +143,44 @@ def test_bash_background_server(temp_dir, runtime_cls, run_as_openhands): def test_multiline_commands(temp_dir, runtime_cls): runtime, config = _load_runtime(temp_dir, runtime_cls) try: - # single multiline command - obs = 
_run_cmd_action(runtime, 'echo \\\n -e "foo"') - assert obs.exit_code == 0, 'The exit code should be 0.' - assert 'foo' in obs.content + if is_windows(): + # Windows PowerShell version using backticks for line continuation + obs = _run_cmd_action(runtime, 'Write-Output `\n "foo"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'foo' in obs.content - # test multiline echo - obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"') - assert obs.exit_code == 0, 'The exit code should be 0.' - assert 'hello\nworld' in obs.content + # test multiline output + obs = _run_cmd_action(runtime, 'Write-Output "hello`nworld"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'hello\nworld' in obs.content - # test whitespace - obs = _run_cmd_action(runtime, 'echo -e "a\\n\\n\\nz"') - assert obs.exit_code == 0, 'The exit code should be 0.' - assert '\n\n\n' in obs.content - finally: - _close_test_runtime(runtime) - - -def test_multiple_multiline_commands(temp_dir, runtime_cls, run_as_openhands): - cmds = [ - 'ls -l', - 'echo -e "hello\nworld"', - """echo -e "hello it's me\"""", - """echo \\ - -e 'hello' \\ - -v""", - """echo -e 'hello\\nworld\\nare\\nyou\\nthere?'""", - """echo -e 'hello\nworld\nare\nyou\n\nthere?'""", - """echo -e 'hello\nworld "'""", - ] - joined_cmds = '\n'.join(cmds) - - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) - try: - # First test that running multiple commands at once fails - obs = _run_cmd_action(runtime, joined_cmds) - assert isinstance(obs, ErrorObservation) - assert 'Cannot execute multiple commands at once' in obs.content - - # Now run each command individually and verify they work - results = [] - for cmd in cmds: - obs = _run_cmd_action(runtime, cmd) - assert isinstance(obs, CmdOutputObservation) - assert obs.exit_code == 0 - results.append(obs.content) - - # Verify all expected outputs are present - assert 'total 0' in results[0] # ls -l - assert 'hello\nworld' in results[1] # echo 
-e "hello\nworld" - assert "hello it's me" in results[2] # echo -e "hello it\'s me" - assert 'hello -v' in results[3] # echo -e 'hello' -v - assert ( - 'hello\nworld\nare\nyou\nthere?' in results[4] - ) # echo -e 'hello\nworld\nare\nyou\nthere?' - assert ( - 'hello\nworld\nare\nyou\n\nthere?' in results[5] - ) # echo -e with literal newlines - assert 'hello\nworld "' in results[6] # echo -e with quote + # test whitespace + obs = _run_cmd_action(runtime, 'Write-Output "a`n`n`nz"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert '\n\n\n' in obs.content + else: + # Original Linux bash version + # single multiline command + obs = _run_cmd_action(runtime, 'echo \\\n -e "foo"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'foo' in obs.content + + # test multiline echo + obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert 'hello\nworld' in obs.content + + # test whitespace + obs = _run_cmd_action(runtime, 'echo -e "a\\n\\n\\nz"') + assert obs.exit_code == 0, 'The exit code should be 0.' 
+ assert '\n\n\n' in obs.content finally: _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), reason='Test relies on Linux bash-specific complex commands' +) def test_complex_commands(temp_dir, runtime_cls, run_as_openhands): cmd = """count=0; tries=0; while [ $count -lt 3 ]; do result=$(echo "Heads"); tries=$((tries+1)); echo "Flip $tries: $result"; if [ "$result" = "Heads" ]; then count=$((count+1)); else count=0; fi; done; echo "Got 3 heads in a row after $tries flips!";""" @@ -198,7 +199,10 @@ def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands): """Test that the PS2 sign is not added to the output of a multiline command.""" runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: - obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"') + if is_windows(): + obs = _run_cmd_action(runtime, 'Write-Output "hello`nworld"') + else: + obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"') assert obs.exit_code == 0, 'The exit code should be 0.' 
assert 'hello\nworld' in obs.content @@ -207,6 +211,9 @@ def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands): _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), reason='Test uses Linux-specific bash loops and sed commands' +) def test_multiline_command_loop(temp_dir, runtime_cls): # https://github.com/All-Hands-AI/OpenHands/issues/3143 init_cmd = """mkdir -p _modules && \ @@ -234,43 +241,133 @@ done && echo "success" _close_test_runtime(runtime) +def test_multiple_multiline_commands(temp_dir, runtime_cls, run_as_openhands): + if is_windows(): + cmds = [ + 'Get-ChildItem', + 'Write-Output "hello`nworld"', + """Write-Output "hello it's me\"""", + """Write-Output ` + ('hello ' + ` + 'world')""", + """Write-Output 'hello\nworld\nare\nyou\nthere?'""", + """Write-Output 'hello\nworld\nare\nyou\n\nthere?'""", + """Write-Output 'hello\nworld "'""", # Escape the trailing double quote + ] + else: + cmds = [ + 'ls -l', + 'echo -e "hello\nworld"', + """echo -e "hello it's me\"""", + """echo \\ + -e 'hello' \\ + world""", + """echo -e 'hello\\nworld\\nare\\nyou\\nthere?'""", + """echo -e 'hello\nworld\nare\nyou\n\nthere?'""", + """echo -e 'hello\nworld "'""", + ] + joined_cmds = '\n'.join(cmds) + + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # First test that running multiple commands at once fails + obs = _run_cmd_action(runtime, joined_cmds) + assert isinstance(obs, ErrorObservation) + assert 'Cannot execute multiple commands at once' in obs.content + + # Now run each command individually and verify they work + results = [] + for cmd in cmds: + obs = _run_cmd_action(runtime, cmd) + assert isinstance(obs, CmdOutputObservation) + assert obs.exit_code == 0 + results.append(obs.content) + + # Verify all expected outputs are present + if is_windows(): + assert '.git_config' in results[0] # Get-ChildItem + else: + assert 'total 0' in results[0] # ls -l + assert 'hello\nworld' in results[1] # echo -e "hello\nworld" 
+ assert "hello it's me" in results[2] # echo -e "hello it\'s me" + assert 'hello world' in results[3] # echo -e 'hello' world + assert ( + 'hello\nworld\nare\nyou\nthere?' in results[4] + ) # echo -e 'hello\nworld\nare\nyou\nthere?' + assert ( + 'hello\nworld\nare\nyou\n\nthere?' in results[5] + ) # echo -e with literal newlines + assert 'hello\nworld "' in results[6] # echo -e with quote + finally: + _close_test_runtime(runtime) + + def test_cmd_run(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: - obs = _run_cmd_action( - runtime, f'ls -l {config.workspace_mount_path_in_sandbox}' - ) - assert obs.exit_code == 0 + if is_windows(): + # Windows PowerShell version + obs = _run_cmd_action( + runtime, f'Get-ChildItem -Path {config.workspace_mount_path_in_sandbox}' + ) + assert obs.exit_code == 0 - obs = _run_cmd_action(runtime, 'ls -l') - assert obs.exit_code == 0 - assert 'total 0' in obs.content + obs = _run_cmd_action(runtime, 'Get-ChildItem') + assert obs.exit_code == 0 - obs = _run_cmd_action(runtime, 'mkdir test') - assert obs.exit_code == 0 + obs = _run_cmd_action(runtime, 'New-Item -ItemType Directory -Path test') + assert obs.exit_code == 0 - obs = _run_cmd_action(runtime, 'ls -l') - assert obs.exit_code == 0 - if run_as_openhands: - assert 'openhands' in obs.content - elif runtime_cls == LocalRuntime: - assert 'root' not in obs.content and 'openhands' not in obs.content + obs = _run_cmd_action(runtime, 'Get-ChildItem') + assert obs.exit_code == 0 + assert 'test' in obs.content + + obs = _run_cmd_action(runtime, 'New-Item -ItemType File -Path test/foo.txt') + assert obs.exit_code == 0 + + obs = _run_cmd_action(runtime, 'Get-ChildItem test') + assert obs.exit_code == 0 + assert 'foo.txt' in obs.content + + # clean up + _run_cmd_action(runtime, 'Remove-Item -Recurse -Force test') + assert obs.exit_code == 0 else: - assert 'root' in obs.content - assert 'test' in obs.content + # Unix version 
+ obs = _run_cmd_action( + runtime, f'ls -l {config.workspace_mount_path_in_sandbox}' + ) + assert obs.exit_code == 0 - obs = _run_cmd_action(runtime, 'touch test/foo.txt') - assert obs.exit_code == 0 + obs = _run_cmd_action(runtime, 'ls -l') + assert obs.exit_code == 0 + assert 'total 0' in obs.content - obs = _run_cmd_action(runtime, 'ls -l test') - assert obs.exit_code == 0 - assert 'foo.txt' in obs.content + obs = _run_cmd_action(runtime, 'mkdir test') + assert obs.exit_code == 0 - # clean up: this is needed, since CI will not be - # run as root, and this test may leave a file - # owned by root - _run_cmd_action(runtime, 'rm -rf test') - assert obs.exit_code == 0 + obs = _run_cmd_action(runtime, 'ls -l') + assert obs.exit_code == 0 + if run_as_openhands: + assert 'openhands' in obs.content + elif runtime_cls == LocalRuntime: + assert 'root' not in obs.content and 'openhands' not in obs.content + else: + assert 'root' in obs.content + assert 'test' in obs.content + + obs = _run_cmd_action(runtime, 'touch test/foo.txt') + assert obs.exit_code == 0 + + obs = _run_cmd_action(runtime, 'ls -l test') + assert obs.exit_code == 0 + assert 'foo.txt' in obs.content + + # clean up: this is needed, since CI will not be + # run as root, and this test may leave a file + # owned by root + _run_cmd_action(runtime, 'rm -rf test') + assert obs.exit_code == 0 finally: _close_test_runtime(runtime) @@ -278,14 +375,27 @@ def test_cmd_run(temp_dir, runtime_cls, run_as_openhands): def test_run_as_user_correct_home_dir(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: - obs = _run_cmd_action(runtime, 'cd ~ && pwd') - assert obs.exit_code == 0 - if runtime_cls == LocalRuntime: - assert os.getenv('HOME') in obs.content - elif run_as_openhands: - assert '/home/openhands' in obs.content + if is_windows(): + # Windows PowerShell version + obs = _run_cmd_action(runtime, 'cd $HOME && Get-Location') + assert obs.exit_code == 
0 + # Check for Windows-style home paths + if runtime_cls == LocalRuntime: + assert ( + os.getenv('USERPROFILE') in obs.content + or os.getenv('HOME') in obs.content + ) + # For non-local runtime, we are less concerned with precise paths else: - assert '/root' in obs.content + # Original Linux version + obs = _run_cmd_action(runtime, 'cd ~ && pwd') + assert obs.exit_code == 0 + if runtime_cls == LocalRuntime: + assert os.getenv('HOME') in obs.content + elif run_as_openhands: + assert '/home/openhands' in obs.content + else: + assert '/root' in obs.content finally: _close_test_runtime(runtime) @@ -293,10 +403,18 @@ def test_run_as_user_correct_home_dir(temp_dir, runtime_cls, run_as_openhands): def test_multi_cmd_run_in_single_line(temp_dir, runtime_cls): runtime, config = _load_runtime(temp_dir, runtime_cls) try: - obs = _run_cmd_action(runtime, 'pwd && ls -l') - assert obs.exit_code == 0 - assert config.workspace_mount_path_in_sandbox in obs.content - assert 'total 0' in obs.content + if is_windows(): + # Windows PowerShell version using semicolon + obs = _run_cmd_action(runtime, 'Get-Location && Get-ChildItem') + assert obs.exit_code == 0 + assert config.workspace_mount_path_in_sandbox in obs.content + assert '.git_config' in obs.content + else: + # Original Linux version using && + obs = _run_cmd_action(runtime, 'pwd && ls -l') + assert obs.exit_code == 0 + assert config.workspace_mount_path_in_sandbox in obs.content + assert 'total 0' in obs.content finally: _close_test_runtime(runtime) @@ -304,15 +422,35 @@ def test_multi_cmd_run_in_single_line(temp_dir, runtime_cls): def test_stateful_cmd(temp_dir, runtime_cls): runtime, config = _load_runtime(temp_dir, runtime_cls) try: - obs = _run_cmd_action(runtime, 'mkdir -p test') - assert obs.exit_code == 0, 'The exit code should be 0.' 
+ if is_windows(): + # Windows PowerShell version + obs = _run_cmd_action( + runtime, 'New-Item -ItemType Directory -Path test -Force' + ) + assert obs.exit_code == 0, 'The exit code should be 0.' - obs = _run_cmd_action(runtime, 'cd test') - assert obs.exit_code == 0, 'The exit code should be 0.' + obs = _run_cmd_action(runtime, 'Set-Location test') + assert obs.exit_code == 0, 'The exit code should be 0.' - obs = _run_cmd_action(runtime, 'pwd') - assert obs.exit_code == 0, 'The exit code should be 0.' - assert f'{config.workspace_mount_path_in_sandbox}/test' in obs.content + obs = _run_cmd_action(runtime, 'Get-Location') + assert obs.exit_code == 0, 'The exit code should be 0.' + # Account for both forward and backward slashes in path + norm_path = config.workspace_mount_path_in_sandbox.replace( + '\\', '/' + ).replace('//', '/') + test_path = f'{norm_path}/test'.replace('//', '/') + assert test_path in obs.content.replace('\\', '/') + else: + # Original Linux version + obs = _run_cmd_action(runtime, 'mkdir -p test') + assert obs.exit_code == 0, 'The exit code should be 0.' + + obs = _run_cmd_action(runtime, 'cd test') + assert obs.exit_code == 0, 'The exit code should be 0.' + + obs = _run_cmd_action(runtime, 'pwd') + assert obs.exit_code == 0, 'The exit code should be 0.' + assert f'{config.workspace_mount_path_in_sandbox}/test' in obs.content finally: _close_test_runtime(runtime) @@ -340,13 +478,22 @@ def test_copy_single_file(temp_dir, runtime_cls): _create_test_file(temp_dir) runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir) - obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') - assert obs.exit_code == 0 - assert 'test_file.txt' in obs.content + if is_windows(): + obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_file.txt' in obs.content - obs = _run_cmd_action(runtime, f'cat {sandbox_file}') - assert obs.exit_code == 0 - assert 'Hello, World!' 
in obs.content + obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}') + assert obs.exit_code == 0 + assert 'Hello, World!' in obs.content + else: + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_file.txt' in obs.content + + obs = _run_cmd_action(runtime, f'cat {sandbox_file}') + assert obs.exit_code == 0 + assert 'Hello, World!' in obs.content finally: _close_test_runtime(runtime) @@ -373,20 +520,40 @@ def test_copy_directory_recursively(temp_dir, runtime_cls): runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True) - obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') - assert obs.exit_code == 0 - assert 'test_dir' in obs.content - assert 'file1.txt' not in obs.content - assert 'file2.txt' not in obs.content + if is_windows(): + obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_dir' in obs.content + assert 'file1.txt' not in obs.content + assert 'file2.txt' not in obs.content - obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}/test_dir') - assert obs.exit_code == 0 - assert 'file1.txt' in obs.content - assert 'file2.txt' in obs.content + obs = _run_cmd_action( + runtime, f'Get-ChildItem -Path {sandbox_dir}/test_dir' + ) + assert obs.exit_code == 0 + assert 'file1.txt' in obs.content + assert 'file2.txt' in obs.content - obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_dir/file1.txt') - assert obs.exit_code == 0 - assert 'File 1 content' in obs.content + obs = _run_cmd_action( + runtime, f'Get-Content {sandbox_dir}/test_dir/file1.txt' + ) + assert obs.exit_code == 0 + assert 'File 1 content' in obs.content + else: + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_dir' in obs.content + assert 'file1.txt' not in obs.content + assert 'file2.txt' not in obs.content + + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}/test_dir') + assert obs.exit_code == 0 + assert 
'file1.txt' in obs.content + assert 'file2.txt' in obs.content + + obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_dir/file1.txt') + assert obs.exit_code == 0 + assert 'File 1 content' in obs.content finally: _close_test_runtime(runtime) @@ -411,26 +578,62 @@ def test_overwrite_existing_file(temp_dir, runtime_cls): runtime, config = _load_runtime(temp_dir, runtime_cls) try: sandbox_dir = config.workspace_mount_path_in_sandbox + sandbox_file = os.path.join(sandbox_dir, 'test_file.txt') - obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') - assert obs.exit_code == 0 + if is_windows(): + # Check initial state + obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_file.txt' not in obs.content - obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt') - assert obs.exit_code == 0 + # Create an empty file + obs = _run_cmd_action( + runtime, f'New-Item -ItemType File -Path {sandbox_file} -Force' + ) + assert obs.exit_code == 0 - obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') - assert obs.exit_code == 0 + # Verify file exists and is empty + obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_file.txt' in obs.content - obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt') - assert obs.exit_code == 0 - assert 'Hello, World!' not in obs.content + obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}') + assert obs.exit_code == 0 + assert obs.content.strip() == '' # Empty file + assert 'Hello, World!' not in obs.content - _create_test_file(temp_dir) - runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir) + # Create host file and copy to overwrite + _create_test_file(temp_dir) + runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir) - obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt') - assert obs.exit_code == 0 - assert 'Hello, World!' 
in obs.content + # Verify file content is overwritten + obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}') + assert obs.exit_code == 0 + assert 'Hello, World!' in obs.content + else: + # Original Linux version + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_file.txt' not in obs.content # Check initial state + + obs = _run_cmd_action(runtime, f'touch {sandbox_file}') + assert obs.exit_code == 0 + + obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}') + assert obs.exit_code == 0 + assert 'test_file.txt' in obs.content + + obs = _run_cmd_action(runtime, f'cat {sandbox_file}') + assert obs.exit_code == 0 + assert obs.content.strip() == '' # Empty file + assert 'Hello, World!' not in obs.content + + _create_test_file(temp_dir) + runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir) + + obs = _run_cmd_action(runtime, f'cat {sandbox_file}') + assert obs.exit_code == 0 + assert 'Hello, World!' in obs.content finally: _close_test_runtime(runtime) @@ -468,11 +671,15 @@ def test_copy_from_directory(temp_dir, runtime_cls): # Result is returned as a path assert isinstance(result, Path) - result.unlink() + if result.exists() and not is_windows(): + result.unlink() finally: _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), reason='Test uses Linux-specific file permissions and sudo commands' +) def test_git_operation(temp_dir, runtime_cls): # do not mount workspace, since workspace mount by tests will be owned by root # while the user_id we get via os.getuid() is different from root @@ -579,31 +786,67 @@ def test_pwd_property(temp_dir, runtime_cls, run_as_openhands): def test_basic_command(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: - # Test simple command - obs = _run_cmd_action(runtime, "echo 'hello world'") - assert 'hello world' in obs.content - assert obs.exit_code == 0 + if is_windows(): + # Test 
simple command + obs = _run_cmd_action(runtime, "Write-Output 'hello world'") + assert 'hello world' in obs.content + assert obs.exit_code == 0 - # Test command with error - obs = _run_cmd_action(runtime, 'nonexistent_command') - assert obs.exit_code == 127 - assert 'nonexistent_command: command not found' in obs.content + # Test command with error + obs = _run_cmd_action(runtime, 'nonexistent_command') + assert obs.exit_code != 0 + assert 'not recognized' in obs.content or 'command not found' in obs.content - # Test command with special characters - obs = _run_cmd_action(runtime, "echo 'hello world with\nspecial chars'") - assert 'hello world with\nspecial chars' in obs.content - assert obs.exit_code == 0 + # Test command with special characters + obs = _run_cmd_action( + runtime, 'Write-Output "hello world with`nspecial chars"' + ) + assert 'hello world with\nspecial chars' in obs.content + assert obs.exit_code == 0 - # Test multiple commands in sequence - obs = _run_cmd_action(runtime, 'echo "first" && echo "second" && echo "third"') - assert 'first' in obs.content - assert 'second' in obs.content - assert 'third' in obs.content - assert obs.exit_code == 0 + # Test multiple commands in sequence + obs = _run_cmd_action( + runtime, + 'Write-Output "first" && Write-Output "second" && Write-Output "third"', + ) + assert 'first' in obs.content + assert 'second' in obs.content + assert 'third' in obs.content + assert obs.exit_code == 0 + else: + # Original Linux version + # Test simple command + obs = _run_cmd_action(runtime, "echo 'hello world'") + assert 'hello world' in obs.content + assert obs.exit_code == 0 + + # Test command with error + obs = _run_cmd_action(runtime, 'nonexistent_command') + assert obs.exit_code == 127 + assert 'nonexistent_command: command not found' in obs.content + + # Test command with special characters + obs = _run_cmd_action( + runtime, "echo 'hello world with\nspecial chars'" + ) + assert 'hello world with\nspecial chars' in obs.content 
+ assert obs.exit_code == 0 + + # Test multiple commands in sequence + obs = _run_cmd_action( + runtime, 'echo "first" && echo "second" && echo "third"' + ) + assert 'first' in obs.content + assert 'second' in obs.content + assert 'third' in obs.content + assert obs.exit_code == 0 finally: _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), reason='Powershell does not support interactive commands' +) def test_interactive_command(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime( temp_dir, @@ -640,6 +883,10 @@ EOF""") _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), + reason='Test relies on Linux-specific commands like seq and bash for loops', +) def test_long_output(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: @@ -654,6 +901,10 @@ def test_long_output(temp_dir, runtime_cls, run_as_openhands): _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), + reason='Test relies on Linux-specific commands like seq and bash for loops', +) def test_long_output_exceed_history_limit(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: @@ -670,6 +921,9 @@ def test_long_output_exceed_history_limit(temp_dir, runtime_cls, run_as_openhand _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), reason='Test uses Linux-specific temp directory and bash for loops' +) def test_long_output_from_nested_directories(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: @@ -695,6 +949,10 @@ def test_long_output_from_nested_directories(temp_dir, runtime_cls, run_as_openh _close_test_runtime(runtime) +@pytest.mark.skipif( + is_windows(), + reason='Test uses Linux-specific commands like find and grep with complex syntax', +) def test_command_backslash(temp_dir, runtime_cls, run_as_openhands): runtime, config = 
_load_runtime(temp_dir, runtime_cls, run_as_openhands) try: @@ -722,167 +980,9 @@ def test_command_backslash(temp_dir, runtime_cls, run_as_openhands): _close_test_runtime(runtime) -def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) - try: - # Start a command that produces output slowly - action = CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done') - action.set_hard_timeout(2.5) - obs = runtime.run_action(action) - assert obs.content.strip() == '1' - assert obs.metadata.prefix == '' - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - - # Continue watching output - action = CmdRunAction('') - action.set_hard_timeout(2.5) - obs = runtime.run_action(action) - assert '[Below is the output of the previous command.]' in obs.metadata.prefix - assert obs.content.strip() == '2' - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - - # Continue until completion - for expected in ['3', '4', '5']: - action = CmdRunAction('') - action.set_hard_timeout(2.5) - obs = runtime.run_action(action) - assert ( - '[Below is the output of the previous command.]' in obs.metadata.prefix - ) - assert obs.content.strip() == expected - assert '[The command timed out after 2.5 seconds.' 
in obs.metadata.suffix - - # Final empty command to complete - action = CmdRunAction('') - obs = runtime.run_action(action) - assert '[The command completed with exit code 0.]' in obs.metadata.suffix - finally: - _close_test_runtime(runtime) - - -def test_long_running_command_follow_by_execute( - temp_dir, runtime_cls, run_as_openhands -): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) - try: - # Test command that produces output slowly - action = CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done') - action.set_hard_timeout(2.5) - obs = runtime.run_action(action) - assert '1' in obs.content # First number should appear before timeout - assert obs.metadata.exit_code == -1 # -1 indicates command is still running - assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix - assert obs.metadata.prefix == '' - - # Continue watching output - action = CmdRunAction('') - action.set_hard_timeout(2.5) - obs = runtime.run_action(action) - assert '2' in obs.content - assert obs.metadata.prefix == '[Below is the output of the previous command.]\n' - assert '[The command timed out after 2.5 seconds.' 
in obs.metadata.suffix - assert obs.metadata.exit_code == -1 # -1 indicates command is still running - - # Test command that produces no output - action = CmdRunAction('sleep 15') - action.set_hard_timeout(2.5) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert '3' not in obs.content - assert obs.metadata.prefix == '[Below is the output of the previous command.]\n' - assert 'The previous command is still running' in obs.metadata.suffix - assert obs.metadata.exit_code == -1 # -1 indicates command is still running - - # Finally continue again - action = CmdRunAction('') - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert '3' in obs.content - assert '[The command completed with exit code 0.]' in obs.metadata.suffix - finally: - _close_test_runtime(runtime) - - -def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) - try: - # Test empty command without previous command - obs = runtime.run_action(CmdRunAction('')) - assert isinstance(obs, CmdOutputObservation) - assert ( - 'ERROR: No previous running command to retrieve logs from.' 
in obs.content - ) - finally: - _close_test_runtime(runtime) - - -def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) - try: - # Test Python program that asks for input - properly escaped for bash - python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')""" - - # Start Python with the interactive script - obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"')) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert 'Enter your name:' in obs.content - assert obs.metadata.exit_code == -1 # -1 indicates command is still running - - # Send first input (name) - obs = runtime.run_action(CmdRunAction('Alice', is_input=True)) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert 'Enter your age:' in obs.content - assert obs.metadata.exit_code == -1 - - # Send second input (age) - obs = runtime.run_action(CmdRunAction('25', is_input=True)) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert 'Hello Alice, you are 25 years old' in obs.content - assert obs.metadata.exit_code == 0 - assert '[The command completed with exit code 0.]' in obs.metadata.suffix - finally: - _close_test_runtime(runtime) - - -def test_python_interactive_input_without_set_input( - temp_dir, runtime_cls, run_as_openhands -): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) - try: - # Test Python program that asks for input - properly escaped for bash - python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')""" - - # Start Python with the interactive script - obs = runtime.run_action(CmdRunAction(f'python3 -c "{python_script}"')) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert 'Enter your name:' in obs.content - assert obs.metadata.exit_code == -1 # -1 indicates command 
is still running - - # Send first input (name) - obs = runtime.run_action(CmdRunAction('Alice', is_input=False)) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert 'Enter your age:' not in obs.content - assert ( - 'Your command "Alice" is NOT executed. The previous command is still running' - in obs.metadata.suffix - ) - assert obs.metadata.exit_code == -1 - - # Try again now with input - obs = runtime.run_action(CmdRunAction('Alice', is_input=True)) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert 'Enter your age:' in obs.content - assert obs.metadata.exit_code == -1 - - obs = runtime.run_action(CmdRunAction('25', is_input=True)) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert 'Hello Alice, you are 25 years old' in obs.content - assert obs.metadata.exit_code == 0 - assert '[The command completed with exit code 0.]' in obs.metadata.suffix - finally: - _close_test_runtime(runtime) - - +@pytest.mark.skipif( + is_windows(), reason='Test uses Linux-specific ps aux, awk, and grep commands' +) def test_stress_long_output_with_soft_and_hard_timeout( temp_dir, runtime_cls, run_as_openhands ): @@ -973,10 +1073,228 @@ def test_stress_long_output_with_soft_and_hard_timeout( _close_test_runtime(runtime) +def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands): + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + if is_windows(): + # Windows PowerShell version + action = CmdRunAction( + '1..5 | ForEach-Object { Write-Output $_; Start-Sleep 3 }' + ) + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert obs.content.strip() == '1' + assert obs.metadata.prefix == '' + assert '[The command timed out after 2.5 seconds.' 
in obs.metadata.suffix + + # Continue watching output + action = CmdRunAction('') + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert ( + '[Below is the output of the previous command.]' in obs.metadata.prefix + ) + assert obs.content.strip() == '2' + assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix + + # Continue until completion + for expected in ['3', '4', '5']: + action = CmdRunAction('') + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert ( + '[Below is the output of the previous command.]' + in obs.metadata.prefix + ) + assert obs.content.strip() == expected + assert ( + '[The command timed out after 2.5 seconds.' in obs.metadata.suffix + ) + + # Final empty command to complete + action = CmdRunAction('') + obs = runtime.run_action(action) + assert '[The command completed with exit code 0.]' in obs.metadata.suffix + else: + # Original Linux version + # Start a command that produces output slowly + action = CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done') + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert obs.content.strip() == '1' + assert obs.metadata.prefix == '' + assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix + + # Continue watching output + action = CmdRunAction('') + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert ( + '[Below is the output of the previous command.]' in obs.metadata.prefix + ) + assert obs.content.strip() == '2' + assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix + + # Continue until completion + for expected in ['3', '4', '5']: + action = CmdRunAction('') + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert ( + '[Below is the output of the previous command.]' + in obs.metadata.prefix + ) + assert obs.content.strip() == expected + assert ( + '[The command timed out after 2.5 seconds.' 
in obs.metadata.suffix + ) + + # Final empty command to complete + action = CmdRunAction('') + obs = runtime.run_action(action) + assert '[The command completed with exit code 0.]' in obs.metadata.suffix + finally: + _close_test_runtime(runtime) + + +def test_long_running_command_follow_by_execute( + temp_dir, runtime_cls, run_as_openhands +): + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + if is_windows(): + action = CmdRunAction('1..3 | ForEach-Object { Write-Output $_; sleep 3 }') + else: + # Test command that produces output slowly + action = CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done') + + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert '1' in obs.content # First number should appear before timeout + assert obs.metadata.exit_code == -1 # -1 indicates command is still running + assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix + assert obs.metadata.prefix == '' + + # Continue watching output + action = CmdRunAction('') + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + assert '2' in obs.content + assert obs.metadata.prefix == '[Below is the output of the previous command.]\n' + assert '[The command timed out after 2.5 seconds.' 
in obs.metadata.suffix + assert obs.metadata.exit_code == -1 # -1 indicates command is still running + + # Test command that produces no output + action = CmdRunAction('sleep 15') + action.set_hard_timeout(2.5) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert '3' not in obs.content + assert obs.metadata.prefix == '[Below is the output of the previous command.]\n' + assert 'The previous command is still running' in obs.metadata.suffix + assert obs.metadata.exit_code == -1 # -1 indicates command is still running + + # Finally continue again + action = CmdRunAction('') + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert '3' in obs.content + assert '[The command completed with exit code 0.]' in obs.metadata.suffix + finally: + _close_test_runtime(runtime) + + +def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands): + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Test empty command without previous command - behavior should be the same on all platforms + obs = runtime.run_action(CmdRunAction('')) + assert isinstance(obs, CmdOutputObservation) + assert ( + 'ERROR: No previous running command to retrieve logs from.' 
in obs.content + ) + finally: + _close_test_runtime(runtime) + + +@pytest.mark.skipif( + is_windows(), reason='Powershell does not support interactive commands' +) +def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands): + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Test Python program that asks for input - same for both platforms + python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')""" + + # Start Python with the interactive script + # For both platforms we can use the same command + obs = runtime.run_action(CmdRunAction(f'python -c "{python_script}"')) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'Enter your name:' in obs.content + assert obs.metadata.exit_code == -1 # -1 indicates command is still running + + # Send first input (name) + obs = runtime.run_action(CmdRunAction('Alice', is_input=True)) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'Enter your age:' in obs.content + assert obs.metadata.exit_code == -1 + + # Send second input (age) + obs = runtime.run_action(CmdRunAction('25', is_input=True)) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'Hello Alice, you are 25 years old' in obs.content + assert obs.metadata.exit_code == 0 + assert '[The command completed with exit code 0.]' in obs.metadata.suffix + finally: + _close_test_runtime(runtime) + + +@pytest.mark.skipif( + is_windows(), reason='Powershell does not support interactive commands' +) +def test_python_interactive_input_without_set_input( + temp_dir, runtime_cls, run_as_openhands +): + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Test Python program that asks for input + python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')""" + + # Start Python with the interactive script + obs = 
runtime.run_action(CmdRunAction(f'python -c "{python_script}"')) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'Enter your name:' in obs.content + assert obs.metadata.exit_code == -1 # -1 indicates command is still running + + # Send first input (name) + obs = runtime.run_action(CmdRunAction('Alice', is_input=False)) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'Enter your age:' not in obs.content + assert ( + 'Your command "Alice" is NOT executed. The previous command is still running' + in obs.metadata.suffix + ) + assert obs.metadata.exit_code == -1 + + # Try again now with input + obs = runtime.run_action(CmdRunAction('Alice', is_input=True)) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'Enter your age:' in obs.content + assert obs.metadata.exit_code == -1 + + obs = runtime.run_action(CmdRunAction('25', is_input=True)) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'Hello Alice, you are 25 years old' in obs.content + assert obs.metadata.exit_code == 0 + assert '[The command completed with exit code 0.]' in obs.metadata.suffix + finally: + _close_test_runtime(runtime) + + def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands): runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) try: - # create a git repo + # create a git repo - same for both platforms action = CmdRunAction( 'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands' ) @@ -984,12 +1302,11 @@ def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands): # logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.metadata.exit_code == 0 - # Start Python with the interactive script + # Check git remote - same for both platforms obs = runtime.run_action(CmdRunAction('git remote -v')) # logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.metadata.exit_code == 0 assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content assert 'git remote -v' not in 
obs.content - finally: _close_test_runtime(runtime) diff --git a/tests/runtime/test_ipython.py b/tests/runtime/test_ipython.py index c9fe1bf3e7..75375e5232 100644 --- a/tests/runtime/test_ipython.py +++ b/tests/runtime/test_ipython.py @@ -194,6 +194,52 @@ def test_ipython_simple(temp_dir, runtime_cls): _close_test_runtime(runtime) +def test_ipython_chdir(temp_dir, runtime_cls): + """Test that os.chdir correctly handles paths with slashes.""" + runtime, config = _load_runtime(temp_dir, runtime_cls) + + # Create a test directory and get its absolute path + test_code = """ +import os +os.makedirs('test_dir', exist_ok=True) +abs_path = os.path.abspath('test_dir') +print(abs_path) +""" + action_ipython = IPythonRunCellAction(code=test_code) + logger.info(action_ipython, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action_ipython) + assert isinstance(obs, IPythonRunCellObservation) + test_dir_path = obs.content.split('\n')[0].strip() + logger.info(f'test_dir_path: {test_dir_path}') + assert test_dir_path # Verify we got a valid path + + # Change to the test directory using its absolute path + test_code = f""" +import os +os.chdir(r'{test_dir_path}') +print(os.getcwd()) +""" + action_ipython = IPythonRunCellAction(code=test_code) + logger.info(action_ipython, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action_ipython) + assert isinstance(obs, IPythonRunCellObservation) + current_dir = obs.content.split('\n')[0].strip() + assert current_dir == test_dir_path # Verify we changed to the correct directory + + # Clean up + test_code = """ +import os +import shutil +shutil.rmtree('test_dir', ignore_errors=True) +""" + action_ipython = IPythonRunCellAction(code=test_code) + logger.info(action_ipython, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action_ipython) + assert isinstance(obs, IPythonRunCellObservation) + + _close_test_runtime(runtime) + + def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands): """Make sure that cd 
in bash also update the current working directory in ipython.""" runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) diff --git a/tests/unit/test_windows_bash.py b/tests/unit/test_windows_bash.py new file mode 100644 index 0000000000..b9f1793f28 --- /dev/null +++ b/tests/unit/test_windows_bash.py @@ -0,0 +1,594 @@ +import os +import sys +import tempfile +import time +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from openhands.events.action import CmdRunAction +from openhands.events.observation import ErrorObservation +from openhands.events.observation.commands import ( + CmdOutputObservation, +) + +# Skip all tests in this module if not running on Windows +pytestmark = pytest.mark.skipif( + sys.platform != 'win32', reason='WindowsPowershellSession tests require Windows' +) + + +@pytest.fixture +def temp_work_dir(): + """Create a temporary directory for testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + yield temp_dir + + +@pytest.fixture +def windows_bash_session(temp_work_dir): + """Create a WindowsPowershellSession instance for testing.""" + # Instantiate the class. Initialization happens in __init__. 
+ session = WindowsPowershellSession( + work_dir=temp_work_dir, + username=None, + ) + assert session._initialized # Should be true after __init__ + yield session + # Ensure cleanup happens even if test fails + session.close() + + +if sys.platform == 'win32': + from openhands.runtime.utils.windows_bash import WindowsPowershellSession + + +def test_command_execution(windows_bash_session): + """Test basic command execution.""" + # Test a simple command + action = CmdRunAction(command="Write-Output 'Hello World'") + result = windows_bash_session.execute(action) + + assert isinstance(result, CmdOutputObservation) + # Check content, stripping potential trailing newlines + content = result.content.strip() + assert content == 'Hello World' + assert result.exit_code == 0 + + # Test a simple command with multiline input but single line output + action = CmdRunAction( + command="""Write-Output ` + ('hello ' + ` + 'world')""" + ) + result = windows_bash_session.execute(action) + + assert isinstance(result, CmdOutputObservation) + # Check content, stripping potential trailing newlines + content = result.content.strip() + assert content == 'hello world' + assert result.exit_code == 0 + + # Test a simple command with a newline + action = CmdRunAction(command='Write-Output "Hello\\n World"') + result = windows_bash_session.execute(action) + + assert isinstance(result, CmdOutputObservation) + # Check content, stripping potential trailing newlines + content = result.content.strip() + assert content == 'Hello\\n World' + assert result.exit_code == 0 + + +def test_command_with_error(windows_bash_session): + """Test command execution with an error reported via Write-Error.""" + # Test a command that will write an error + action = CmdRunAction(command="Write-Error 'Test Error'") + result = windows_bash_session.execute(action) + + assert isinstance(result, CmdOutputObservation) + # Error stream is captured and appended + assert 'ERROR' in result.content + # Our implementation should set 
exit code to 1 when errors occur in stream + assert result.exit_code == 1
+
+
+def test_command_failure_exit_code(windows_bash_session):
+    """Test command execution that results in a non-zero exit code."""
+    # Test a command that causes a script failure (e.g., invalid cmdlet)
+    action = CmdRunAction(command='Get-NonExistentCmdlet')
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    # Error should be captured in the output
+    assert 'ERROR' in result.content
+    assert (
+        'is not recognized' in result.content
+        or 'CommandNotFoundException' in result.content
+    )
+    assert result.exit_code == 1
+
+
+def test_control_commands(windows_bash_session):
+    """Test control commands: C-c interrupts a running command; C-d is rejected."""
+    # Test Ctrl+C - should return ErrorObservation if no command is running
+    action_c = CmdRunAction(command='C-c', is_input=True)
+    result_c = windows_bash_session.execute(action_c)
+    assert isinstance(result_c, ErrorObservation)
+    assert 'No previous running command to interact with' in result_c.content
+
+    # Run a long-running command
+    action_long_running = CmdRunAction(command='Start-Sleep -Seconds 100')
+    result_long_running = windows_bash_session.execute(action_long_running)
+    assert isinstance(result_long_running, CmdOutputObservation)
+    assert result_long_running.exit_code == -1
+
+    # Test unsupported control command
+    action_d = CmdRunAction(command='C-d', is_input=True)
+    result_d = windows_bash_session.execute(action_d)
+    assert "Your input command 'C-d' was NOT processed" in result_d.metadata.suffix
+    assert (
+        'Direct input to running processes (is_input=True) is not supported by this PowerShell session implementation.'
+        in result_d.metadata.suffix
+    )
+    assert 'You can use C-c to stop the process' in result_d.metadata.suffix
+
+    # Ctrl+C now can cancel the long-running command
+    action_c = CmdRunAction(command='C-c', is_input=True)
+    result_c = windows_bash_session.execute(action_c)
+    assert isinstance(result_c, CmdOutputObservation)
+    assert result_c.exit_code == 0
+
+
+def test_command_timeout(windows_bash_session):
+    """Test command timeout handling."""
+    # Test a command that will timeout
+    test_timeout_sec = 1
+    action = CmdRunAction(command='Start-Sleep -Seconds 5')
+    action.set_hard_timeout(test_timeout_sec)
+    start_time = time.monotonic()
+    result = windows_bash_session.execute(action)
+    duration = time.monotonic() - start_time
+
+    assert isinstance(result, CmdOutputObservation)
+    # Check for timeout specific metadata
+    assert 'timed out' in result.metadata.suffix.lower()  # Check suffix, not content
+    assert result.exit_code == -1  # Timeout should result in exit code -1
+    # Check that it actually timed out near the specified time
+    assert abs(duration - test_timeout_sec) < 0.5  # Allow some buffer
+
+
+def test_long_running_command(windows_bash_session):
+    action = CmdRunAction(command='python -u -m http.server 8081')
+    action.set_hard_timeout(1)
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    # Verify the initial output was captured
+    assert 'Serving HTTP on' in result.content
+    # Check for timeout specific metadata
+    assert (
+        "[The command timed out after 1.0 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
+        in result.metadata.suffix
+    )
+    assert result.exit_code == -1
+
+    # The action timed out, but the command should be still running
+    # We should now be able to interrupt it
+    action = CmdRunAction(command='C-c', is_input=True)
+    action.set_hard_timeout(30)  # Give it enough time to stop
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    # On Windows, Stop-Job termination doesn't inherently return output.
+    # The CmdOutputObservation will have content="" and exit_code=0 if successful.
+    # The KeyboardInterrupt message assertion is removed as it's added manually
+    # by the wrapper and might not be guaranteed depending on timing/implementation details.
+    assert result.exit_code == 0
+
+    # Verify the server is actually stopped by starting another one on the same port
+    action = CmdRunAction(command='python -u -m http.server 8081')
+    action.set_hard_timeout(1)  # Set a short timeout to check if it starts
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    # Verify the initial output was captured, indicating the port was free
+    assert 'Serving HTTP on' in result.content
+    # The command will time out again, so the exit code should be -1
+    assert result.exit_code == -1
+
+    # Clean up the second server process
+    action = CmdRunAction(command='C-c', is_input=True)
+    action.set_hard_timeout(30)
+    result = windows_bash_session.execute(action)
+    assert result.exit_code == 0
+
+
+def test_multiple_commands_rejected_and_individual_execution(windows_bash_session):
+    """Test that executing multiple commands separated by newline is rejected,
+    but individual commands (including multiline) execute correctly."""
+    # Define a list of commands, including multiline and special characters
+    cmds = [
+        'Get-ChildItem',
+        'Write-Output "hello`nworld"',
+        """Write-Output "hello it's me\"""",
+        """Write-Output `
+    'hello' `
+    -NoNewline""",
+        """Write-Output 'hello`nworld`nare`nyou`nthere?'""",
+        """Write-Output 'hello`nworld`nare`nyou`n`nthere?'""",
+        """Write-Output 'hello`nworld `"'""",  # Escape the trailing double quote
+    ]
+    joined_cmds = '\n'.join(cmds)
+
+    # 1. Test that executing multiple commands at once fails
+    action_multi = CmdRunAction(command=joined_cmds)
+    result_multi = windows_bash_session.execute(action_multi)
+
+    assert isinstance(result_multi, ErrorObservation)
+    assert 'ERROR: Cannot execute multiple commands at once' in result_multi.content
+
+    # 2. Now run each command individually and verify they work
+    results = []
+    for cmd in cmds:
+        action_single = CmdRunAction(command=cmd)
+        obs = windows_bash_session.execute(action_single)
+        assert isinstance(obs, CmdOutputObservation)
+        assert obs.exit_code == 0
+        results.append(obs.content.strip())  # Strip trailing newlines for comparison
+
+
+def test_working_directory(windows_bash_session, temp_work_dir):
+    """Test working directory handling."""
+    initial_cwd = windows_bash_session._cwd
+    abs_temp_work_dir = os.path.abspath(temp_work_dir)
+    assert initial_cwd == abs_temp_work_dir
+
+    # Create a subdirectory
+    sub_dir_path = Path(abs_temp_work_dir) / 'subdir'
+    sub_dir_path.mkdir()
+    assert sub_dir_path.is_dir()
+
+    # Test changing directory
+    action_cd = CmdRunAction(command='Set-Location subdir')
+    result_cd = windows_bash_session.execute(action_cd)
+    assert isinstance(result_cd, CmdOutputObservation)
+    assert result_cd.exit_code == 0
+
+    # Check that the session's internal CWD state was updated - only check the last component of path
+    assert windows_bash_session._cwd.lower().endswith('\\subdir')
+    # Check that the metadata reflects the directory *after* the command
+    assert result_cd.metadata.working_dir.lower().endswith('\\subdir')
+
+    # Execute a command in the new directory to confirm
+    action_pwd = CmdRunAction(command='(Get-Location).Path')
+    result_pwd = windows_bash_session.execute(action_pwd)
+    assert isinstance(result_pwd, CmdOutputObservation)
+    assert result_pwd.exit_code == 0
+    # Check the command output reflects the new directory
+    assert result_pwd.content.strip().lower().endswith('\\subdir')
+    # Metadata should also reflect the current directory
+    assert result_pwd.metadata.working_dir.lower().endswith('\\subdir')
+
+    # Test changing back to original directory
+    action_cd_back = CmdRunAction(command=f"Set-Location '{abs_temp_work_dir}'")
+    result_cd_back = windows_bash_session.execute(action_cd_back)
+    assert isinstance(result_cd_back, CmdOutputObservation)
+    assert result_cd_back.exit_code == 0
+    # Check only the base name of the temp directory
+    temp_dir_basename = os.path.basename(abs_temp_work_dir)
+    assert windows_bash_session._cwd.lower().endswith(temp_dir_basename.lower())
+    assert result_cd_back.metadata.working_dir.lower().endswith(
+        temp_dir_basename.lower()
+    )
+
+
+def test_cleanup(windows_bash_session):
+    """Test proper cleanup of resources (runspace)."""
+    # Session should be initialized before close
+    assert windows_bash_session._initialized
+    assert windows_bash_session.runspace is not None
+
+    # Close the session
+    windows_bash_session.close()
+
+    # Verify cleanup
+    assert not windows_bash_session._initialized
+    assert windows_bash_session.runspace is None
+    assert windows_bash_session._closed
+
+
+def test_syntax_error_handling(windows_bash_session):
+    """Test handling of syntax errors in PowerShell commands."""
+    # Test invalid command syntax
+    action = CmdRunAction(command="Write-Output 'Missing Quote")
+    result = windows_bash_session.execute(action)
+    assert isinstance(result, ErrorObservation)
+    # Error message appears in the output via PowerShell error stream
+    assert 'missing' in result.content.lower() or 'terminator' in result.content.lower()
+
+
+def test_special_characters_handling(windows_bash_session):
+    """Test handling of commands containing special characters."""
+    # Test command with special characters
+    special_chars_cmd = '''Write-Output "Special Chars: \\`& \\`| \\`< \\`> \\`\\` \\`' \\`\" \\`! \\`$ \\`% \\`^ \\`( \\`) \\`- \\`= \\`+ \\`[ \\`] \\`{ \\`} \\`; \\`: \\`, \\`. \\`? \\`/ \\`~"'''
+    action = CmdRunAction(command=special_chars_cmd)
+    result = windows_bash_session.execute(action)
+    assert isinstance(result, CmdOutputObservation)
+    # Check output contains the special characters
+    assert 'Special Chars:' in result.content
+    assert '&' in result.content and '|' in result.content
+    assert result.exit_code == 0
+
+
+def test_empty_command(windows_bash_session):
+    """Test handling of empty command string when no command is running."""
+    action = CmdRunAction(command='')
+    result = windows_bash_session.execute(action)
+    assert isinstance(result, CmdOutputObservation)
+    # Should indicate error as per test_bash.py behavior
+    assert 'ERROR: No previous running command to retrieve logs from.' in result.content
+    # Exit code is typically 0 even for this specific "error" message in the bash implementation
+    assert result.exit_code == 0
+
+
+def test_exception_during_execution(windows_bash_session):
+    """Test handling of exceptions during command execution."""
+    # Patch the PowerShell class itself within the module where it's used
+    patch_target = 'openhands.runtime.utils.windows_bash.PowerShell'
+
+    # Create a mock PowerShell class
+    mock_powershell_class = MagicMock()
+    # Configure its Create method (which is called in execute) to raise an exception
+    # This simulates an error during the creation of the PowerShell object itself.
+    mock_powershell_class.Create.side_effect = Exception(
+        'Test exception from mocked Create'
+    )
+
+    with patch(patch_target, mock_powershell_class):
+        action = CmdRunAction(command="Write-Output 'Test'")
+        # Now, when execute calls PowerShell.Create(), it will hit our mock and raise the exception
+        result = windows_bash_session.execute(action)
+
+    # The exception should be caught by the try...except block in execute()
+    assert isinstance(result, ErrorObservation)
+    # Check the error message generated by the execute method's exception handler
+    assert 'Failed to start PowerShell job' in result.content
+    assert 'Test exception from mocked Create' in result.content
+
+
+def test_streaming_output(windows_bash_session):
+    """Test handling of streaming output from commands."""
+    # Command that produces output incrementally
+    command = """
+    1..3 | ForEach-Object {
+        Write-Output "Line $_"
+        Start-Sleep -Milliseconds 100
+    }
+    """
+    action = CmdRunAction(command=command)
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    assert 'Line 1' in result.content
+    assert 'Line 2' in result.content
+    assert 'Line 3' in result.content
+    assert result.exit_code == 0
+
+
+def test_shutdown_signal_handling(windows_bash_session):
+    """Test handling of shutdown signal during command execution."""
+    # This would require mocking the shutdown_listener, which might be complex.
+    # For now, we'll just verify that a long-running command can be executed
+    # and that execute() returns properly.
+    command = 'Start-Sleep -Seconds 1'
+    action = CmdRunAction(command=command)
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    assert result.exit_code == 0
+
+
+def test_runspace_state_after_error(windows_bash_session):
+    """Test that the runspace remains usable after a command error."""
+    # First, execute a command with an error
+    error_action = CmdRunAction(command='NonExistentCommand')
+    error_result = windows_bash_session.execute(error_action)
+    assert isinstance(error_result, CmdOutputObservation)
+    assert error_result.exit_code == 1
+
+    # Then, execute a valid command
+    valid_action = CmdRunAction(command="Write-Output 'Still working'")
+    valid_result = windows_bash_session.execute(valid_action)
+    assert isinstance(valid_result, CmdOutputObservation)
+    assert 'Still working' in valid_result.content
+    assert valid_result.exit_code == 0
+
+
+def test_stateful_file_operations(windows_bash_session, temp_work_dir):
+    """Test file operations to verify runspace state persistence.
+
+    This test verifies that:
+    1. The working directory state persists between commands
+    2. File operations work correctly relative to the current directory
+    3. The runspace maintains state for path-dependent operations
+    """
+    abs_temp_work_dir = os.path.abspath(temp_work_dir)
+
+    # 1. Create a subdirectory
+    sub_dir_name = 'file_test_dir'
+    sub_dir_path = Path(abs_temp_work_dir) / sub_dir_name
+
+    # Use PowerShell to create directory
+    create_dir_action = CmdRunAction(
+        command=f'New-Item -Path "{sub_dir_name}" -ItemType Directory'
+    )
+    result = windows_bash_session.execute(create_dir_action)
+    assert result.exit_code == 0
+
+    # Verify directory exists on disk
+    assert sub_dir_path.exists() and sub_dir_path.is_dir()
+
+    # 2. Change to the new directory
+    cd_action = CmdRunAction(command=f"Set-Location '{sub_dir_name}'")
+    result = windows_bash_session.execute(cd_action)
+    assert result.exit_code == 0
+    # Check only the last directory component
+    assert windows_bash_session._cwd.lower().endswith(f'\\{sub_dir_name.lower()}')
+
+    # 3. Create a file in the current directory (which should be the subdirectory)
+    test_content = 'This is a test file created by PowerShell'
+    create_file_action = CmdRunAction(
+        command=f'Set-Content -Path "test_file.txt" -Value "{test_content}"'
+    )
+    result = windows_bash_session.execute(create_file_action)
+    assert result.exit_code == 0
+
+    # 4. Verify file exists at the expected path (in the subdirectory)
+    expected_file_path = sub_dir_path / 'test_file.txt'
+    assert expected_file_path.exists() and expected_file_path.is_file()
+
+    # 5. Read file contents using PowerShell and verify
+    read_file_action = CmdRunAction(command='Get-Content -Path "test_file.txt"')
+    result = windows_bash_session.execute(read_file_action)
+    assert result.exit_code == 0
+    assert test_content in result.content
+
+    # 6. Go back to parent and try to access file using relative path
+    cd_parent_action = CmdRunAction(command='Set-Location ..')
+    result = windows_bash_session.execute(cd_parent_action)
+    assert result.exit_code == 0
+    # Check only the base name of the temp directory
+    temp_dir_basename = os.path.basename(abs_temp_work_dir)
+    assert windows_bash_session._cwd.lower().endswith(temp_dir_basename.lower())
+
+    # 7. Read the file using relative path
+    read_from_parent_action = CmdRunAction(
+        command=f'Get-Content -Path "{sub_dir_name}/test_file.txt"'
+    )
+    result = windows_bash_session.execute(read_from_parent_action)
+    assert result.exit_code == 0
+    assert test_content in result.content
+
+    # 8. Clean up
+    remove_file_action = CmdRunAction(
+        command=f'Remove-Item -Path "{sub_dir_name}/test_file.txt" -Force'
+    )
+    result = windows_bash_session.execute(remove_file_action)
+    assert result.exit_code == 0
+
+
+def test_command_output_continuation(windows_bash_session):
+    """Test retrieving continued output using empty command after timeout."""
+    # Windows PowerShell version
+    action = CmdRunAction('1..5 | ForEach-Object { Write-Output $_; Start-Sleep 3 }')
+    action.set_hard_timeout(2.5)
+    obs = windows_bash_session.execute(action)
+    assert obs.content.strip() == '1'
+    assert obs.metadata.prefix == ''
+    assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
+
+    # Continue watching output
+    action = CmdRunAction('')
+    action.set_hard_timeout(2.5)
+    obs = windows_bash_session.execute(action)
+    assert '[Below is the output of the previous command.]' in obs.metadata.prefix
+    assert obs.content.strip() == '2'
+    assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
+
+    # Continue until completion
+    for expected in ['3', '4', '5']:
+        action = CmdRunAction('')
+        action.set_hard_timeout(2.5)
+        obs = windows_bash_session.execute(action)
+        assert '[Below is the output of the previous command.]' in obs.metadata.prefix
+        assert obs.content.strip() == expected
+        assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
+
+    # Final empty command to complete
+    action = CmdRunAction('')
+    obs = windows_bash_session.execute(action)
+    assert '[The command completed with exit code 0.]' in obs.metadata.suffix
+
+
+def test_long_running_command_followed_by_execute(windows_bash_session):
+    """Tests behavior when a new command is sent while another is running after timeout."""
+    # Start a slow command
+    action = CmdRunAction('1..3 | ForEach-Object { Write-Output $_; Start-Sleep 3 }')
+    action.set_hard_timeout(2.5)
+    obs = windows_bash_session.execute(action)
+    assert '1' in obs.content  # First number should appear before timeout
+    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
+    assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
+    assert obs.metadata.prefix == ''
+
+    # Continue watching output
+    action = CmdRunAction('')
+    action.set_hard_timeout(2.5)
+    obs = windows_bash_session.execute(action)
+    assert '2' in obs.content
+    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
+    assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
+    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
+
+    # Test command that produces no output
+    action = CmdRunAction('sleep 15')
+    action.set_hard_timeout(2.5)
+    obs = windows_bash_session.execute(action)
+    assert '3' not in obs.content
+    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
+    assert 'The previous command is still running' in obs.metadata.suffix
+    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
+
+    # Finally continue again
+    action = CmdRunAction('')
+    obs = windows_bash_session.execute(action)
+    assert '3' in obs.content
+    assert '[The command completed with exit code 0.]' in obs.metadata.suffix
+
+
+def test_command_non_existent_file(windows_bash_session):
+    """Test command execution for a non-existent file returns non-zero exit code."""
+    # Use Get-Content which should fail if the file doesn't exist
+    action = CmdRunAction(command='Get-Content non_existent_file.txt')
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    # Check that the exit code is non-zero (should be 1 due to the '$?' check)
+    assert result.exit_code == 1
+    # Check that the error message is captured in the output (error stream part)
+    assert 'Cannot find path' in result.content or 'does not exist' in result.content
+
+
+def test_interactive_input(windows_bash_session):
+    """Test interactive input attempt reflects implementation limitations."""
+    action = CmdRunAction('$name = Read-Host "Enter name"')
+    result = windows_bash_session.execute(action)
+
+    assert isinstance(result, CmdOutputObservation)
+    assert (
+        'A command that prompts the user failed because the host program or the command type does not support user interaction. The host was attempting to request confirmation with the following message'
+        in result.content
+    )
+    assert result.exit_code == 1
+
+
+def test_windows_path_handling(windows_bash_session, temp_work_dir):
+    """Test that os.chdir works with both forward slashes and escaped backslashes on Windows."""
+    # Create a test directory
+    test_dir = Path(temp_work_dir) / 'test_dir'
+    test_dir.mkdir()
+
+    # Test both path formats
+    path_formats = [
+        str(test_dir).replace('\\', '/'),  # Forward slashes
+        str(test_dir).replace('\\', '\\\\'),  # Escaped backslashes
+    ]
+
+    for path in path_formats:
+        # Test changing directory using os.chdir through PowerShell
+        action = CmdRunAction(command=f'python -c "import os; os.chdir(\'{path}\')"')
+        result = windows_bash_session.execute(action)
+        assert isinstance(result, CmdOutputObservation)
+        assert result.exit_code == 0, f'Failed with path format: {path}'