fix - Speed up runtime tests (#11570)

Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com> Co-authored-by: openhands <openhands@all-hands.dev>
2026-01-07 22:14:03 -05:00 · 2025-11-04 11:17:55 -06:00
parent f1abe6c6af
commit 9abd1714b9
7 changed files with 134 additions and 79 deletions
--- a/.github/workflows/ghcr-build.yml
+++ b/.github/workflows/ghcr-build.yml
@@ -86,7 +86,7 @@ jobs:

  # Builds the runtime Docker images
  ghcr_build_runtime:
-    name: Build Image
+    name: Build Runtime Image
    runs-on: blacksmith-8vcpu-ubuntu-2204
    if: "!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/ext-v'))"
    permissions:
@@ -256,7 +256,7 @@ jobs:
  test_runtime_root:
    name: RT Unit Tests (Root)
    needs: [ghcr_build_runtime, define-matrix]
-    runs-on: blacksmith-8vcpu-ubuntu-2204
+    runs-on: blacksmith-4vcpu-ubuntu-2404
    strategy:
      fail-fast: false
      matrix:
@@ -298,7 +298,7 @@ jobs:
          # We install pytest-xdist in order to run tests across CPUs
          poetry run pip install pytest-xdist

-          # Install to be able to retry on failures for flaky tests
+          # Install to be able to retry on failures for flakey tests
          poetry run pip install pytest-rerunfailures

          image_name=ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
@@ -311,14 +311,14 @@ jobs:
          SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
          RUN_AS_OPENHANDS=false \
-          poetry run pytest -n 0 -raRs --reruns 2 --reruns-delay 5 -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
+          poetry run pytest -n 5 -raRs --reruns 2 --reruns-delay 3 -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
        env:
          DEBUG: "1"

  # Run unit tests with the Docker runtime Docker images as openhands user
  test_runtime_oh:
    name: RT Unit Tests (openhands)
-    runs-on: blacksmith-8vcpu-ubuntu-2204
+    runs-on: blacksmith-4vcpu-ubuntu-2404
    needs: [ghcr_build_runtime, define-matrix]
    strategy:
      matrix:
@@ -370,7 +370,7 @@ jobs:
          SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
          RUN_AS_OPENHANDS=true \
-          poetry run pytest -n 0 -raRs --reruns 2 --reruns-delay 5 -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
+          poetry run pytest -n 5 -raRs --reruns 2 --reruns-delay 3 -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
        env:
          DEBUG: "1"

--- a/.github/workflows/py-tests.yml
+++ b/.github/workflows/py-tests.yml
@@ -48,7 +48,10 @@ jobs:
          python-version: ${{ matrix.python-version }}
          cache: "poetry"
      - name: Install Python dependencies using Poetry
-        run: poetry install --with dev,test,runtime
+        run: |
+          poetry install --with dev,test,runtime
+          poetry run pip install pytest-xdist
+          poetry run pip install pytest-rerunfailures
      - name: Build Environment
        run: make build
      - name: Run Unit Tests
@@ -56,7 +59,7 @@ jobs:
        env:
          COVERAGE_FILE: ".coverage.${{ matrix.python_version }}"
      - name: Run Runtime Tests with CLIRuntime
-        run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -s tests/runtime/test_bash.py --cov=openhands --cov-branch
+        run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -n 5 --reruns 2 --reruns-delay 3 -s tests/runtime/test_bash.py --cov=openhands --cov-branch
        env:
          COVERAGE_FILE: ".coverage.runtime.${{ matrix.python_version }}"
      - name: Store coverage file
@@ -88,7 +91,7 @@ jobs:
      - name: Install Python dependencies using Poetry
        run: poetry install --with dev,test,runtime
      - name: Run Windows unit tests
-        run: poetry run pytest -svv tests/unit/runtime/utils/test_windows_bash.py
+        run: poetry run pytest -svv tests/runtime//test_windows_bash.py
        env:
          PYTHONPATH: ".;$env:PYTHONPATH"
          DEBUG: "1"
@@ -173,7 +176,6 @@ jobs:
          path: ".coverage.openhands-cli.${{ matrix.python-version }}"
          include-hidden-files: true

-
  coverage-comment:
    name: Coverage Comment
    if: github.event_name == 'pull_request'
--- a/tests/runtime/conftest.py
+++ b/tests/runtime/conftest.py
@@ -17,6 +17,7 @@ from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
 from openhands.runtime.impl.local.local_runtime import LocalRuntime
 from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
 from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
+from openhands.runtime.utils.port_lock import find_available_port_with_lock
 from openhands.storage import get_file_store
 from openhands.utils.async_utils import call_async_from_sync

@@ -294,9 +295,49 @@ def _load_runtime(
    return runtime, runtime.config


+# Port range for test HTTP servers (separate from runtime ports to avoid conflicts)
+TEST_HTTP_SERVER_PORT_RANGE = (18000, 18999)
+
+
+@pytest.fixture
+def dynamic_port(request):
+    """Allocate a dynamic port with locking to prevent race conditions in parallel tests.
+
+    This fixture uses the existing port locking system to ensure that parallel test
+    workers don't try to use the same port for HTTP servers.
+
+    Returns:
+        int: An available port number that is locked for this test
+    """
+    result = find_available_port_with_lock(
+        min_port=TEST_HTTP_SERVER_PORT_RANGE[0],
+        max_port=TEST_HTTP_SERVER_PORT_RANGE[1],
+        max_attempts=20,
+        bind_address='0.0.0.0',
+        lock_timeout=2.0,
+    )
+
+    if result is None:
+        pytest.fail(
+            f'Could not allocate a dynamic port in range {TEST_HTTP_SERVER_PORT_RANGE}'
+        )
+
+    port, port_lock = result
+    logger.info(f'Allocated dynamic port {port} for test {request.node.name}')
+
+    def cleanup():
+        if port_lock:
+            port_lock.release()
+            logger.info(f'Released dynamic port {port} for test {request.node.name}')
+
+    request.addfinalizer(cleanup)
+    return port
+
+
 # Export necessary function
 __all__ = [
    '_load_runtime',
    '_get_host_folder',
    '_remove_folder',
+    'dynamic_port',
 ]
--- a/tests/runtime/test_bash.py
+++ b/tests/runtime/test_bash.py
@@ -51,12 +51,11 @@ def get_platform_command(linux_cmd, windows_cmd):
    return windows_cmd if is_windows() else linux_cmd


-@pytest.mark.skip(reason='This test is flaky')
-def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
+def test_bash_server(temp_dir, runtime_cls, run_as_openhands, dynamic_port):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Use python -u for unbuffered output, potentially helping capture initial output on Windows
-        action = CmdRunAction(command='python -u -m http.server 8081')
+        action = CmdRunAction(command=f'python -u -m http.server {dynamic_port}')
        action.set_hard_timeout(1)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -111,7 +110,7 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
            assert config.workspace_mount_path_in_sandbox in obs.metadata.working_dir

        # run it again!
-        action = CmdRunAction(command='python -u -m http.server 8081')
+        action = CmdRunAction(command=f'python -u -m http.server {dynamic_port}')
        action.set_hard_timeout(1)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -123,9 +122,9 @@ def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
        _close_test_runtime(runtime)


-def test_bash_background_server(temp_dir, runtime_cls, run_as_openhands):
+def test_bash_background_server(temp_dir, runtime_cls, run_as_openhands, dynamic_port):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    server_port = 8081
+    server_port = dynamic_port
    try:
        # Start the server, expect it to timeout (run in background manner)
        action = CmdRunAction(f'python3 -m http.server {server_port} &')
--- a/tests/runtime/test_browsing.py
+++ b/tests/runtime/test_browsing.py
@@ -123,17 +123,21 @@ def find_element_by_tag_and_attributes(
    return None


-def test_browser_disabled(temp_dir, runtime_cls, run_as_openhands):
+def test_browser_disabled(temp_dir, runtime_cls, run_as_openhands, dynamic_port):
    runtime, _ = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=False
    )

-    action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
+    action_cmd = CmdRunAction(
+        command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
+    )
    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action_cmd)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})

-    action_browse = BrowseURLAction(url='http://localhost:8000', return_axtree=False)
+    action_browse = BrowseURLAction(
+        url=f'http://localhost:{dynamic_port}', return_axtree=False
+    )
    logger.info(action_browse, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action_browse)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -143,13 +147,15 @@ def test_browser_disabled(temp_dir, runtime_cls, run_as_openhands):
    _close_test_runtime(runtime)


-def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
+def test_simple_browse(temp_dir, runtime_cls, run_as_openhands, dynamic_port):
    runtime, config = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
    )

    # Test browse
-    action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
+    action_cmd = CmdRunAction(
+        command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
+    )
    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action_cmd)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -164,17 +170,19 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0

-    action_browse = BrowseURLAction(url='http://localhost:8000', return_axtree=False)
+    action_browse = BrowseURLAction(
+        url=f'http://localhost:{dynamic_port}', return_axtree=False
+    )
    logger.info(action_browse, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action_browse)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})

    assert isinstance(obs, BrowserOutputObservation)
-    assert 'http://localhost:8000' in obs.url
+    assert f'http://localhost:{dynamic_port}' in obs.url
    assert not obs.error
-    assert obs.open_pages_urls == ['http://localhost:8000/']
+    assert obs.open_pages_urls == [f'http://localhost:{dynamic_port}/']
    assert obs.active_page_index == 0
-    assert obs.last_browser_action == 'goto("http://localhost:8000")'
+    assert obs.last_browser_action == f'goto("http://localhost:{dynamic_port}")'
    assert obs.last_browser_action_error == ''
    assert 'Directory listing for /' in obs.content
    assert 'server.log' in obs.content
@@ -189,7 +197,9 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
    _close_test_runtime(runtime)


-def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
+def test_browser_navigation_actions(
+    temp_dir, runtime_cls, run_as_openhands, dynamic_port
+):
    """Test browser navigation actions: goto, go_back, go_forward, noop."""
    runtime, config = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
@@ -234,7 +244,7 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):

        # Start HTTP server
        action_cmd = CmdRunAction(
-            command='python3 -m http.server 8000 > server.log 2>&1 &'
+            command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
@@ -249,7 +259,7 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):

        # Test goto action
        action_browse = BrowseInteractiveAction(
-            browser_actions='goto("http://localhost:8000/page1.html")',
+            browser_actions=f'goto("http://localhost:{dynamic_port}/page1.html")',
            return_axtree=False,
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
@@ -259,7 +269,7 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
        assert isinstance(obs, BrowserOutputObservation)
        assert not obs.error
        assert 'Page 1' in obs.content
-        assert 'http://localhost:8000/page1.html' in obs.url
+        assert f'http://localhost:{dynamic_port}/page1.html' in obs.url

        # Test noop action (should not change page)
        action_browse = BrowseInteractiveAction(
@@ -272,11 +282,11 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
        assert isinstance(obs, BrowserOutputObservation)
        assert not obs.error
        assert 'Page 1' in obs.content
-        assert 'http://localhost:8000/page1.html' in obs.url
+        assert f'http://localhost:{dynamic_port}/page1.html' in obs.url

        # Navigate to page 2
        action_browse = BrowseInteractiveAction(
-            browser_actions='goto("http://localhost:8000/page2.html")',
+            browser_actions=f'goto("http://localhost:{dynamic_port}/page2.html")',
            return_axtree=False,
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
@@ -286,7 +296,7 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
        assert isinstance(obs, BrowserOutputObservation)
        assert not obs.error
        assert 'Page 2' in obs.content
-        assert 'http://localhost:8000/page2.html' in obs.url
+        assert f'http://localhost:{dynamic_port}/page2.html' in obs.url

        # Test go_back action
        action_browse = BrowseInteractiveAction(
@@ -299,7 +309,7 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
        assert isinstance(obs, BrowserOutputObservation)
        assert not obs.error
        assert 'Page 1' in obs.content
-        assert 'http://localhost:8000/page1.html' in obs.url
+        assert f'http://localhost:{dynamic_port}/page1.html' in obs.url

        # Test go_forward action
        action_browse = BrowseInteractiveAction(
@@ -312,7 +322,7 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
        assert isinstance(obs, BrowserOutputObservation)
        assert not obs.error
        assert 'Page 2' in obs.content
-        assert 'http://localhost:8000/page2.html' in obs.url
+        assert f'http://localhost:{dynamic_port}/page2.html' in obs.url

        # Clean up
        action_cmd = CmdRunAction(command='pkill -f "python3 -m http.server" || true')
@@ -324,7 +334,9 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
        _close_test_runtime(runtime)


-def test_browser_form_interactions(temp_dir, runtime_cls, run_as_openhands):
+def test_browser_form_interactions(
+    temp_dir, runtime_cls, run_as_openhands, dynamic_port
+):
    """Test browser form interaction actions: fill, click, select_option, clear."""
    runtime, config = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
@@ -370,7 +382,7 @@ def test_browser_form_interactions(temp_dir, runtime_cls, run_as_openhands):

        # Start HTTP server
        action_cmd = CmdRunAction(
-            command='python3 -m http.server 8000 > server.log 2>&1 &'
+            command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
@@ -385,7 +397,7 @@ def test_browser_form_interactions(temp_dir, runtime_cls, run_as_openhands):

        # Navigate to form page
        action_browse = BrowseInteractiveAction(
-            browser_actions='goto("http://localhost:8000/form.html")',
+            browser_actions=f'goto("http://localhost:{dynamic_port}/form.html")',
            return_axtree=True,  # Need axtree to get element bids
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
@@ -540,7 +552,9 @@ fill("{textarea_bid}", "This is a test message")
        _close_test_runtime(runtime)


-def test_browser_interactive_actions(temp_dir, runtime_cls, run_as_openhands):
+def test_browser_interactive_actions(
+    temp_dir, runtime_cls, run_as_openhands, dynamic_port
+):
    """Test browser interactive actions: scroll, hover, fill, press, focus."""
    runtime, config = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
@@ -587,7 +601,7 @@ def test_browser_interactive_actions(temp_dir, runtime_cls, run_as_openhands):

        # Start HTTP server
        action_cmd = CmdRunAction(
-            command='python3 -m http.server 8000 > server.log 2>&1 &'
+            command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
@@ -602,7 +616,7 @@ def test_browser_interactive_actions(temp_dir, runtime_cls, run_as_openhands):

        # Navigate to scroll page
        action_browse = BrowseInteractiveAction(
-            browser_actions='goto("http://localhost:8000/scroll.html")',
+            browser_actions=f'goto("http://localhost:{dynamic_port}/scroll.html")',
            return_axtree=True,
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
@@ -748,7 +762,7 @@ scroll(0, 400)
        _close_test_runtime(runtime)


-def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands):
+def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands, dynamic_port):
    """Test browser file upload action."""
    runtime, config = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
@@ -799,7 +813,7 @@ def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands):

        # Start HTTP server
        action_cmd = CmdRunAction(
-            command='python3 -m http.server 8000 > server.log 2>&1 &'
+            command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
@@ -814,7 +828,7 @@ def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands):

        # Navigate to upload page
        action_browse = BrowseInteractiveAction(
-            browser_actions='goto("http://localhost:8000/upload.html")',
+            browser_actions=f'goto("http://localhost:{dynamic_port}/upload.html")',
            return_axtree=True,
        )
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
@@ -1049,7 +1063,8 @@ def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
        _close_test_runtime(runtime)


-def test_download_file(temp_dir, runtime_cls, run_as_openhands):
+@pytest.mark.skip(reason='This test is flaky')
+def test_download_file(temp_dir, runtime_cls, run_as_openhands, dynamic_port):
    """Test downloading a file using the browser."""
    runtime, config = _load_runtime(
        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
@@ -1142,7 +1157,7 @@ def test_download_file(temp_dir, runtime_cls, run_as_openhands):

        # Start HTTP server
        action_cmd = CmdRunAction(
-            command='python3 -m http.server 8000 > server.log 2>&1 &'
+            command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
@@ -1157,19 +1172,19 @@ def test_download_file(temp_dir, runtime_cls, run_as_openhands):
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Browse to the HTML page
-        action_browse = BrowseURLAction(url='http://localhost:8000/download_test.html')
+        action_browse = BrowseURLAction(url=f'http://localhost:{dynamic_port}/')
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_browse)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Verify the browser observation
        assert isinstance(obs, BrowserOutputObservation)
-        assert 'http://localhost:8000/download_test.html' in obs.url
+        assert f'http://localhost:{dynamic_port}/download_test.html' in obs.url
        assert not obs.error
        assert 'Download Test Page' in obs.content

        # Go to the PDF file url directly - this should trigger download
-        file_url = f'http://localhost:8000/{test_file_name}'
+        file_url = f'http://localhost:{dynamic_port}/{test_file_name}'
        action_browse = BrowseInteractiveAction(
            browser_actions=f'goto("{file_url}")',
        )
--- a/tests/runtime/test_mcp_action.py
+++ b/tests/runtime/test_mcp_action.py
@@ -140,7 +140,9 @@ def test_default_activated_tools():

@pytest.mark.skip('This test is flaky')
@pytest.mark.asyncio
-async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
+async def test_fetch_mcp_via_stdio(
+    temp_dir, runtime_cls, run_as_openhands, dynamic_port
+):
    mcp_stdio_server_config = MCPStdioServerConfig(
        name='fetch', command='uvx', args=['mcp-server-fetch']
    )
@@ -154,7 +156,9 @@ async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
    )

    # Test browser server
-    action_cmd = CmdRunAction(command='python3 -m http.server 8080 > server.log 2>&1 &')
+    action_cmd = CmdRunAction(
+        command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
+    )
    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action_cmd)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -169,7 +173,9 @@ async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0

-    mcp_action = MCPAction(name='fetch', arguments={'url': 'http://localhost:8080'})
+    mcp_action = MCPAction(
+        name='fetch', arguments={'url': f'http://localhost:{dynamic_port}'}
+    )
    obs = await runtime.call_tool_mcp(mcp_action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert isinstance(obs, MCPObservation), (
@@ -182,7 +188,7 @@ async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
    assert result_json['content'][0]['type'] == 'text'
    assert (
        result_json['content'][0]['text']
-        == 'Contents of http://localhost:8080/:\n---\n\n* <.downloads/>\n* <server.log>\n\n---'
+        == f'Contents of http://localhost:{dynamic_port}/:\n---\n\n* <.downloads/>\n* <server.log>\n\n---'
    )

    runtime.close()
@@ -223,7 +229,7 @@ async def test_filesystem_mcp_via_sse(
@pytest.mark.skip('This test is flaky')
@pytest.mark.asyncio
 async def test_both_stdio_and_sse_mcp(
-    temp_dir, runtime_cls, run_as_openhands, sse_mcp_docker_server
+    temp_dir, runtime_cls, run_as_openhands, sse_mcp_docker_server, dynamic_port
 ):
    sse_server_info = sse_mcp_docker_server
    sse_url = sse_server_info['url']
@@ -259,7 +265,7 @@ async def test_both_stdio_and_sse_mcp(
        # ======= Test stdio server =======
        # Test browser server
        action_cmd_http = CmdRunAction(
-            command='python3 -m http.server 8080 > server.log 2>&1 &'
+            command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
        )
        logger.info(action_cmd_http, extra={'msg_type': 'ACTION'})
        obs_http = runtime.run_action(action_cmd_http)
@@ -280,7 +286,7 @@ async def test_both_stdio_and_sse_mcp(
            # And FastMCP Proxy will pre-pend the server name (in this case, `fetch`)
            # to the tool name, so the full tool name becomes `fetch_fetch`
            name='fetch',
-            arguments={'url': 'http://localhost:8080'},
+            arguments={'url': f'http://localhost:{dynamic_port}'},
        )
        obs_fetch = await runtime.call_tool_mcp(mcp_action_fetch)
        logger.info(obs_fetch, extra={'msg_type': 'OBSERVATION'})
@@ -294,7 +300,7 @@ async def test_both_stdio_and_sse_mcp(
        assert result_json['content'][0]['type'] == 'text'
        assert (
            result_json['content'][0]['text']
-            == 'Contents of http://localhost:8080/:\n---\n\n* <.downloads/>\n* <server.log>\n\n---'
+            == f'Contents of http://localhost:{dynamic_port}/:\n---\n\n* <.downloads/>\n* <server.log>\n\n---'
        )
    finally:
        if runtime:
@@ -305,7 +311,7 @@ async def test_both_stdio_and_sse_mcp(
@pytest.mark.skip('This test is flaky')
@pytest.mark.asyncio
 async def test_microagent_and_one_stdio_mcp_in_config(
-    temp_dir, runtime_cls, run_as_openhands
+    temp_dir, runtime_cls, run_as_openhands, dynamic_port
 ):
    runtime = None
    try:
@@ -350,7 +356,7 @@ async def test_microagent_and_one_stdio_mcp_in_config(
        # ======= Test the stdio server added by the microagent =======
        # Test browser server
        action_cmd_http = CmdRunAction(
-            command='python3 -m http.server 8080 > server.log 2>&1 &'
+            command=f'python3 -m http.server {dynamic_port} > server.log 2>&1 &'
        )
        logger.info(action_cmd_http, extra={'msg_type': 'ACTION'})
        obs_http = runtime.run_action(action_cmd_http)
@@ -367,7 +373,7 @@ async def test_microagent_and_one_stdio_mcp_in_config(
        assert obs_cat.exit_code == 0

        mcp_action_fetch = MCPAction(
-            name='fetch_fetch', arguments={'url': 'http://localhost:8080'}
+            name='fetch_fetch', arguments={'url': f'http://localhost:{dynamic_port}'}
        )
        obs_fetch = await runtime.call_tool_mcp(mcp_action_fetch)
        logger.info(obs_fetch, extra={'msg_type': 'OBSERVATION'})
@@ -381,7 +387,7 @@ async def test_microagent_and_one_stdio_mcp_in_config(
        assert result_json['content'][0]['type'] == 'text'
        assert (
            result_json['content'][0]['text']
-            == 'Contents of http://localhost:8080/:\n---\n\n* <.downloads/>\n* <server.log>\n\n---'
+            == f'Contents of http://localhost:{dynamic_port}/:\n---\n\n* <.downloads/>\n* <server.log>\n\n---'
        )
    finally:
        if runtime:
--- a/tests/unit/runtime/utils/test_windows_bash.py
+++ b/tests/unit/runtime/utils/test_windows_bash.py
@@ -1,6 +1,5 @@
 import os
 import sys
-import tempfile
 import time
 from pathlib import Path
 from unittest.mock import MagicMock, patch
@@ -30,18 +29,11 @@ pytestmark = pytest.mark.skipif(


@pytest.fixture
-def temp_work_dir():
-    """Create a temporary directory for testing."""
-    with tempfile.TemporaryDirectory() as temp_dir:
-        yield temp_dir
-
-
-@pytest.fixture
-def windows_bash_session(temp_work_dir):
+def windows_bash_session(temp_dir):
    """Create a WindowsPowershellSession instance for testing."""
    # Instantiate the class. Initialization happens in __init__.
    session = WindowsPowershellSession(
-        work_dir=temp_work_dir,
+        work_dir=temp_dir,
        username=None,
    )
    assert session._initialized  # Should be true after __init__
@@ -169,8 +161,8 @@ def test_command_timeout(windows_bash_session):
    assert abs(duration - test_timeout_sec) < 0.5  # Allow some buffer


-def test_long_running_command(windows_bash_session):
-    action = CmdRunAction(command='python -u -m http.server 8081')
+def test_long_running_command(windows_bash_session, dynamic_port):
+    action = CmdRunAction(command=f'python -u -m http.server {dynamic_port}')
    action.set_hard_timeout(1)
    result = windows_bash_session.execute(action)

@@ -195,7 +187,7 @@ def test_long_running_command(windows_bash_session):
    assert result.exit_code == 0

    # Verify the server is actually stopped by starting another one on the same port
-    action = CmdRunAction(command='python -u -m http.server 8081')
+    action = CmdRunAction(command=f'python -u -m http.server {dynamic_port}')
    action.set_hard_timeout(1)  # Set a short timeout to check if it starts
    result = windows_bash_session.execute(action)

@@ -247,10 +239,10 @@ def test_multiple_commands_rejected_and_individual_execution(windows_bash_sessio
        results.append(obs.content.strip())  # Strip trailing newlines for comparison


-def test_working_directory(windows_bash_session, temp_work_dir):
+def test_working_directory(windows_bash_session, temp_dir):
    """Test working directory handling."""
    initial_cwd = windows_bash_session._cwd
-    abs_temp_work_dir = os.path.abspath(temp_work_dir)
+    abs_temp_work_dir = os.path.abspath(temp_dir)
    assert initial_cwd == abs_temp_work_dir

    # Create a subdirectory
@@ -414,7 +406,7 @@ def test_runspace_state_after_error(windows_bash_session):
    assert valid_result.exit_code == 0


-def test_stateful_file_operations(windows_bash_session, temp_work_dir):
+def test_stateful_file_operations(windows_bash_session, temp_dir):
    """Test file operations to verify runspace state persistence.

    This test verifies that:
@@ -422,7 +414,7 @@ def test_stateful_file_operations(windows_bash_session, temp_work_dir):
    2. File operations work correctly relative to the current directory
    3. The runspace maintains state for path-dependent operations
    """
-    abs_temp_work_dir = os.path.abspath(temp_work_dir)
+    abs_temp_work_dir = os.path.abspath(temp_dir)

    # 1. Create a subdirectory
    sub_dir_name = 'file_test_dir'
@@ -582,10 +574,10 @@ def test_interactive_input(windows_bash_session):
    assert result.exit_code == 1


-def test_windows_path_handling(windows_bash_session, temp_work_dir):
+def test_windows_path_handling(windows_bash_session, temp_dir):
    """Test that os.chdir works with both forward slashes and escaped backslashes on Windows."""
    # Create a test directory
-    test_dir = Path(temp_work_dir) / 'test_dir'
+    test_dir = Path(temp_dir) / 'test_dir'
    test_dir.mkdir()

    # Test both path formats