Add tests for Git LFS and clone depth support

Add Git LFS and clone depth support
2026-04-29 03:00:45 -04:00 · 2025-02-20 12:45:25 -05:00 · 2025-02-20 12:45:24 -05:00
80 changed files with 525 additions and 2048 deletions
--- a/.github/workflows/dummy-agent-test.yml
+++ b/.github/workflows/dummy-agent-test.yml
@@ -24,10 +24,6 @@ jobs:
        uses: docker/setup-buildx-action@v3
      - name: Install tmux
        run: sudo apt-get update && sudo apt-get install -y tmux
-      - name: Setup Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: '22.x'
      - name: Install poetry via pipx
        run: pipx install poetry
      - name: Set up Python
--- a/.github/workflows/py-unit-tests.yml
+++ b/.github/workflows/py-unit-tests.yml
@@ -32,10 +32,6 @@ jobs:
        uses: docker/setup-buildx-action@v3
      - name: Install tmux
        run: sudo apt-get update && sudo apt-get install -y tmux
-      - name: Setup Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: '22.x'
      - name: Install poetry via pipx
        run: pipx install poetry
      - name: Set up Python
@@ -48,7 +44,7 @@ jobs:
      - name: Build Environment
        run: make build
      - name: Run Tests
-        run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_long_term_memory.py
+        run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        env:
--- a/Makefile
+++ b/Makefile
@@ -81,10 +81,10 @@ check-nodejs:
 	@if command -v node > /dev/null; then \
 		NODE_VERSION=$(shell node --version | sed -E 's/v//g'); \
 		IFS='.' read -r -a NODE_VERSION_ARRAY <<< "$$NODE_VERSION"; \
-		if [ "$${NODE_VERSION_ARRAY[0]}" -ge 22 ]; then \
+		if [ "$${NODE_VERSION_ARRAY[0]}" -ge 20 ]; then \
 			echo "$(BLUE)Node.js $$NODE_VERSION is already installed.$(RESET)"; \
 		else \
-			echo "$(RED)Node.js 22.x or later is required. Please install Node.js 22.x or later to continue.$(RESET)"; \
+			echo "$(RED)Node.js 20.x or later is required. Please install Node.js 20.x or later to continue.$(RESET)"; \
 			exit 1; \
 		fi; \
 	else \
--- a/config.template.toml
+++ b/config.template.toml
@@ -17,12 +17,6 @@
 #modal_api_token_id = ""
 #modal_api_token_secret = ""

-# API key for Daytona
-#daytona_api_key = ""
-
-# Daytona Target
-#daytona_target = ""
-
 # Base path for the workspace
 workspace_base = "./workspace"

@@ -83,9 +77,6 @@ workspace_base = "./workspace"
 # Runtime environment
 #runtime = "docker"

-# Runtime executor
-#runtime_executor = "openhands.runtime.executor:ActionExecutor"
-
 # Name of the default agent
 #default_agent = "CodeActAgent"

@@ -243,10 +234,6 @@ codeact_enable_jupyter = true
 # List of microagents to disable
 #disabled_microagents = []

-# Whether history should be truncated to continue the session when hitting LLM context
-# length limit
-enable_history_truncation = true
-
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
--- a/docs/modules/usage/configuration-options.md
+++ b/docs/modules/usage/configuration-options.md
@@ -340,11 +340,6 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
  - Default: `false`
  - Description: Whether Jupyter is enabled in the action space

- `enable_history_truncation`
-  - Type: `bool`
-  - Default: `true`
-  - Description: Whether history should be truncated to continue the session when hitting LLM context length limit
-
 ### Microagent Usage
 - `enable_prompt_extensions`
  - Type: `bool`
--- a/evaluation/README.md
+++ b/evaluation/README.md
@@ -20,8 +20,6 @@ To evaluate an agent, you can provide the agent's name to the `run_infer.py` pro
 ### Evaluating Different LLMs

 OpenHands in development mode uses `config.toml` to keep track of most configuration.
-**IMPORTANT: For evaluation, only the LLM section in `config.toml` will be used. Other configurations, such as `save_trajectory_path`, are not applied during evaluation.**
-
 Here's an example configuration file you can use to define and use multiple LLMs:

 ```toml
@@ -42,8 +40,6 @@ api_key = "XXX"
 temperature = 0.0
 ```

-For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark.
-
 ## Supported Benchmarks

 The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
--- a/frontend/src/api/open-hands.ts
+++ b/frontend/src/api/open-hands.ts
@@ -239,6 +239,9 @@ class OpenHands {
      body,
    );

+    // TODO: remove this once we have a multi-conversation UI
+    localStorage.setItem("latest_conversation_id", data.conversation_id);
+
    return data;
  }

--- a/frontend/src/components/features/conversation-panel/conversation-panel.tsx
+++ b/frontend/src/components/features/conversation-panel/conversation-panel.tsx
@@ -73,13 +73,11 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
    <div
      ref={ref}
      data-testid="conversation-panel"
-      className="w-[350px] h-full border border-neutral-700 bg-base-secondary rounded-xl overflow-y-auto absolute"
+      className="w-[350px] h-full border border-neutral-700 bg-base-secondary rounded-xl overflow-y-auto"
    >
-      {isFetching && (
-        <div className="w-full h-full absolute flex justify-center items-center">
-          <LoadingSpinner size="small" />
-        </div>
-      )}
+      <div className="pt-4 px-4 flex items-center justify-between">
+        {isFetching && <LoadingSpinner size="small" />}
+      </div>
      {error && (
        <div className="flex flex-col items-center justify-center h-full">
          <p className="text-danger">{error.message}</p>
--- a/frontend/src/components/features/settings/styled-switch-component.tsx
+++ b/frontend/src/components/features/settings/styled-switch-component.tsx
@@ -12,14 +12,13 @@ export function StyledSwitchComponent({
      className={cn(
        "w-12 h-6 rounded-xl flex items-center p-1.5 cursor-pointer",
        isToggled && "justify-end bg-primary",
-        !isToggled &&
-          "justify-start bg-base-secondary border border-tertiary-light",
+        !isToggled && "justify-start bg-[#1F2228] border border-tertiary-alt",
      )}
    >
      <div
        className={cn(
-          "w-3 h-3 rounded-xl",
-          isToggled ? "bg-base-secondary" : "bg-tertiary-light",
+          "bg-[#1F2228] w-3 h-3 rounded-xl",
+          isToggled ? "bg-[#1F2228]" : "bg-tertiary-alt",
        )}
      />
    </div>
--- a/frontend/src/components/shared/modals/settings/settings-form.tsx
+++ b/frontend/src/components/shared/modals/settings/settings-form.tsx
@@ -10,6 +10,7 @@ import { useEndSession } from "#/hooks/use-end-session";
 import { ModalBackdrop } from "../modal-backdrop";
 import { ModelSelector } from "./model-selector";
 import { useCurrentSettings } from "#/context/settings-context";
+import { MEMORY_CONDENSER } from "#/utils/feature-flags";
 import { Settings } from "#/types/settings";
 import { BrandButton } from "#/components/features/settings/brand-button";
 import { KeyStatusIcon } from "#/components/features/settings/key-status-icon";
@@ -43,6 +44,9 @@ export function SettingsForm({ settings, models, onClose }: SettingsFormProps) {
  const handleFormSubmission = async (formData: FormData) => {
    const newSettings = extractSettings(formData);

+    // Inject the condenser config from the current feature flag value
+    newSettings.ENABLE_DEFAULT_CONDENSER = MEMORY_CONDENSER;
+
    await saveUserSettings(newSettings);
    onClose();
    resetOngoingSession();
--- a/frontend/src/hooks/mutation/use-save-settings.ts
+++ b/frontend/src/hooks/mutation/use-save-settings.ts
@@ -2,6 +2,7 @@ import { useMutation, useQueryClient } from "@tanstack/react-query";
 import { DEFAULT_SETTINGS } from "#/services/settings";
 import OpenHands from "#/api/open-hands";
 import { PostSettings, PostApiSettings } from "#/types/settings";
+import { MEMORY_CONDENSER } from "#/utils/feature-flags";

 const saveSettingsMutationFn = async (settings: Partial<PostSettings>) => {
  const resetLlmApiKey = settings.LLM_API_KEY === "";
@@ -19,7 +20,8 @@ const saveSettingsMutationFn = async (settings: Partial<PostSettings>) => {
    remote_runtime_resource_factor: settings.REMOTE_RUNTIME_RESOURCE_FACTOR,
    github_token: settings.github_token,
    unset_github_token: settings.unset_github_token,
-    enable_default_condenser: settings.ENABLE_DEFAULT_CONDENSER,
+    enable_default_condenser:
+      MEMORY_CONDENSER || settings.ENABLE_DEFAULT_CONDENSER,
    user_consents_to_analytics: settings.user_consents_to_analytics,
  };

--- a/frontend/src/i18n/index.ts
+++ b/frontend/src/i18n/index.ts
@@ -26,7 +26,6 @@ i18n
  .init({
    fallbackLng: "en",
    debug: import.meta.env.NODE_ENV === "development",
-    load: "languageOnly",
  });

 export default i18n;
--- a/frontend/src/routes/_oh._index/route.tsx
+++ b/frontend/src/routes/_oh._index/route.tsx
@@ -1,6 +1,8 @@
 import React from "react";
 import { useDispatch } from "react-redux";
+import { useTranslation } from "react-i18next";
 import posthog from "posthog-js";
+import { I18nKey } from "#/i18n/declaration";
 import { setImportedProjectZip } from "#/state/initial-query-slice";
 import { convertZipToBase64 } from "#/utils/convert-zip-to-base64";
 import { useGitHubUser } from "#/hooks/query/use-github-user";
@@ -12,6 +14,7 @@ import { HeroHeading } from "#/components/shared/hero-heading";
 import { TaskForm } from "#/components/shared/task-form";

 function Home() {
+  const { t } = useTranslation();
  const dispatch = useDispatch();
  const formRef = React.useRef<HTMLFormElement>(null);

@@ -23,6 +26,8 @@ function Home() {
    gitHubClientId: config?.GITHUB_CLIENT_ID || null,
  });

+  const latestConversation = localStorage.getItem("latest_conversation_id");
+
  return (
    <div className="bg-base-secondary h-full rounded-xl flex flex-col items-center justify-center relative overflow-y-auto px-2">
      <HeroHeading />
@@ -51,6 +56,19 @@ function Home() {
          />
        </div>
      </div>
+      {latestConversation && (
+        <div className="flex gap-4 w-full text-center mt-8">
+          <p className="text-center w-full">
+            {t(I18nKey.LANDING$OR)}&nbsp;
+            <a
+              className="underline"
+              href={`/conversations/${latestConversation}`}
+            >
+              {t(I18nKey.LANDING$RECENT_CONVERSATION)}
+            </a>
+          </p>
+        </div>
+      )}
    </div>
  );
 }
--- a/frontend/src/routes/account-settings.tsx
+++ b/frontend/src/routes/account-settings.tsx
@@ -102,8 +102,6 @@ function AccountSettings() {

    const userConsentsToAnalytics =
      formData.get("enable-analytics-switch")?.toString() === "on";
-    const enableMemoryCondenser =
-      formData.get("enable-memory-condenser-switch")?.toString() === "on";

    saveSettings(
      {
@@ -111,7 +109,6 @@ function AccountSettings() {
          formData.get("github-token-input")?.toString() || undefined,
        LANGUAGE: languageValue,
        user_consents_to_analytics: userConsentsToAnalytics,
-        ENABLE_DEFAULT_CONDENSER: enableMemoryCondenser,
        LLM_MODEL: customLlmModel || fullLlmModel,
        LLM_BASE_URL: formData.get("base-url-input")?.toString() || "",
        LLM_API_KEY:
@@ -293,17 +290,6 @@ function AccountSettings() {
                Enable confirmation mode
              </SettingsSwitch>
            )}
-
-            {llmConfigMode === "advanced" && (
-              <SettingsSwitch
-                testId="enable-memory-condenser-switch"
-                name="enable-memory-condenser-switch"
-                defaultIsToggled={!!settings.ENABLE_DEFAULT_CONDENSER}
-              >
-                Enable memory condensation
-              </SettingsSwitch>
-            )}
-
            {llmConfigMode === "advanced" && confirmationModeIsEnabled && (
              <div>
                <SettingsDropdownInput
--- a/frontend/src/services/settings.ts
+++ b/frontend/src/services/settings.ts
@@ -12,7 +12,7 @@ export const DEFAULT_SETTINGS: Settings = {
  SECURITY_ANALYZER: "",
  REMOTE_RUNTIME_RESOURCE_FACTOR: 1,
  GITHUB_TOKEN_IS_SET: false,
-  ENABLE_DEFAULT_CONDENSER: true,
+  ENABLE_DEFAULT_CONDENSER: false,
  USER_CONSENTS_TO_ANALYTICS: false,
 };

--- a/frontend/src/utils/feature-flags.ts
+++ b/frontend/src/utils/feature-flags.ts
@@ -12,4 +12,5 @@ function loadFeatureFlag(
  }
 }

+export const MEMORY_CONDENSER = loadFeatureFlag("MEMORY_CONDENSER");
 export const BILLING_SETTINGS = () => loadFeatureFlag("BILLING_SETTINGS");
--- a/frontend/src/utils/settings-utils.ts
+++ b/frontend/src/utils/settings-utils.ts
@@ -25,7 +25,6 @@ const extractAdvancedFormData = (formData: FormData) => {
  let LLM_BASE_URL: string | undefined;
  let CONFIRMATION_MODE = false;
  let SECURITY_ANALYZER: string | undefined;
-  let ENABLE_DEFAULT_CONDENSER = true;

  if (isUsingAdvancedOptions) {
    CUSTOM_LLM_MODEL = formData.get("custom-model")?.toString();
@@ -35,7 +34,6 @@ const extractAdvancedFormData = (formData: FormData) => {
      // only set securityAnalyzer if confirmationMode is enabled
      SECURITY_ANALYZER = formData.get("security-analyzer")?.toString();
    }
-    ENABLE_DEFAULT_CONDENSER = keys.includes("enable-default-condenser");
  }

  return {
@@ -43,7 +41,6 @@ const extractAdvancedFormData = (formData: FormData) => {
    LLM_BASE_URL,
    CONFIRMATION_MODE,
    SECURITY_ANALYZER,
-    ENABLE_DEFAULT_CONDENSER,
  };
 };

@@ -56,7 +53,6 @@ export const extractSettings = (formData: FormData): Partial<Settings> => {
    LLM_BASE_URL,
    CONFIRMATION_MODE,
    SECURITY_ANALYZER,
-    ENABLE_DEFAULT_CONDENSER,
  } = extractAdvancedFormData(formData);

  return {
@@ -67,6 +63,5 @@ export const extractSettings = (formData: FormData): Partial<Settings> => {
    LLM_BASE_URL,
    CONFIRMATION_MODE,
    SECURITY_ANALYZER,
-    ENABLE_DEFAULT_CONDENSER,
  };
 };
--- a/openhands/agenthub/browsing_agent/response_parser.py
+++ b/openhands/agenthub/browsing_agent/response_parser.py
@@ -10,24 +10,17 @@ from openhands.events.action import (


 class BrowsingResponseParser(ResponseParser):
-    def __init__(self) -> None:
+    def __init__(self):
        # Need to pay attention to the item order in self.action_parsers
        super().__init__()
        self.action_parsers = [BrowsingActionParserMessage()]
        self.default_parser = BrowsingActionParserBrowseInteractive()

-    def parse(
-        self, response: str | dict[str, list[dict[str, dict[str, str | None]]]]
-    ) -> Action:
-        if isinstance(response, str):
-            action_str = response
-        else:
-            action_str = self.parse_response(response)
+    def parse(self, response: str) -> Action:
+        action_str = self.parse_response(response)
        return self.parse_action(action_str)

-    def parse_response(
-        self, response: dict[str, list[dict[str, dict[str, str | None]]]]
-    ) -> str:
+    def parse_response(self, response) -> str:
        action_str = response['choices'][0]['message']['content']
        if action_str is None:
            return ''
@@ -54,7 +47,9 @@ class BrowsingActionParserMessage(ActionParser):
    - BrowseInteractiveAction(browser_actions) - unexpected response format, message back to user
    """

-    def __init__(self) -> None:
+    def __init__(
+        self,
+    ):
        pass

    def check_condition(self, action_str: str) -> bool:
@@ -74,7 +69,9 @@ class BrowsingActionParserBrowseInteractive(ActionParser):
    - BrowseInteractiveAction(browser_actions) - handle send message to user function call in BrowserGym
    """

-    def __init__(self) -> None:
+    def __init__(
+        self,
+    ):
        pass

    def check_condition(self, action_str: str) -> bool:
--- a/openhands/agenthub/browsing_agent/utils.py
+++ b/openhands/agenthub/browsing_agent/utils.py
@@ -5,7 +5,7 @@ from warnings import warn
 import yaml


-def yaml_parser(message: str) -> tuple[dict, bool, str]:
+def yaml_parser(message):
    """Parse a yaml message for the retry function."""
    # saves gpt-3.5 from some yaml parsing errors
    message = re.sub(r':\s*\n(?=\S|\n)', ': ', message)
@@ -22,9 +22,7 @@ def yaml_parser(message: str) -> tuple[dict, bool, str]:
    return value, valid, retry_message


-def _compress_chunks(
-    text: str, identifier: str, skip_list: list[str], split_regex: str = '\n\n+'
-) -> tuple[dict[str, str], str]:
+def _compress_chunks(text, identifier, skip_list, split_regex='\n\n+'):
    """Compress a string by replacing redundant chunks by identifiers. Chunks are defined by the split_regex."""
    text_list = re.split(split_regex, text)
    text_list = [chunk.strip() for chunk in text_list]
@@ -46,7 +44,7 @@ def _compress_chunks(
    return def_dict, compressed_text


-def compress_string(text: str) -> str:
+def compress_string(text):
    """Compress a string by replacing redundant paragraphs and lines with identifiers."""
    # Perform paragraph-level compression
    def_dict, compressed_text = _compress_chunks(
@@ -69,7 +67,7 @@ def compress_string(text: str) -> str:
    return definitions + '\n' + compressed_text


-def extract_html_tags(text: str, keys: list[str]) -> dict[str, list[str]]:
+def extract_html_tags(text, keys):
    """Extract the content within HTML tags for a list of keys.

    Parameters
@@ -104,12 +102,7 @@ class ParseError(Exception):
    pass


-def parse_html_tags_raise(
-    text: str,
-    keys: list[str] | None = None,
-    optional_keys: list[str] | None = None,
-    merge_multiple: bool = False,
-) -> dict[str, str]:
+def parse_html_tags_raise(text, keys=(), optional_keys=(), merge_multiple=False):
    """A version of parse_html_tags that raises an exception if the parsing is not successful."""
    content_dict, valid, retry_message = parse_html_tags(
        text, keys, optional_keys, merge_multiple=merge_multiple
@@ -119,12 +112,7 @@ def parse_html_tags_raise(
    return content_dict


-def parse_html_tags(
-    text: str,
-    keys: list[str] | None = None,
-    optional_keys: list[str] | None = None,
-    merge_multiple: bool = False,
-) -> tuple[dict[str, str], bool, str]:
+def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
    """Satisfy the parse api, extracts 1 match per key and validates that all keys are present

    Parameters
@@ -145,12 +133,9 @@ def parse_html_tags(
    str
        A message to be displayed to the agent if the parsing was not successful.
    """
-    keys = keys or []
-    optional_keys = optional_keys or []
-    all_keys = list(keys) + list(optional_keys)
+    all_keys = tuple(keys) + tuple(optional_keys)
    content_dict = extract_html_tags(text, all_keys)
    retry_messages = []
-    result_dict: dict[str, str] = {}

    for key in all_keys:
        if key not in content_dict:
@@ -158,6 +143,7 @@ def parse_html_tags(
                retry_messages.append(f'Missing the key <{key}> in the answer.')
        else:
            val = content_dict[key]
+            content_dict[key] = val[0]
            if len(val) > 1:
                if not merge_multiple:
                    retry_messages.append(
@@ -165,10 +151,8 @@ def parse_html_tags(
                    )
                else:
                    # merge the multiple instances
-                    result_dict[key] = '\n'.join(val)
-            else:
-                result_dict[key] = val[0]
+                    content_dict[key] = '\n'.join(val)

    valid = len(retry_messages) == 0
    retry_message = '\n'.join(retry_messages)
-    return result_dict, valid, retry_message
+    return content_dict, valid, retry_message
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -475,9 +475,8 @@ def combine_thought(action: Action, thought: str) -> Action:
 def response_to_actions(response: ModelResponse) -> list[Action]:
    actions: list[Action] = []
    assert len(response.choices) == 1, 'Only one choice is supported for now'
-    choice = response.choices[0]
-    assistant_msg = choice.message
-    if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
+    assistant_msg = response.choices[0].message
+    if assistant_msg.tool_calls:
        # Check if there's assistant_msg.content. If so, add it to the thought
        thought = ''
        if isinstance(assistant_msg.content, str):
@@ -593,10 +592,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
            actions.append(action)
    else:
        actions.append(
-            MessageAction(
-                content=str(assistant_msg.content) if assistant_msg.content else '',
-                wait_for_response=True,
-            )
+            MessageAction(content=assistant_msg.content, wait_for_response=True)
        )

    assert len(actions) >= 1
--- a/openhands/agenthub/micro/agent.py
+++ b/openhands/agenthub/micro/agent.py
@@ -22,7 +22,7 @@ def parse_response(orig_response: str) -> Action:
    return action_from_dict(action_dict)


-def to_json(obj: object, **kwargs: dict) -> str:
+def to_json(obj, **kwargs):
    """Serialize an object to str format"""
    return json.dumps(obj, **kwargs)

@@ -32,9 +32,7 @@ class MicroAgent(Agent):
    prompt = ''
    agent_definition: dict = {}

-    def history_to_json(
-        self, history: list[Event], max_events: int = 20, **kwargs: dict
-    ) -> str:
+    def history_to_json(self, history: list[Event], max_events: int = 20, **kwargs):
        """
        Serialize and simplify history to str format
        """
@@ -62,7 +60,7 @@ class MicroAgent(Agent):
        super().__init__(llm, config)
        if 'name' not in self.agent_definition:
            raise ValueError('Agent definition must contain a name')
-        self.prompt_template = Environment(loader=BaseLoader()).from_string(self.prompt)
+        self.prompt_template = Environment(loader=BaseLoader).from_string(self.prompt)
        self.delegates = all_microagents.copy()
        del self.delegates[self.agent_definition['name']]

@@ -76,7 +74,7 @@ class MicroAgent(Agent):
            delegates=self.delegates,
            latest_user_message=last_user_message,
        )
-        content: list[TextContent | ImageContent] = [TextContent(text=prompt)]
+        content = [TextContent(text=prompt)]
        if self.llm.vision_is_active() and last_image_urls:
            content.append(ImageContent(image_urls=last_image_urls))
        message = Message(role='user', content=content)
--- a/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py
+++ b/openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py
@@ -29,9 +29,7 @@ def get_error_prefix(obs: BrowserOutputObservation) -> str:
    return f'## Error from previous action:\n{obs.last_browser_action_error}\n'


-def create_goal_prompt(
-    goal: str, image_urls: list[str] | None
-) -> tuple[str, list[str]]:
+def create_goal_prompt(goal: str, image_urls: list[str] | None):
    goal_txt: str = f"""\
 # Instructions
 Review the current state of the page and all other information to find the best possible next action to accomplish your goal. Your answer will be interpreted and executed by a program, make sure to follow the formatting instructions.
@@ -54,7 +52,7 @@ def create_observation_prompt(
    focused_element: str,
    error_prefix: str,
    som_screenshot: str | None,
-) -> tuple[str, str | None]:
+):
    txt_observation = f"""
 # Observation of current step:
 {tabs}{axtree_txt}{focused_element}{error_prefix}
@@ -275,9 +273,7 @@ Note:
        observation_txt, som_screenshot = create_observation_prompt(
            cur_axtree_txt, tabs, focused_element, error_prefix, set_of_marks
        )
-        human_prompt: list[TextContent | ImageContent] = [
-            TextContent(type='text', text=goal_txt)
-        ]
+        human_prompt = [TextContent(type='text', text=goal_txt)]
        if len(goal_images) > 0:
            human_prompt.append(ImageContent(image_urls=goal_images))
        human_prompt.append(TextContent(type='text', text=observation_txt))
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -21,7 +21,6 @@ from openhands.core.exceptions import (
    AgentStuckInLoopError,
    FunctionCallNotExistsError,
    FunctionCallValidationError,
-    LLMContextWindowExceedError,
    LLMMalformedActionError,
    LLMNoActionError,
    LLMResponseError,
@@ -51,7 +50,7 @@ from openhands.events.observation import (
    NullObservation,
    Observation,
 )
-from openhands.events.serialization.event import event_to_trajectory, truncate_content
+from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM

 # note: RESUME is only available on web GUI
@@ -149,13 +148,12 @@ class AgentController:
        # replay-related
        self._replay_manager = ReplayManager(replay_events)

-    async def close(self, set_stop_state=True) -> None:
+    async def close(self) -> None:
        """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream.

        Note that it's fairly important that this closes properly, otherwise the state is incomplete.
        """
-        if set_stop_state:
-            await self.set_agent_state_to(AgentState.STOPPED)
+        await self.set_agent_state_to(AgentState.STOPPED)

        # we made history, now is the time to rewrite it!
        # the final state.history will be used by external scripts like evals, tests, etc.
@@ -253,7 +251,6 @@ class AgentController:
                isinstance(e, litellm.AuthenticationError)
                or isinstance(e, litellm.BadRequestError)
                or isinstance(e, RateLimitError)
-                or isinstance(e, LLMContextWindowExceedError)
            ):
                reported = e
            await self._react_to_exception(reported)
@@ -701,13 +698,24 @@ class AgentController:
                    or 'prompt is too long' in error_str
                    or isinstance(e, ContextWindowExceededError)
                ):
-                    if self.agent.config.enable_history_truncation:
-                        self._handle_long_context_error()
-                        return
-                    else:
-                        raise LLMContextWindowExceedError()
-                else:
-                    raise e
+                    # When context window is exceeded, keep roughly half of agent interactions
+                    self.state.history = self._apply_conversation_window(
+                        self.state.history
+                    )
+
+                    # Save the ID of the first event in our truncated history for future reloading
+                    if self.state.history:
+                        self.state.start_id = self.state.history[0].id
+
+                    # Add a condensation observation to trigger another step by the agent
+                    self.event_stream.add_event(
+                        AgentCondensationObservation(
+                            content='Trimming prompt to meet context window limitations'
+                        ),
+                        EventSource.AGENT,
+                    )
+                    return
+                raise e

        if action.runnable:
            if self.state.confirmation_mode and (
@@ -834,11 +842,6 @@ class AgentController:
        # Always load from the event stream to avoid losing history
        self._init_history()

-    def get_trajectory(self) -> list[dict]:
-        # state history could be partially hidden/truncated before controller is closed
-        assert self._closed
-        return [event_to_trajectory(event) for event in self.state.history]
-
    def _init_history(self) -> None:
        """Initializes the agent's history from the event stream.

@@ -964,22 +967,6 @@ class AgentController:
        # make sure history is in sync
        self.state.start_id = start_id

-    def _handle_long_context_error(self) -> None:
-        # When context window is exceeded, keep roughly half of agent interactions
-        self.state.history = self._apply_conversation_window(self.state.history)
-
-        # Save the ID of the first event in our truncated history for future reloading
-        if self.state.history:
-            self.state.start_id = self.state.history[0].id
-
-        # Add an error event to trigger another step by the agent
-        self.event_stream.add_event(
-            AgentCondensationObservation(
-                content='Trimming prompt to meet context window limitations'
-            ),
-            EventSource.AGENT,
-        )
-
    def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
        """Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
        and ensuring the first user message is always included.
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -100,7 +100,6 @@ async def main(loop: asyncio.AbstractEventLoop):
    initial_user_action = MessageAction(content=task_str) if task_str else None

    sid = str(uuid4())
-    display_message(f'Session ID: {sid}')

    runtime = create_runtime(config, sid=sid, headless_mode=True)
    await runtime.connect()
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@@ -18,7 +18,6 @@ class AgentConfig(BaseModel):
        enable_prompt_extensions: Whether to use prompt extensions (e.g., microagents, inject runtime info). Default is True.
        disabled_microagents: A list of microagents to disable. Default is None.
        condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
-        enable_history_truncation: If history should be truncated once LLM context limit is hit.
    """

    codeact_enable_browsing: bool = Field(default=True)
@@ -32,4 +31,3 @@ class AgentConfig(BaseModel):
    enable_prompt_extensions: bool = Field(default=True)
    disabled_microagents: list[str] | None = Field(default=None)
    condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig)
-    enable_history_truncation: bool = Field(default=True)
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -75,9 +75,6 @@ class AppConfig(BaseModel):
    file_uploads_restrict_file_types: bool = Field(default=False)
    file_uploads_allowed_extensions: list[str] = Field(default_factory=lambda: ['.*'])
    runloop_api_key: SecretStr | None = Field(default=None)
-    daytona_api_key: SecretStr | None = Field(default=None)
-    daytona_api_url: str = Field(default='https://app.daytona.io/api')
-    daytona_target: str = Field(default='us')
    cli_multiline_input: bool = Field(default=False)
    conversation_max_age_seconds: int = Field(default=864000)  # 10 days in seconds

--- a/openhands/core/config/condenser_config.py
+++ b/openhands/core/config/condenser_config.py
@@ -26,10 +26,8 @@ class RecentEventsCondenserConfig(BaseModel):
    """Configuration for RecentEventsCondenser."""

    type: Literal['recent'] = Field('recent')
-
-    # at least one event by default, because the best guess is that it is the user task
    keep_first: int = Field(
-        default=1,
+        default=0,
        description='The number of initial events to condense.',
        ge=0,
    )
@@ -45,8 +43,6 @@ class LLMSummarizingCondenserConfig(BaseModel):
    llm_config: LLMConfig = Field(
        ..., description='Configuration for the LLM to use for condensing.'
    )
-
-    # at least one event by default, because the best guess is that it's the user task
    keep_first: int = Field(
        default=1,
        description='The number of initial events to condense.',
@@ -66,10 +62,8 @@ class AmortizedForgettingCondenserConfig(BaseModel):
        description='Maximum size of the condensed history before triggering forgetting.',
        ge=2,
    )
-
-    # at least one event by default, because the best guess is that it's the user task
    keep_first: int = Field(
-        default=1,
+        default=0,
        description='Number of initial events to always keep in history.',
        ge=0,
    )
@@ -87,10 +81,8 @@ class LLMAttentionCondenserConfig(BaseModel):
        description='Maximum size of the condensed history before triggering forgetting.',
        ge=2,
    )
-
-    # at least one event by default, because the best guess is that it's the user task
    keep_first: int = Field(
-        default=1,
+        default=0,
        description='Number of initial events to always keep in history.',
        ge=0,
    )
--- a/openhands/core/config/config_utils.py
+++ b/openhands/core/config/config_utils.py
@@ -25,20 +25,14 @@ def get_field_info(field: FieldInfo) -> dict[str, Any]:
    # Note: this only works for UnionTypes with None as one of the types
    if get_origin(field_type) is UnionType:
        types = get_args(field_type)
-        non_none_arg = next(
-            (t for t in types if t is not None and t is not type(None)), None
-        )
+        non_none_arg = next((t for t in types if t is not type(None)), None)
        if non_none_arg is not None:
            field_type = non_none_arg
            optional = True

    # type name in a pretty format
    type_name = (
-        str(field_type)
-        if field_type is None
-        else (
-            field_type.__name__ if hasattr(field_type, '__name__') else str(field_type)
-        )
+        field_type.__name__ if hasattr(field_type, '__name__') else str(field_type)
    )

    # default is always present
--- a/openhands/core/exceptions.py
+++ b/openhands/core/exceptions.py
@@ -10,17 +10,17 @@ class AgentError(Exception):


 class AgentNoInstructionError(AgentError):
-    def __init__(self, message: str = 'Instruction must be provided') -> None:
+    def __init__(self, message='Instruction must be provided'):
        super().__init__(message)


 class AgentEventTypeError(AgentError):
-    def __init__(self, message: str = 'Event must be a dictionary') -> None:
+    def __init__(self, message='Event must be a dictionary'):
        super().__init__(message)


 class AgentAlreadyRegisteredError(AgentError):
-    def __init__(self, name: str | None = None) -> None:
+    def __init__(self, name=None):
        if name is not None:
            message = f"Agent class already registered under '{name}'"
        else:
@@ -29,7 +29,7 @@ class AgentAlreadyRegisteredError(AgentError):


 class AgentNotRegisteredError(AgentError):
-    def __init__(self, name: str | None = None) -> None:
+    def __init__(self, name=None):
        if name is not None:
            message = f"No agent class registered under '{name}'"
        else:
@@ -38,7 +38,7 @@ class AgentNotRegisteredError(AgentError):


 class AgentStuckInLoopError(AgentError):
-    def __init__(self, message: str = 'Agent got stuck in a loop') -> None:
+    def __init__(self, message='Agent got stuck in a loop'):
        super().__init__(message)


@@ -48,7 +48,7 @@ class AgentStuckInLoopError(AgentError):


 class TaskInvalidStateError(Exception):
-    def __init__(self, state: str | None = None) -> None:
+    def __init__(self, state=None):
        if state is not None:
            message = f'Invalid state {state}'
        else:
@@ -64,47 +64,37 @@ class TaskInvalidStateError(Exception):
 # This exception gets sent back to the LLM
 # It might be malformed JSON
 class LLMMalformedActionError(Exception):
-    def __init__(self, message: str = 'Malformed response') -> None:
+    def __init__(self, message='Malformed response'):
        self.message = message
        super().__init__(message)

-    def __str__(self) -> str:
+    def __str__(self):
        return self.message


 # This exception gets sent back to the LLM
 # For some reason, the agent did not return an action
 class LLMNoActionError(Exception):
-    def __init__(self, message: str = 'Agent must return an action') -> None:
+    def __init__(self, message='Agent must return an action'):
        super().__init__(message)


 # This exception gets sent back to the LLM
 # The LLM output did not include an action, or the action was not the expected type
 class LLMResponseError(Exception):
-    def __init__(
-        self, message: str = 'Failed to retrieve action from LLM response'
-    ) -> None:
+    def __init__(self, message='Failed to retrieve action from LLM response'):
        super().__init__(message)


 class UserCancelledError(Exception):
-    def __init__(self, message: str = 'User cancelled the request') -> None:
+    def __init__(self, message='User cancelled the request'):
        super().__init__(message)


 class OperationCancelled(Exception):
    """Exception raised when an operation is cancelled (e.g. by a keyboard interrupt)."""

-    def __init__(self, message: str = 'Operation was cancelled') -> None:
-        super().__init__(message)
-
-
-class LLMContextWindowExceedError(RuntimeError):
-    def __init__(
-        self,
-        message: str = 'Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error',
-    ) -> None:
+    def __init__(self, message='Operation was cancelled'):
        super().__init__(message)


@@ -119,7 +109,7 @@ class FunctionCallConversionError(Exception):
    This typically happens when there's a malformed message (e.g., missing <function=...> tags). But not due to LLM output.
    """

-    def __init__(self, message: str) -> None:
+    def __init__(self, message):
        super().__init__(message)


@@ -129,14 +119,14 @@ class FunctionCallValidationError(Exception):
    This typically happens when the LLM outputs unrecognized function call / parameter names / values.
    """

-    def __init__(self, message: str) -> None:
+    def __init__(self, message):
        super().__init__(message)


 class FunctionCallNotExistsError(Exception):
    """Exception raised when an LLM call a tool that is not registered."""

-    def __init__(self, message: str) -> None:
+    def __init__(self, message):
        super().__init__(message)


@@ -193,17 +183,15 @@ class AgentRuntimeNotFoundError(AgentRuntimeUnavailableError):


 class BrowserInitException(Exception):
-    def __init__(
-        self, message: str = 'Failed to initialize browser environment'
-    ) -> None:
+    def __init__(self, message='Failed to initialize browser environment'):
        super().__init__(message)


 class BrowserUnavailableException(Exception):
    def __init__(
        self,
-        message: str = 'Browser environment is not available, please check if has been initialized',
-    ) -> None:
+        message='Browser environment is not available, please check if has been initialized',
+    ):
        super().__init__(message)


@@ -221,5 +209,5 @@ class MicroAgentError(Exception):
 class MicroAgentValidationError(MicroAgentError):
    """Raised when there's a validation error in microagent metadata."""

-    def __init__(self, message: str = 'Micro agent validation failed') -> None:
+    def __init__(self, message='Micro agent validation failed'):
        super().__init__(message)
--- a/openhands/core/logger.py
+++ b/openhands/core/logger.py
@@ -74,11 +74,10 @@ LOG_COLORS: Mapping[str, ColorType] = {


 class StackInfoFilter(logging.Filter):
-    def filter(self, record: logging.LogRecord) -> bool:
+    def filter(self, record):
        if record.levelno >= logging.ERROR:
-            # LogRecord attributes are dynamically typed
-            setattr(record, 'stack_info', True)
-            setattr(record, 'exc_info', sys.exc_info())
+            record.stack_info = True
+            record.exc_info = True
        return True


@@ -108,9 +107,9 @@ def strip_ansi(s: str) -> str:


 class ColoredFormatter(logging.Formatter):
-    def format(self, record: logging.LogRecord) -> str:
-        msg_type = record.__dict__.get('msg_type', '')
-        event_source = record.__dict__.get('event_source', '')
+    def format(self, record):
+        msg_type = record.__dict__.get('msg_type')
+        event_source = record.__dict__.get('event_source')
        if event_source:
            new_msg_type = f'{event_source.upper()}_{msg_type}'
            if new_msg_type in LOG_COLORS:
@@ -137,13 +136,12 @@ class ColoredFormatter(logging.Formatter):
        return super().format(new_record)


-def _fix_record(record: logging.LogRecord) -> logging.LogRecord:
+def _fix_record(record: logging.LogRecord):
    new_record = copy.copy(record)
    # The formatter expects non boolean values, and will raise an exception if there is a boolean - so we fix these
-    # LogRecord attributes are dynamically typed
-    if getattr(new_record, 'exc_info', None) is True:
-        setattr(new_record, 'exc_info', sys.exc_info())
-        setattr(new_record, 'stack_info', None)
+    if new_record.exc_info is True and not new_record.exc_text:  # type: ignore
+        new_record.exc_info = sys.exc_info()  # type: ignore
+        new_record.stack_info = None  # type: ignore
    return new_record


@@ -160,32 +158,32 @@ class RollingLogger:
    log_lines: list[str]
    all_lines: str

-    def __init__(self, max_lines: int = 10, char_limit: int = 80) -> None:
+    def __init__(self, max_lines=10, char_limit=80):
        self.max_lines = max_lines
        self.char_limit = char_limit
        self.log_lines = [''] * self.max_lines
        self.all_lines = ''

-    def is_enabled(self) -> bool:
+    def is_enabled(self):
        return DEBUG and sys.stdout.isatty()

-    def start(self, message: str = '') -> None:
+    def start(self, message=''):
        if message:
            print(message)
        self._write('\n' * self.max_lines)
        self._flush()

-    def add_line(self, line: str) -> None:
+    def add_line(self, line):
        self.log_lines.pop(0)
        self.log_lines.append(line[: self.char_limit])
        self.print_lines()
        self.all_lines += line + '\n'

-    def write_immediately(self, line: str) -> None:
+    def write_immediately(self, line):
        self._write(line)
        self._flush()

-    def print_lines(self) -> None:
+    def print_lines(self):
        """Display the last n log_lines in the console (not for file logging).

        This will create the effect of a rolling display in the console.
@@ -194,39 +192,37 @@ class RollingLogger:
        for line in self.log_lines:
            self.replace_current_line(line)

-    def move_back(self, amount: int = -1) -> None:
+    def move_back(self, amount=-1):
        r"""'\033[F' moves the cursor up one line."""
        if amount == -1:
            amount = self.max_lines
        self._write('\033[F' * (self.max_lines))
        self._flush()

-    def replace_current_line(self, line: str = '') -> None:
+    def replace_current_line(self, line=''):
        r"""'\033[2K\r' clears the line and moves the cursor to the beginning of the line."""
        self._write('\033[2K' + line + '\n')
        self._flush()

-    def _write(self, line: str) -> None:
+    def _write(self, line):
        if not self.is_enabled():
            return
        sys.stdout.write(line)

-    def _flush(self) -> None:
+    def _flush(self):
        if not self.is_enabled():
            return
        sys.stdout.flush()


 class SensitiveDataFilter(logging.Filter):
-    def filter(self, record: logging.LogRecord) -> bool:
+    def filter(self, record):
        # Gather sensitive values which should not ever appear in the logs.
        sensitive_values = []
        for key, value in os.environ.items():
            key_upper = key.upper()
-            if (
-                len(value) > 2
-                and value != 'default'
-                and any(s in key_upper for s in ('SECRET', 'KEY', 'CODE', 'TOKEN'))
+            if len(value) > 2 and any(
+                s in key_upper for s in ('SECRET', 'KEY', 'CODE', 'TOKEN')
            ):
                sensitive_values.append(value)

@@ -247,7 +243,6 @@ class SensitiveDataFilter(logging.Filter):
            'modal_api_token_secret',
            'llm_api_key',
            'sandbox_env_github_token',
-            'daytona_api_key',
        ]

        # add env var names
@@ -265,9 +260,7 @@ class SensitiveDataFilter(logging.Filter):
        return True


-def get_console_handler(
-    log_level: int = logging.INFO, extra_info: str | None = None
-) -> logging.StreamHandler:
+def get_console_handler(log_level: int = logging.INFO, extra_info: str | None = None):
    """Returns a console handler for logging."""
    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)
@@ -278,9 +271,7 @@ def get_console_handler(
    return console_handler


-def get_file_handler(
-    log_dir: str, log_level: int = logging.INFO
-) -> logging.FileHandler:
+def get_file_handler(log_dir: str, log_level: int = logging.INFO):
    """Returns a file handler for logging."""
    os.makedirs(log_dir, exist_ok=True)
    timestamp = datetime.now().strftime('%Y-%m-%d')
@@ -354,13 +345,7 @@ logging.getLogger('LiteLLM Proxy').disabled = True
 class LlmFileHandler(logging.FileHandler):
    """LLM prompt and response logging."""

-    def __init__(
-        self,
-        filename: str,
-        mode: str = 'a',
-        encoding: str = 'utf-8',
-        delay: bool = False,
-    ) -> None:
+    def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
        """Initializes an instance of LlmFileHandler.

        Args:
@@ -391,7 +376,7 @@ class LlmFileHandler(logging.FileHandler):
        self.baseFilename = os.path.join(self.log_directory, filename)
        super().__init__(self.baseFilename, mode, encoding, delay)

-    def emit(self, record: logging.LogRecord) -> None:
+    def emit(self, record):
        """Emits a log record.

        Args:
@@ -406,7 +391,7 @@ class LlmFileHandler(logging.FileHandler):
        self.message_counter += 1


-def _get_llm_file_handler(name: str, log_level: int) -> LlmFileHandler:
+def _get_llm_file_handler(name: str, log_level: int):
    # The 'delay' parameter, when set to True, postpones the opening of the log file
    # until the first log message is emitted.
    llm_file_handler = LlmFileHandler(name, delay=True)
@@ -415,7 +400,7 @@ def _get_llm_file_handler(name: str, log_level: int) -> LlmFileHandler:
    return llm_file_handler


-def _setup_llm_logger(name: str, log_level: int) -> logging.Logger:
+def _setup_llm_logger(name: str, log_level: int):
    logger = logging.getLogger(name)
    logger.propagate = False
    logger.setLevel(log_level)
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -27,6 +27,7 @@ from openhands.events.action.action import Action
 from openhands.events.event import Event
 from openhands.events.observation import AgentStateChangedObservation
 from openhands.events.serialization import event_from_dict
+from openhands.events.serialization.event import event_to_trajectory
 from openhands.io import read_input, read_task
 from openhands.runtime.base import Runtime

@@ -166,8 +167,6 @@ async def run_controller(
        # NOTE: the saved state does not include delegates events
        end_state.save_to_session(event_stream.sid, event_stream.file_store)

-    await controller.close(set_stop_state=False)
-
    state = controller.get_state()

    # save trajectories if applicable
@@ -178,7 +177,7 @@ async def run_controller(
        else:
            file_path = config.save_trajectory_path
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
-        histories = controller.get_trajectory()
+        histories = [event_to_trajectory(event) for event in state.history]
        with open(file_path, 'w') as f:
            json.dump(histories, f)

--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -15,9 +15,7 @@ class Content(BaseModel):
    cache_prompt: bool = False

    @model_serializer
-    def serialize_model(
-        self,
-    ) -> dict[str, str | dict[str, str]] | list[dict[str, str | dict[str, str]]]:
+    def serialize_model(self):
        raise NotImplementedError('Subclasses should implement this method.')


@@ -26,7 +24,7 @@ class TextContent(Content):
    text: str

    @model_serializer
-    def serialize_model(self) -> dict[str, str | dict[str, str]]:
+    def serialize_model(self):
        data: dict[str, str | dict[str, str]] = {
            'type': self.type,
            'text': self.text,
@@ -41,7 +39,7 @@ class ImageContent(Content):
    image_urls: list[str]

    @model_serializer
-    def serialize_model(self) -> list[dict[str, str | dict[str, str]]]:
+    def serialize_model(self):
        images: list[dict[str, str | dict[str, str]]] = []
        for url in self.image_urls:
            images.append({'type': self.type, 'image_url': {'url': url}})
@@ -103,22 +101,15 @@ class Message(BaseModel):
            # See discussion here for details: https://github.com/BerriAI/litellm/issues/6422#issuecomment-2438765472
            if self.role == 'tool' and item.cache_prompt:
                role_tool_with_prompt_caching = True
-                if isinstance(item, TextContent):
-                    d.pop('cache_control', None)
-                elif isinstance(item, ImageContent):
-                    # ImageContent.model_dump() always returns a list
-                    # We know d is a list of dicts for ImageContent
-                    if hasattr(d, '__iter__'):
-                        for d_item in d:
-                            if hasattr(d_item, 'pop'):
-                                d_item.pop('cache_control', None)
-
+                if isinstance(d, dict):
+                    d.pop('cache_control')
+                elif isinstance(d, list):
+                    for d_item in d:
+                        d_item.pop('cache_control')
            if isinstance(item, TextContent):
                content.append(d)
            elif isinstance(item, ImageContent) and self.vision_enabled:
-                # ImageContent.model_dump() always returns a list
-                # We know d is a list for ImageContent
-                content.extend([d] if isinstance(d, dict) else d)
+                content.extend(d)

        message_dict: dict = {'content': content, 'role': self.role}

--- a/openhands/core/message_utils.py
+++ b/openhands/core/message_utils.py
@@ -29,7 +29,6 @@ from openhands.events.observation import (
 from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.observation import Observation
 from openhands.events.serialization.event import truncate_content
-from openhands.llm.metrics import Metrics, TokenUsage


 def events_to_messages(
@@ -160,7 +159,7 @@ def get_action_message(
        )

        llm_response: ModelResponse = tool_metadata.model_response
-        assistant_msg = getattr(llm_response.choices[0], 'message')
+        assistant_msg = llm_response.choices[0].message

        # Add the LLM message (assistant) that initiated the tool calls
        # (overwrites any previous message with the same response_id)
@@ -168,7 +167,7 @@ def get_action_message(
            f'Tool calls type: {type(assistant_msg.tool_calls)}, value: {assistant_msg.tool_calls}'
        )
        pending_tool_call_action_messages[llm_response.id] = Message(
-            role=getattr(assistant_msg, 'role', 'assistant'),
+            role=assistant_msg.role,
            # tool call content SHOULD BE a string
            content=[TextContent(text=assistant_msg.content or '')]
            if assistant_msg.content is not None
@@ -185,7 +184,7 @@ def get_action_message(
        tool_metadata = action.tool_call_metadata
        if tool_metadata is not None:
            # take the response message from the tool call
-            assistant_msg = getattr(tool_metadata.model_response.choices[0], 'message')
+            assistant_msg = tool_metadata.model_response.choices[0].message
            content = assistant_msg.content or ''

            # save content if any, to thought
@@ -197,11 +196,9 @@ def get_action_message(

            # remove the tool call metadata
            action.tool_call_metadata = None
-        if role not in ('user', 'system', 'assistant', 'tool'):
-            raise ValueError(f'Invalid role: {role}')
        return [
            Message(
-                role=role,  # type: ignore[arg-type]
+                role=role,
                content=[TextContent(text=action.thought)],
            )
        ]
@@ -210,11 +207,9 @@ def get_action_message(
        content = [TextContent(text=action.content or '')]
        if vision_is_active and action.image_urls:
            content.append(ImageContent(image_urls=action.image_urls))
-        if role not in ('user', 'system', 'assistant', 'tool'):
-            raise ValueError(f'Invalid role: {role}')
        return [
            Message(
-                role=role,  # type: ignore[arg-type]
+                role=role,
                content=content,
            )
        ]
@@ -222,7 +217,7 @@ def get_action_message(
        content = [TextContent(text=f'User executed the command:\n{action.command}')]
        return [
            Message(
-                role='user',  # Always user for CmdRunAction
+                role='user',
                content=content,
            )
        ]
@@ -356,58 +351,17 @@ def get_observation_message(


 def apply_prompt_caching(messages: list[Message]) -> None:
-    """Applies caching breakpoints to the messages.
-
-    For new Anthropic API, we only need to mark the last user or tool message as cacheable.
-    """
+    """Applies caching breakpoints to the messages."""
    # NOTE: this is only needed for anthropic
+    # following logic here:
+    # https://github.com/anthropics/anthropic-quickstarts/blob/8f734fd08c425c6ec91ddd613af04ff87d70c5a0/computer-use-demo/computer_use_demo/loop.py#L241-L262
+    breakpoints_remaining = 3  # remaining 1 for system/tool
    for message in reversed(messages):
        if message.role in ('user', 'tool'):
-            message.content[
-                -1
-            ].cache_prompt = True  # Last item inside the message content
-            break
-
-
-def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | None:
-    """
-    Returns at most one token usage record for the `model_response.id` in this event's
-    `tool_call_metadata`.
-
-    If no response_id is found, or none match in metrics.token_usages, returns None.
-    """
-    if event.tool_call_metadata and event.tool_call_metadata.model_response:
-        response_id = event.tool_call_metadata.model_response.get('id')
-        if response_id:
-            return next(
-                (
-                    usage
-                    for usage in metrics.token_usages
-                    if usage.response_id == response_id
-                ),
-                None,
-            )
-    return None
-
-
-def get_token_usage_for_event_id(
-    events: list[Event], event_id: int, metrics: Metrics
-) -> TokenUsage | None:
-    """
-    Starting from the event with .id == event_id and moving backwards in `events`,
-    find the first TokenUsage record (if any) associated with a response_id from
-    tool_call_metadata.model_response.id.
-
-    Returns the first match found, or None if none is found.
-    """
-    # find the index of the event with the given id
-    idx = next((i for i, e in enumerate(events) if e.id == event_id), None)
-    if idx is None:
-        return None
-
-    # search backward from idx down to 0
-    for i in range(idx, -1, -1):
-        usage = get_token_usage_for_event(events[i], metrics)
-        if usage is not None:
-            return usage
-    return None
+            if breakpoints_remaining > 0:
+                message.content[
+                    -1
+                ].cache_prompt = True  # Last item inside the message content
+                breakpoints_remaining -= 1
+            else:
+                break
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -8,7 +8,6 @@ from typing import Any, Callable
 import requests

 from openhands.core.config import LLMConfig
-from openhands.utils.ensure_httpx_close import EnsureHttpxClose

 with warnings.catch_warnings():
    warnings.simplefilter('ignore')
@@ -231,9 +230,9 @@ class LLM(RetryMixin, DebugMixin):

            # Record start time for latency measurement
            start_time = time.time()
-            with EnsureHttpxClose():
-                # we don't support streaming here, thus we get a ModelResponse
-                resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+
+            # we don't support streaming here, thus we get a ModelResponse
+            resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)

            # Calculate and record latency
            latency = time.time() - start_time
@@ -288,11 +287,7 @@ class LLM(RetryMixin, DebugMixin):
                    'messages': messages,
                    'response': resp,
                    'args': args,
-                    'kwargs': {
-                        k: v
-                        for k, v in kwargs.items()
-                        if k not in ('messages', 'client')
-                    },
+                    'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'},
                    'timestamp': time.time(),
                    'cost': cost,
                }
@@ -502,21 +497,20 @@ class LLM(RetryMixin, DebugMixin):
            stats += 'Response Latency: %.3f seconds\n' % latest_latency.latency

        usage: Usage | None = response.get('usage')
-        response_id = response.get('id', 'unknown')

        if usage:
            # keep track of the input and output tokens
-            prompt_tokens = usage.get('prompt_tokens', 0)
-            completion_tokens = usage.get('completion_tokens', 0)
+            input_tokens = usage.get('prompt_tokens')
+            output_tokens = usage.get('completion_tokens')

-            if prompt_tokens:
-                stats += 'Input tokens: ' + str(prompt_tokens)
+            if input_tokens:
+                stats += 'Input tokens: ' + str(input_tokens)

-            if completion_tokens:
+            if output_tokens:
                stats += (
-                    (' | ' if prompt_tokens else '')
+                    (' | ' if input_tokens else '')
                    + 'Output tokens: '
-                    + str(completion_tokens)
+                    + str(output_tokens)
                    + '\n'
                )

@@ -525,7 +519,7 @@ class LLM(RetryMixin, DebugMixin):
                'prompt_tokens_details'
            )
            cache_hit_tokens = (
-                prompt_tokens_details.cached_tokens if prompt_tokens_details else 0
+                prompt_tokens_details.cached_tokens if prompt_tokens_details else None
            )
            if cache_hit_tokens:
                stats += 'Input tokens (cache hit): ' + str(cache_hit_tokens) + '\n'
@@ -534,20 +528,10 @@ class LLM(RetryMixin, DebugMixin):
            # but litellm doesn't separate them in the usage stats
            # so we can read it from the provider-specific extra field
            model_extra = usage.get('model_extra', {})
-            cache_write_tokens = model_extra.get('cache_creation_input_tokens', 0)
+            cache_write_tokens = model_extra.get('cache_creation_input_tokens')
            if cache_write_tokens:
                stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n'

-            # Record in metrics
-            # We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write"
-            self.metrics.add_token_usage(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                cache_read_tokens=cache_hit_tokens,
-                cache_write_tokens=cache_write_tokens,
-                response_id=response_id,
-            )
-
        # log the stats
        if stats:
            logger.debug(stats)
--- a/openhands/llm/metrics.py
+++ b/openhands/llm/metrics.py
@@ -17,23 +17,11 @@ class ResponseLatency(BaseModel):
    response_id: str


-class TokenUsage(BaseModel):
-    """Metric tracking detailed token usage per completion call."""
-
-    model: str
-    prompt_tokens: int
-    completion_tokens: int
-    cache_read_tokens: int
-    cache_write_tokens: int
-    response_id: str
-
-
 class Metrics:
    """Metrics class can record various metrics during running and evaluation.
-    We track:
-      - accumulated_cost and costs
-      - A list of ResponseLatency
-      - A list of TokenUsage (one per call).
+    Currently, we define the following metrics:
+        accumulated_cost: the total cost (USD $) of the current LLM.
+        response_latency: the time taken for each LLM completion call.
    """

    def __init__(self, model_name: str = 'default') -> None:
@@ -41,7 +29,6 @@ class Metrics:
        self._costs: list[Cost] = []
        self._response_latencies: list[ResponseLatency] = []
        self.model_name = model_name
-        self._token_usages: list[TokenUsage] = []

    @property
    def accumulated_cost(self) -> float:
@@ -67,16 +54,6 @@ class Metrics:
    def response_latencies(self, value: list[ResponseLatency]) -> None:
        self._response_latencies = value

-    @property
-    def token_usages(self) -> list[TokenUsage]:
-        if not hasattr(self, '_token_usages'):
-            self._token_usages = []
-        return self._token_usages
-
-    @token_usages.setter
-    def token_usages(self, value: list[TokenUsage]) -> None:
-        self._token_usages = value
-
    def add_cost(self, value: float) -> None:
        if value < 0:
            raise ValueError('Added cost cannot be negative.')
@@ -90,33 +67,10 @@ class Metrics:
            )
        )

-    def add_token_usage(
-        self,
-        prompt_tokens: int,
-        completion_tokens: int,
-        cache_read_tokens: int,
-        cache_write_tokens: int,
-        response_id: str,
-    ) -> None:
-        """Add a single usage record."""
-        self._token_usages.append(
-            TokenUsage(
-                model=self.model_name,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                cache_read_tokens=cache_read_tokens,
-                cache_write_tokens=cache_write_tokens,
-                response_id=response_id,
-            )
-        )
-
    def merge(self, other: 'Metrics') -> None:
-        """Merge 'other' metrics into this one."""
        self._accumulated_cost += other.accumulated_cost
        self._costs += other._costs
-        # use the property so older picked objects that lack the field won't crash
-        self.token_usages += other.token_usages
-        self.response_latencies += other.response_latencies
+        self._response_latencies += other._response_latencies

    def get(self) -> dict:
        """Return the metrics in a dictionary."""
@@ -126,14 +80,12 @@ class Metrics:
            'response_latencies': [
                latency.model_dump() for latency in self._response_latencies
            ],
-            'token_usages': [usage.model_dump() for usage in self._token_usages],
        }

    def reset(self):
        self._accumulated_cost = 0.0
        self._costs = []
        self._response_latencies = []
-        self._token_usages = []

    def log(self):
        """Log the metrics."""
--- a/openhands/memory/init.py
+++ b/openhands/memory/init.py
@@ -1,4 +1,4 @@
 from openhands.memory.condenser import Condenser
-from openhands.memory.long_term_memory import LongTermMemory
+from openhands.memory.memory import LongTermMemory

 __all__ = ['LongTermMemory', 'Condenser']
--- a/openhands/memory/condenser/impl/llm_attention_condenser.py
+++ b/openhands/memory/condenser/impl/llm_attention_condenser.py
@@ -18,7 +18,7 @@ class ImportantEventSelection(BaseModel):
 class LLMAttentionCondenser(RollingCondenser):
    """Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""

-    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
+    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 0):
        if keep_first >= max_size // 2:
            raise ValueError(
                f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
--- a/openhands/memory/condenser/impl/recent_events_condenser.py
+++ b/openhands/memory/condenser/impl/recent_events_condenser.py
@@ -8,7 +8,7 @@ from openhands.memory.condenser.condenser import Condenser
 class RecentEventsCondenser(Condenser):
    """A condenser that only keeps a certain number of the most recent events."""

-    def __init__(self, keep_first: int = 1, max_events: int = 10):
+    def __init__(self, keep_first: int = 0, max_events: int = 10):
        self.keep_first = keep_first
        self.max_events = max_events

--- a/openhands/memory/long_term_memory.py
+++ b/openhands/memory/long_term_memory.py
--- a/openhands/resolver/resolve_issue.py
+++ b/openhands/resolver/resolve_issue.py
@@ -429,14 +429,23 @@ async def resolve_issue(
    # checkout the repo
    repo_dir = os.path.join(output_dir, 'repo')
    if not os.path.exists(repo_dir):
-        checkout_output = subprocess.check_output(
-            [
-                'git',
-                'clone',
-                issue_handler.get_clone_url(),
-                f'{output_dir}/repo',
-            ]
-        ).decode('utf-8')
+        # Build the clone command. When GIT_LFS_SKIP_SMUDGE=1, disable LFS
+        # smudging for this invocation only via `git -c ...`; writing it with
+        # `git config --global` would permanently alter the user's git config.
+        clone_cmd = ['git']
+        if os.getenv('GIT_LFS_SKIP_SMUDGE') == '1':
+            clone_cmd += ['-c', 'filter.lfs.smudge=git-lfs smudge --skip']
+            clone_cmd += ['-c', 'filter.lfs.process=git-lfs filter-process --skip']
+        clone_cmd.append('clone')
+
+        # Shallow clone when GIT_CLONE_DEPTH is set (value is passed to git as-is)
+        if depth := os.getenv('GIT_CLONE_DEPTH'):
+            clone_cmd.extend(['--depth', depth])
+
+        # Repository URL and destination go last
+        clone_cmd.extend([issue_handler.get_clone_url(), f'{output_dir}/repo'])
+
+        checkout_output = subprocess.check_output(clone_cmd).decode('utf-8')
        if 'fatal' in checkout_output:
            raise RuntimeError(f'Failed to clone repository: {checkout_output}')

--- a/openhands/runtime/__init__.py
+++ b/openhands/runtime/__init__.py
@@ -1,5 +1,4 @@
 from openhands.core.logger import openhands_logger as logger
-from openhands.runtime.impl.daytona.daytona_runtime import DaytonaRuntime
 from openhands.runtime.impl.docker.docker_runtime import (
    DockerRuntime,
 )
@@ -25,8 +24,6 @@ def get_runtime_cls(name: str):
        return RunloopRuntime
    elif name == 'local':
        return LocalRuntime
-    elif name == 'daytona':
-        return DaytonaRuntime
    else:
        raise ValueError(f'Runtime {name} not supported')

--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -15,7 +15,7 @@ import tempfile
 import time
 import traceback
 from contextlib import asynccontextmanager
-from typing import Type
+from pathlib import Path
 from zipfile import ZipFile

 from fastapi import Depends, FastAPI, HTTPException, Request, UploadFile
@@ -60,6 +60,7 @@ from openhands.runtime.utils.files import insert_lines, read_lines
 from openhands.runtime.utils.memory_monitor import MemoryMonitor
 from openhands.runtime.utils.runtime_init import init_user_and_working_directory
 from openhands.runtime.utils.system_stats import get_system_stats
+from openhands.utils.async_utils import call_sync_from_async, wait_all


 class ActionRequest(BaseModel):
@@ -67,6 +68,7 @@ class ActionRequest(BaseModel):


 ROOT_GID = 0
+
 SESSION_API_KEY = os.environ.get('SESSION_API_KEY')
 api_key_header = APIKeyHeader(name='X-Session-API-Key', auto_error=False)

@@ -133,6 +135,7 @@ class ActionExecutor:
    """ActionExecutor is running inside docker sandbox.
    It is responsible for executing actions received from OpenHands backend and producing observations.
    """
+
    def __init__(
        self,
        plugins_to_load: list[Plugin],
@@ -460,7 +463,6 @@ class ActionExecutor:
        if self.bash_session is not None:
            self.bash_session.close()
        self.browser.close()
->>>>>>> origin/main


 if __name__ == '__main__':
@@ -478,12 +480,6 @@ if __name__ == '__main__':
        help='BrowserGym environment used for browser evaluation',
        default=None,
    )
-    parser.add_argument(
-        '--executor-class',
-        type=str,
-        default='openhands.runtime.executor:ActionExecutor',
-        help='Action executor class to use (format: module.path:ClassName)',
-    )
    # example: python client.py 8000 --working-dir /workspace --plugins JupyterRequirement
    args = parser.parse_args()

@@ -494,13 +490,12 @@ if __name__ == '__main__':
                raise ValueError(f'Plugin {plugin} not found')
            plugins_to_load.append(ALL_PLUGINS[plugin]())  # type: ignore

-    executor_class = get_action_executor_class(args.executor_class)
-    client: RuntimeExecutor | None = None
+    client: ActionExecutor | None = None

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        global client
-        client = executor_class(
+        client = ActionExecutor(
            plugins_to_load,
            work_dir=args.working_dir,
            username=args.username,
--- a/openhands/runtime/builder/docker.py
+++ b/openhands/runtime/builder/docker.py
@@ -67,7 +67,7 @@ class DockerRuntimeBuilder(RuntimeBuilder):
        """
        self.docker_client = docker.from_env()
        version_info = self.docker_client.version()
-        server_version = version_info.get('Version', '').split('+')[0].replace('-', '.')
+        server_version = version_info.get('Version', '').partition('+')[0].replace('-', '.')  # drop '+build' metadata (e.g. '24.0.7+dfsg1') so int() parsing below cannot fail on it
        if tuple(map(int, server_version.split('.'))) < (18, 9):
            raise AgentRuntimeBuildError(
                'Docker server version must be >= 18.09 to use BuildKit'
@@ -168,12 +168,10 @@ class DockerRuntimeBuilder(RuntimeBuilder):
                )

        except subprocess.CalledProcessError as e:
-            logger.error(f'Image build failed:\n{e}')  # TODO: {e} is empty
+            logger.error(f'Image build failed:\n{e}') # TODO: {e} is empty
            logger.error(f'Command output:\n{e.output}')
            if self.rolling_logger.is_enabled():
-                logger.error(
-                    'Docker build output:\n' + self.rolling_logger.all_lines
-                )  # Show the error
+                logger.error("Docker build output:\n" + self.rolling_logger.all_lines) # Show the error
            raise

        except subprocess.TimeoutExpired:
--- a/openhands/runtime/executor/__init__.py
+++ b/openhands/runtime/executor/__init__.py
@@ -1,4 +0,0 @@
-from .base import RuntimeExecutor
-from .action_executor import ActionExecutor, BaseActionExecutor
-
-__all__ = ['ActionExecutor', 'BaseActionExecutor', 'RuntimeExecutor']
--- a/openhands/runtime/executor/action_executor.py
+++ b/openhands/runtime/executor/action_executor.py
@@ -1,267 +0,0 @@
-import base64
-import json
-import mimetypes
-import os
-from pathlib import Path
-import re
-from openhands_aci.utils.diff import get_diff
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action.browse import BrowseInteractiveAction, BrowseURLAction
-from openhands.events.action.commands import IPythonRunCellAction
-from openhands.events.action.files import FileReadAction, FileWriteAction
-from openhands.events.event import FileEditSource, FileReadSource
-from openhands.events.observation.commands import (
-    IPythonRunCellObservation,
-)
-from openhands.events.observation.error import ErrorObservation
-from openhands.events.observation.files import (
-    FileEditObservation,
-    FileReadObservation,
-    FileWriteObservation,
-)
-from openhands.events.observation.observation import Observation
-from openhands.runtime.browser import browse
-from openhands.runtime.executor.base import RuntimeExecutor
-from openhands.runtime.plugins.jupyter import JupyterPlugin
-from openhands.runtime.utils.files import insert_lines, read_lines
-
-
-class BaseActionExecutor(RuntimeExecutor):
-    """Runtime executor that dynamically dispatches actions to the appropriate method based on their name."""
-
-    async def run_action(self, action) -> Observation:
-        async with self.lock:
-            action_type = action.action
-            logger.debug(f'Running action:\n{action}')
-            observation = await getattr(self, action_type)(action)
-            logger.debug(f'Action output:\n{observation}')
-            return observation
-
-
-class ActionExecutor(BaseActionExecutor):
-    """ActionExecutor runs inside docker sandbox.
-    It is responsible for executing actions received from OpenHands backend and producing observations.
-    It is a BaseActionExectuor that provides a default implementation for all of the built-in actions.
-    """
-
-    async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
-        assert self.bash_session is not None
-        if 'jupyter' in self.plugins:
-            _jupyter_plugin: JupyterPlugin = self.plugins['jupyter']  # type: ignore
-            # This is used to make AgentSkills in Jupyter aware of the
-            # current working directory in Bash
-            jupyter_cwd = getattr(self, '_jupyter_cwd', None)
-            if self.bash_session.cwd != jupyter_cwd:
-                logger.debug(
-                    f'{self.bash_session.cwd} != {jupyter_cwd} -> reset Jupyter PWD'
-                )
-                reset_jupyter_cwd_code = (
-                    f'import os; os.chdir("{self.bash_session.cwd}")'
-                )
-                _aux_action = IPythonRunCellAction(code=reset_jupyter_cwd_code)
-                _reset_obs: IPythonRunCellObservation = await _jupyter_plugin.run(
-                    _aux_action
-                )
-                logger.debug(
-                    f'Changed working directory in IPython to: {self.bash_session.cwd}. Output: {_reset_obs}'
-                )
-                self._jupyter_cwd = self.bash_session.cwd
-
-            obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
-            obs.content = obs.content.rstrip()
-            matches = re.findall(
-                r'<oh_aci_output_[0-9a-f]{32}>(.*?)</oh_aci_output_[0-9a-f]{32}>',
-                obs.content,
-                re.DOTALL,
-            )
-            if matches:
-                results: list[str] = []
-                if len(matches) == 1:
-                    # Use specific actions/observations types
-                    match = matches[0]
-                    try:
-                        result_dict = json.loads(match)
-                        if result_dict.get('path'):  # Successful output
-                            if (
-                                result_dict['new_content'] is not None
-                            ):  # File edit commands
-                                diff = get_diff(
-                                    old_contents=result_dict['old_content']
-                                    or '',  # old_content is None when file is created
-                                    new_contents=result_dict['new_content'],
-                                    filepath=result_dict['path'],
-                                )
-                                return FileEditObservation(
-                                    content=diff,
-                                    path=result_dict['path'],
-                                    old_content=result_dict['old_content'],
-                                    new_content=result_dict['new_content'],
-                                    prev_exist=result_dict['prev_exist'],
-                                    impl_source=FileEditSource.OH_ACI,
-                                    formatted_output_and_error=result_dict[
-                                        'formatted_output_and_error'
-                                    ],
-                                )
-                            else:  # File view commands
-                                return FileReadObservation(
-                                    content=result_dict['formatted_output_and_error'],
-                                    path=result_dict['path'],
-                                    impl_source=FileReadSource.OH_ACI,
-                                )
-                        else:  # Error output
-                            results.append(result_dict['formatted_output_and_error'])
-                    except json.JSONDecodeError:
-                        # Handle JSON decoding errors if necessary
-                        results.append(
-                            f"Invalid JSON in 'openhands-aci' output: {match}"
-                        )
-                else:
-                    for match in matches:
-                        try:
-                            result_dict = json.loads(match)
-                            results.append(result_dict['formatted_output_and_error'])
-                        except json.JSONDecodeError:
-                            # Handle JSON decoding errors if necessary
-                            results.append(
-                                f"Invalid JSON in 'openhands-aci' output: {match}"
-                            )
-
-                # Combine the results (e.g., join them) or handle them as required
-                obs.content = '\n'.join(str(result) for result in results)
-
-            if action.include_extra:
-                obs.content += (
-                    f'\n[Jupyter current working directory: {self.bash_session.cwd}]'
-                )
-                obs.content += f'\n[Jupyter Python interpreter: {_jupyter_plugin.python_interpreter_path}]'
-            return obs
-        else:
-            raise RuntimeError(
-                'JupyterRequirement not found. Unable to run IPython action.'
-            )
-
-    def _resolve_path(self, path: str, working_dir: str) -> str:
-        filepath = Path(path)
-        if not filepath.is_absolute():
-            return str(Path(working_dir) / filepath)
-        return str(filepath)
-
-    async def read(self, action: FileReadAction) -> Observation:
-        assert self.bash_session is not None
-        if action.impl_source == FileReadSource.OH_ACI:
-            return await self.run_ipython(
-                IPythonRunCellAction(
-                    code=action.translated_ipython_code,
-                    include_extra=False,
-                )
-            )
-
-        # NOTE: the client code is running inside the sandbox,
-        # so there's no need to check permission
-        working_dir = self.bash_session.cwd
-        filepath = self._resolve_path(action.path, working_dir)
-        try:
-            if filepath.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
-                with open(filepath, 'rb') as file:
-                    image_data = file.read()
-                    encoded_image = base64.b64encode(image_data).decode('utf-8')
-                    mime_type, _ = mimetypes.guess_type(filepath)
-                    if mime_type is None:
-                        mime_type = 'image/png'  # default to PNG if mime type cannot be determined
-                    encoded_image = f'data:{mime_type};base64,{encoded_image}'
-
-                return FileReadObservation(path=filepath, content=encoded_image)
-            elif filepath.lower().endswith('.pdf'):
-                with open(filepath, 'rb') as file:
-                    pdf_data = file.read()
-                    encoded_pdf = base64.b64encode(pdf_data).decode('utf-8')
-                    encoded_pdf = f'data:application/pdf;base64,{encoded_pdf}'
-                return FileReadObservation(path=filepath, content=encoded_pdf)
-            elif filepath.lower().endswith(('.mp4', '.webm', '.ogg')):
-                with open(filepath, 'rb') as file:
-                    video_data = file.read()
-                    encoded_video = base64.b64encode(video_data).decode('utf-8')
-                    mime_type, _ = mimetypes.guess_type(filepath)
-                    if mime_type is None:
-                        mime_type = 'video/mp4'  # default to MP4 if MIME type cannot be determined
-                    encoded_video = f'data:{mime_type};base64,{encoded_video}'
-
-                return FileReadObservation(path=filepath, content=encoded_video)
-
-            with open(filepath, 'r', encoding='utf-8') as file:
-                lines = read_lines(file.readlines(), action.start, action.end)
-        except FileNotFoundError:
-            return ErrorObservation(
-                f'File not found: {filepath}. Your current working directory is {working_dir}.'
-            )
-        except UnicodeDecodeError:
-            return ErrorObservation(f'File could not be decoded as utf-8: {filepath}.')
-        except IsADirectoryError:
-            return ErrorObservation(
-                f'Path is a directory: {filepath}. You can only read files'
-            )
-
-        code_view = ''.join(lines)
-        return FileReadObservation(path=filepath, content=code_view)
-
-    async def write(self, action: FileWriteAction) -> Observation:
-        assert self.bash_session is not None
-        working_dir = self.bash_session.cwd
-        filepath = self._resolve_path(action.path, working_dir)
-
-        insert = action.content.split('\n')
-        try:
-            if not os.path.exists(os.path.dirname(filepath)):
-                os.makedirs(os.path.dirname(filepath))
-
-            file_exists = os.path.exists(filepath)
-            if file_exists:
-                file_stat = os.stat(filepath)
-            else:
-                file_stat = None
-
-            mode = 'w' if not file_exists else 'r+'
-            try:
-                with open(filepath, mode, encoding='utf-8') as file:
-                    if mode != 'w':
-                        all_lines = file.readlines()
-                        new_file = insert_lines(
-                            insert, all_lines, action.start, action.end
-                        )
-                    else:
-                        new_file = [i + '\n' for i in insert]
-
-                    file.seek(0)
-                    file.writelines(new_file)
-                    file.truncate()
-
-                # Handle file permissions
-                if file_exists:
-                    assert file_stat is not None
-                    # restore the original file permissions if the file already exists
-                    os.chmod(filepath, file_stat.st_mode)
-                    os.chown(filepath, file_stat.st_uid, file_stat.st_gid)
-                else:
-                    # set the new file permissions if the file is new
-                    os.chmod(filepath, 0o664)
-                    os.chown(filepath, self.user_id, self.user_id)
-
-            except FileNotFoundError:
-                return ErrorObservation(f'File not found: {filepath}')
-            except IsADirectoryError:
-                return ErrorObservation(
-                    f'Path is a directory: {filepath}. You can only write to files'
-                )
-            except UnicodeDecodeError:
-                return ErrorObservation(
-                    f'File could not be decoded as utf-8: {filepath}'
-                )
-        except PermissionError:
-            return ErrorObservation(f'Malformed paths not permitted: {filepath}')
-        return FileWriteObservation(content='', path=filepath)
-
-    async def browse(self, action: BrowseURLAction) -> Observation:
-        return await browse(action, self.browser)
-
-    async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
-        return await browse(action, self.browser)
--- a/openhands/runtime/executor/base.py
+++ b/openhands/runtime/executor/base.py
@@ -1,126 +0,0 @@
-import asyncio
-import time
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action.commands import CmdRunAction, IPythonRunCellAction
-from openhands.events.observation.commands import CmdOutputObservation
-from openhands.events.observation.error import ErrorObservation
-from openhands.runtime.browser.browser_env import BrowserEnv
-from openhands.runtime.plugins.jupyter import JupyterPlugin
-from openhands.runtime.plugins.requirement import Plugin
-from openhands.runtime.utils.bash import BashSession
-from openhands.runtime.utils.runtime_init import init_user_and_working_directory
-from openhands.utils.async_utils import call_sync_from_async, wait_all
-
-
-ROOT_GID = 0
-INIT_COMMANDS = [
-    'git config --global user.name "openhands" && git config --global user.email "openhands@all-hands.dev" && alias git="git --no-pager"',
-]
-
-
-class RuntimeExecutor:
-    """RuntimeExecutor for running inside docker sandbox.
-    It provides a minimal base class that handles initialization of the executor, and provides a run method to execute bash commands.
-    """
-
-    def __init__(
-        self,
-        plugins_to_load: list[Plugin],
-        work_dir: str,
-        username: str,
-        user_id: int,
-        browsergym_eval_env: str | None,
-    ) -> None:
-        self.plugins_to_load = plugins_to_load
-        self._initial_cwd = work_dir
-        self.username = username
-        self.user_id = user_id
-        _updated_user_id = init_user_and_working_directory(
-            username=username, user_id=self.user_id, initial_cwd=work_dir
-        )
-        if _updated_user_id is not None:
-            self.user_id = _updated_user_id
-
-        self.bash_session: BashSession | None = None
-        self.lock = asyncio.Lock()
-        self.plugins: dict[str, Plugin] = {}
-        self.browser = BrowserEnv(browsergym_eval_env)
-        self.start_time = time.time()
-        self.last_execution_time = self.start_time
-        self._initialized = False
-
-    @property
-    def initial_cwd(self):
-        return self._initial_cwd
-
-    async def ainit(self):
-        # bash needs to be initialized first
-        self.bash_session = BashSession(
-            work_dir=self._initial_cwd,
-            username=self.username,
-        )
-        self.bash_session.initialize()
-        await wait_all(
-            (self._init_plugin(plugin) for plugin in self.plugins_to_load),
-            timeout=30,
-        )
-
-        # This is a temporary workaround
-        # TODO: refactor AgentSkills to be part of JupyterPlugin
-        # AFTER ServerRuntime is deprecated
-        if 'agent_skills' in self.plugins and 'jupyter' in self.plugins:
-            obs = await self.run_ipython(
-                IPythonRunCellAction(
-                    code='from openhands.runtime.plugins.agent_skills.agentskills import *\n'
-                )
-            )
-            logger.debug(f'AgentSkills initialized: {obs}')
-
-        await self._init_bash_commands()
-        logger.debug('Runtime client initialized.')
-
-        self._initialized = True
-
-    @property
-    def initialized(self) -> bool:
-        return self._initialized
-
-    async def _init_plugin(self, plugin: Plugin):
-        assert self.bash_session is not None
-        await plugin.initialize(self.username)
-        self.plugins[plugin.name] = plugin
-        logger.debug(f'Initializing plugin: {plugin.name}')
-
-        if isinstance(plugin, JupyterPlugin):
-            await self.run_ipython(
-                IPythonRunCellAction(
-                    code=f'import os; os.chdir("{self.bash_session.cwd}")'
-                )
-            )
-
-    async def _init_bash_commands(self):
-        logger.debug(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
-        for command in INIT_COMMANDS:
-            action = CmdRunAction(command=command)
-            action.timeout = 300
-            logger.debug(f'Executing init command: {command}')
-            obs = await self.run(action)
-            assert isinstance(obs, CmdOutputObservation)
-            logger.debug(
-                f'Init command outputs (exit code: {obs.exit_code}): {obs.content}'
-            )
-            assert obs.exit_code == 0
-
-        logger.debug('Bash init commands completed')
-
-    async def run(
-        self, action: CmdRunAction
-    ) -> CmdOutputObservation | ErrorObservation:
-        assert self.bash_session is not None
-        obs = await call_sync_from_async(self.bash_session.execute, action)
-        return obs
-
-    def close(self):
-        if self.bash_session is not None:
-            self.bash_session.close()
-        self.browser.close()
--- a/openhands/runtime/impl/daytona/README.md
+++ b/openhands/runtime/impl/daytona/README.md
@@ -1,24 +0,0 @@
-# Daytona Runtime
-
-[Daytona](https://www.daytona.io/) is a platform that provides a secure and elastic infrastructure for running AI-generated code. It provides all the necessary features for an AI Agent to interact with a codebase. It provides a Daytona SDK with official Python and TypeScript interfaces for interacting with Daytona, enabling you to programmatically manage development environments and execute code.
-
-## Getting started
-
-1. Sign in at https://app.daytona.io/
-
-1. Generate and copy your API key
-
-1. Set the following environment variables before running the OpenHands app on your local machine or via a `docker run` command:
-
-```bash
-    RUNTIME="daytona"
-    DAYTONA_API_KEY="<your-api-key>"
-```
-Optionally, if you don't want your sandboxes to default to the US region, set:
-
-```bash
-    DAYTONA_TARGET="eu"
-```
-
-## Documentation
-Read more by visiting our [documentation](https://www.daytona.io/docs/) page.
--- a/openhands/runtime/impl/daytona/daytona_runtime.py
+++ b/openhands/runtime/impl/daytona/daytona_runtime.py
@@ -1,262 +0,0 @@
-import json
-from typing import Callable
-
-import tenacity
-from daytona_sdk import (
-    CreateWorkspaceParams,
-    Daytona,
-    DaytonaConfig,
-    SessionExecuteRequest,
-    Workspace,
-)
-
-from openhands.core.config.app_config import AppConfig
-from openhands.events.stream import EventStream
-from openhands.runtime.impl.action_execution.action_execution_client import (
-    ActionExecutionClient,
-)
-from openhands.runtime.plugins.requirement import PluginRequirement
-from openhands.runtime.utils.command import get_action_execution_server_startup_command
-from openhands.utils.async_utils import call_sync_from_async
-from openhands.utils.tenacity_stop import stop_if_should_exit
-
-WORKSPACE_PREFIX = 'openhands-sandbox-'
-
-
-class DaytonaRuntime(ActionExecutionClient):
-    """The DaytonaRuntime class is a DockerRuntime that utilizes Daytona workspace as a runtime environment."""
-
-    _sandbox_port: int = 4444
-    _vscode_port: int = 4445
-
-    def __init__(
-        self,
-        config: AppConfig,
-        event_stream: EventStream,
-        sid: str = 'default',
-        plugins: list[PluginRequirement] | None = None,
-        env_vars: dict[str, str] | None = None,
-        status_callback: Callable | None = None,
-        attach_to_existing: bool = False,
-        headless_mode: bool = True,
-    ):
-        assert config.daytona_api_key, 'Daytona API key is required'
-
-        self.config = config
-        self.sid = sid
-        self.workspace_id = WORKSPACE_PREFIX + sid
-        self.workspace: Workspace | None = None
-        self._vscode_url: str | None = None
-
-        daytona_config = DaytonaConfig(
-            api_key=config.daytona_api_key.get_secret_value(),
-            server_url=config.daytona_api_url,
-            target=config.daytona_target,
-        )
-        self.daytona = Daytona(daytona_config)
-
-        # workspace_base cannot be used because we can't bind mount into a workspace.
-        if self.config.workspace_base is not None:
-            self.log(
-                'warning',
-                'Workspace mounting is not supported in the Daytona runtime.',
-            )
-
-        super().__init__(
-            config,
-            event_stream,
-            sid,
-            plugins,
-            env_vars,
-            status_callback,
-            attach_to_existing,
-            headless_mode,
-        )
-
-    def _get_workspace(self) -> Workspace | None:
-        try:
-            workspace = self.daytona.get_current_workspace(self.workspace_id)
-            self.log(
-                'info', f'Attached to existing workspace with id: {self.workspace_id}'
-            )
-        except Exception:
-            self.log(
-                'warning',
-                f'Failed to attach to existing workspace with id: {self.workspace_id}',
-            )
-            workspace = None
-
-        return workspace
-
-    def _get_creation_env_vars(self) -> dict[str, str]:
-        env_vars: dict[str, str] = {
-            'port': str(self._sandbox_port),
-            'PYTHONUNBUFFERED': '1',
-            'VSCODE_PORT': str(self._vscode_port),
-        }
-
-        if self.config.debug:
-            env_vars['DEBUG'] = 'true'
-
-        return env_vars
-
-    def _create_workspace(self) -> Workspace:
-        workspace_params = CreateWorkspaceParams(
-            id=self.workspace_id,
-            language='python',
-            image=self.config.sandbox.runtime_container_image,
-            public=True,
-            env_vars=self._get_creation_env_vars(),
-        )
-        workspace = self.daytona.create(workspace_params)
-        return workspace
-
-    def _get_workspace_status(self) -> str:
-        assert self.workspace is not None, 'Workspace is not initialized'
-        assert (
-            self.workspace.instance.info is not None
-        ), 'Workspace info is not available'
-        assert (
-            self.workspace.instance.info.provider_metadata is not None
-        ), 'Provider metadata is not available'
-
-        provider_metadata = json.loads(self.workspace.instance.info.provider_metadata)
-        return provider_metadata.get('status', 'unknown')
-
-    def _construct_api_url(self, port: int) -> str:
-        assert self.workspace is not None, 'Workspace is not initialized'
-        assert (
-            self.workspace.instance.info is not None
-        ), 'Workspace info is not available'
-        assert (
-            self.workspace.instance.info.provider_metadata is not None
-        ), 'Provider metadata is not available'
-
-        node_domain = json.loads(self.workspace.instance.info.provider_metadata)[
-            'nodeDomain'
-        ]
-        return f'https://{port}-{self.workspace.id}.{node_domain}'
-
-    def _get_action_execution_server_host(self) -> str:
-        return self.api_url
-
-    def _start_action_execution_server(self) -> None:
-        assert self.workspace is not None, 'Workspace is not initialized'
-
-        self.workspace.process.exec(
-            f'mkdir -p {self.config.workspace_mount_path_in_sandbox}'
-        )
-
-        start_command: list[str] = get_action_execution_server_startup_command(
-            server_port=self._sandbox_port,
-            plugins=self.plugins,
-            app_config=self.config,
-            override_user_id=1000,
-            override_username='openhands',
-        )
-        start_command_str: str = ' '.join(start_command)
-
-        self.log(
-            'debug',
-            f'Starting action execution server with command: {start_command_str}',
-        )
-
-        exec_session_id = 'action-execution-server'
-        self.workspace.process.create_session(exec_session_id)
-        self.workspace.process.execute_session_command(
-            exec_session_id,
-            SessionExecuteRequest(command='cd /openhands/code', var_async=True),
-        )
-
-        exec_command = self.workspace.process.execute_session_command(
-            exec_session_id,
-            SessionExecuteRequest(command=start_command_str, var_async=True),
-        )
-
-        self.log('debug', f'exec_command_id: {exec_command.cmd_id}')
-
-    @tenacity.retry(
-        stop=tenacity.stop_after_delay(120) | stop_if_should_exit(),
-        wait=tenacity.wait_fixed(1),
-        reraise=(ConnectionRefusedError,),
-    )
-    def _wait_until_alive(self):
-        super().check_if_alive()
-
-    async def connect(self):
-        self.send_status_message('STATUS$STARTING_RUNTIME')
-
-        if self.attach_to_existing:
-            self.workspace = await call_sync_from_async(self._get_workspace)
-
-        if self.workspace is None:
-            self.send_status_message('STATUS$PREPARING_CONTAINER')
-            self.workspace = await call_sync_from_async(self._create_workspace)
-            self.log('info', f'Created new workspace with id: {self.workspace_id}')
-
-        if self._get_workspace_status() == 'stopped':
-            self.log('info', 'Starting Daytona workspace...')
-            await call_sync_from_async(self.workspace.start)
-
-        self.api_url = await call_sync_from_async(
-            self._construct_api_url, self._sandbox_port
-        )
-
-        if not self.attach_to_existing:
-            await call_sync_from_async(self._start_action_execution_server)
-            self.log(
-                'info',
-                f'Container started. Action execution server url: {self.api_url}',
-            )
-
-        self.log('info', 'Waiting for client to become ready...')
-        self.send_status_message('STATUS$WAITING_FOR_CLIENT')
-        await call_sync_from_async(self._wait_until_alive)
-
-        if not self.attach_to_existing:
-            await call_sync_from_async(self.setup_initial_env)
-
-        self.log(
-            'info',
-            f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}',
-        )
-
-        if not self.attach_to_existing:
-            self.send_status_message(' ')
-        self._runtime_initialized = True
-
-    def close(self):
-        super().close()
-
-        if self.attach_to_existing:
-            return
-
-        if self.workspace:
-            self.daytona.remove(self.workspace)
-
-    @property
-    def vscode_url(self) -> str | None:
-        if self._vscode_url is not None:  # cached value
-            return self._vscode_url
-        token = super().get_vscode_token()
-        if not token:
-            self.log(
-                'warning', 'Failed to get VSCode token while trying to get VSCode URL'
-            )
-            return None
-        if not self.workspace:
-            self.log(
-                'warning', 'Workspace is not initialized while trying to get VSCode URL'
-            )
-            return None
-        self._vscode_url = (
-            self._construct_api_url(self._vscode_port)
-            + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'
-        )
-
-        self.log(
-            'debug',
-            f'VSCode URL: {self._vscode_url}',
-        )
-
-        return self._vscode_url
--- a/openhands/runtime/impl/modal/modal_runtime.py
+++ b/openhands/runtime/impl/modal/modal_runtime.py
@@ -270,10 +270,7 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc

        tunnel = self.sandbox.tunnels()[self._vscode_port]
        tunnel_url = tunnel.url
-        self._vscode_url = (
-            tunnel_url
-            + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'
-        )
+        self._vscode_url = tunnel_url + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'

        self.log(
            'debug',
--- a/openhands/runtime/utils/command.py
+++ b/openhands/runtime/utils/command.py
@@ -55,8 +55,6 @@ def get_action_execution_server_startup_command(
        '--user-id',
        str(user_id),
        *browsergym_args,
-        '--executor-class',
-        app_config.runtime_executor,
    ]

    return base_cmd
--- a/openhands/runtime/utils/vscode-extensions/hello-world/extension.js
+++ b/openhands/runtime/utils/vscode-extensions/hello-world/extension.js
@@ -13,4 +13,4 @@ function deactivate() {}
 module.exports = {
    activate,
    deactivate
-}
+}
--- a/openhands/runtime/utils/vscode-extensions/hello-world/package.json
+++ b/openhands/runtime/utils/vscode-extensions/hello-world/package.json
@@ -20,4 +20,4 @@
            "title": "Hello World from OpenHands"
        }]
    }
-}
+}
--- a/openhands/security/invariant/analyzer.py
+++ b/openhands/security/invariant/analyzer.py
@@ -307,17 +307,11 @@ class InvariantAnalyzer(SecurityAnalyzer):
        new_elements = parse_element(self.trace, event)
        input = [e.model_dump(exclude_none=True) for e in new_elements]  # type: ignore [call-overload]
        self.trace.extend(new_elements)
-        check_result = self.monitor.check(self.input, input)
+        result, err = self.monitor.check(self.input, input)
        self.input.extend(input)
        risk = ActionSecurityRisk.UNKNOWN
-        
-        if isinstance(check_result, tuple):
-            result, err = check_result
-            if err:
-                logger.warning(f'Error checking policy: {err}')
-                return risk
-        else:
-            logger.warning(f'Error checking policy: {check_result}')
+        if err:
+            logger.warning(f'Error checking policy: {err}')
            return risk

        risk = self.get_risk(result)
--- a/openhands/security/invariant/client.py
+++ b/openhands/security/invariant/client.py
@@ -50,7 +50,7 @@ class InvariantClient:
        return None

    class _Policy:
-        def __init__(self, invariant: 'InvariantClient') -> None:
+        def __init__(self, invariant):
            self.server = invariant.server
            self.session_id = invariant.session_id

@@ -77,7 +77,7 @@ class InvariantClient:
            except (ConnectionError, Timeout, HTTPError) as err:
                return None, err

-        def from_string(self, rule: str) -> 'InvariantClient._Policy':
+        def from_string(self, rule: str):
            policy_id, err = self._create_policy(rule)
            if err:
                raise err
@@ -97,7 +97,7 @@ class InvariantClient:
                return None, err

    class _Monitor:
-        def __init__(self, invariant: 'InvariantClient') -> None:
+        def __init__(self, invariant):
            self.server = invariant.server
            self.session_id = invariant.session_id
            self.policy = ''
@@ -114,7 +114,7 @@ class InvariantClient:
            except (ConnectionError, Timeout, HTTPError) as err:
                return None, err

-        def from_string(self, rule: str) -> 'InvariantClient._Monitor':
+        def from_string(self, rule: str):
            monitor_id, err = self._create_monitor(rule)
            if err:
                raise err
--- a/openhands/security/invariant/nodes.py
+++ b/openhands/security/invariant/nodes.py
@@ -1,4 +1,3 @@
-from typing import Any, Iterable, Tuple
 from pydantic import BaseModel, Field
 from pydantic.dataclasses import dataclass

@@ -11,7 +10,7 @@ class LLM:

 class Event(BaseModel):
    metadata: dict | None = Field(
-        default_factory=lambda: dict(), description='Metadata associated with the event'
+        default_factory=dict, description='Metadata associated with the event'
    )


@@ -31,7 +30,7 @@ class Message(Event):
    content: str | None
    tool_calls: list[ToolCall] | None = None

-    def __rich_repr__(self) -> Iterable[Any | tuple[Any] | tuple[str, Any] | tuple[str, Any, Any]]:
+    def __rich_repr__(self):
        # Print on separate line
        yield 'role', self.role
        yield 'content', self.content
--- a/openhands/server/routes/conversation.py
+++ b/openhands/server/routes/conversation.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Request, status
+from fastapi import APIRouter, Request
 from fastapi.responses import JSONResponse

 from openhands.core.logger import openhands_logger as logger
@@ -40,13 +40,11 @@ async def get_vscode_url(request: Request):
        runtime: Runtime = request.state.conversation.runtime
        logger.debug(f'Runtime type: {type(runtime)}')
        logger.debug(f'Runtime VSCode URL: {runtime.vscode_url}')
-        return JSONResponse(
-            status_code=status.HTTP_200_OK, content={'vscode_url': runtime.vscode_url}
-        )
+        return JSONResponse(status_code=200, content={'vscode_url': runtime.vscode_url})
    except Exception as e:
        logger.error(f'Error getting VSCode URL: {e}')
        return JSONResponse(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            status_code=500,
            content={
                'vscode_url': None,
                'error': f'Error getting VSCode URL: {e}',
--- a/openhands/server/routes/feedback.py
+++ b/openhands/server/routes/feedback.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Request, status
+from fastapi import APIRouter, Request
 from fastapi.responses import JSONResponse

 from openhands.core.logger import openhands_logger as logger
@@ -50,10 +50,9 @@ async def submit_feedback(request: Request, conversation_id: str):
    )
    try:
        feedback_data = await call_sync_from_async(store_feedback, feedback)
-        return JSONResponse(status_code=status.HTTP_200_OK, content=feedback_data)
+        return JSONResponse(status_code=200, content=feedback_data)
    except Exception as e:
        logger.error(f'Error submitting feedback: {e}')
        return JSONResponse(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={'error': 'Failed to submit feedback'},
+            status_code=500, content={'error': 'Failed to submit feedback'}
        )
--- a/openhands/server/routes/github.py
+++ b/openhands/server/routes/github.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Depends, status
+from fastapi import APIRouter, Depends
 from fastapi.responses import JSONResponse
 from pydantic import SecretStr

@@ -33,13 +33,13 @@ async def get_github_repositories(
    except GhAuthenticationError as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_401_UNAUTHORIZED,
+            status_code=401,
        )

    except GHUnknownException as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            status_code=500,
        )


@@ -56,13 +56,13 @@ async def get_github_user(
    except GhAuthenticationError as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_401_UNAUTHORIZED,
+            status_code=401,
        )

    except GHUnknownException as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            status_code=500,
        )


@@ -79,13 +79,13 @@ async def get_github_installation_ids(
    except GhAuthenticationError as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_401_UNAUTHORIZED,
+            status_code=401,
        )

    except GHUnknownException as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            status_code=500,
        )


@@ -108,11 +108,11 @@ async def search_github_repositories(
    except GhAuthenticationError as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_401_UNAUTHORIZED,
+            status_code=401,
        )

    except GHUnknownException as e:
        return JSONResponse(
            content=str(e),
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            status_code=500,
        )
--- a/openhands/server/routes/manage_conversations.py
+++ b/openhands/server/routes/manage_conversations.py
@@ -2,7 +2,7 @@ import uuid
 from datetime import datetime, timezone
 from typing import Callable

-from fastapi import APIRouter, Body, Request, status
+from fastapi import APIRouter, Body, Request
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, SecretStr

@@ -165,7 +165,7 @@ async def new_conversation(request: Request, data: InitSessionRequest):
                'message': str(e),
                'msg_id': 'CONFIGURATION$SETTINGS_NOT_FOUND',
            },
-            status_code=status.HTTP_400_BAD_REQUEST,
+            status_code=400,
        )

    except LLMAuthenticationError as e:
@@ -175,7 +175,7 @@ async def new_conversation(request: Request, data: InitSessionRequest):
                'message': str(e),
                'msg_id': 'STATUS$ERROR_LLM_AUTHENTICATION',
            },
-            status_code=status.HTTP_400_BAD_REQUEST,
+            status_code=400,
        )


@@ -189,20 +189,19 @@ async def search_conversations(
        config, get_user_id(request)
    )
    conversation_metadata_result_set = await conversation_store.search(page_id, limit)
-
+    
    # Filter out conversations older than max_age
    now = datetime.now(timezone.utc)
    max_age = config.conversation_max_age_seconds
    filtered_results = [
-        conversation
-        for conversation in conversation_metadata_result_set.results
-        if hasattr(conversation, 'created_at')
-        and (now - conversation.created_at.replace(tzinfo=timezone.utc)).total_seconds()
-        <= max_age
+        conversation for conversation in conversation_metadata_result_set.results
+        if hasattr(conversation, 'created_at') and 
+        (now - conversation.created_at.replace(tzinfo=timezone.utc)).total_seconds() <= max_age
    ]
-
+    
    conversation_ids = set(
-        conversation.conversation_id for conversation in filtered_results
+        conversation.conversation_id
+        for conversation in filtered_results
    )
    running_conversations = await conversation_manager.get_running_agent_loops(
        get_user_id(request), set(conversation_ids)
--- a/openhands/server/routes/security.py
+++ b/openhands/server/routes/security.py
@@ -2,7 +2,6 @@ from fastapi import (
    APIRouter,
    HTTPException,
    Request,
-    status,
 )

 app = APIRouter(prefix='/api/conversations/{conversation_id}')
@@ -24,10 +23,7 @@ async def security_api(request: Request):
        HTTPException: If the security analyzer is not initialized.
    """
    if not request.state.conversation.security_analyzer:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail='Security analyzer not initialized',
-        )
+        raise HTTPException(status_code=404, detail='Security analyzer not initialized')

    return await request.state.conversation.security_analyzer.handle_api_request(
        request
--- a/openhands/server/routes/trajectory.py
+++ b/openhands/server/routes/trajectory.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Request, status
+from fastapi import APIRouter, Request
 from fastapi.responses import JSONResponse

 from openhands.core.logger import openhands_logger as logger
@@ -28,13 +28,11 @@ async def get_trajectory(request: Request):
        trajectory = []
        async for event in async_stream:
            trajectory.append(event_to_trajectory(event))
-        return JSONResponse(
-            status_code=status.HTTP_200_OK, content={'trajectory': trajectory}
-        )
+        return JSONResponse(status_code=200, content={'trajectory': trajectory})
    except Exception as e:
        logger.error(f'Error getting trajectory: {e}', exc_info=True)
        return JSONResponse(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            status_code=500,
            content={
                'trajectory': None,
                'error': f'Error getting trajectory: {e}',
--- a/openhands/utils/ensure_httpx_close.py
+++ b/openhands/utils/ensure_httpx_close.py
@@ -1,43 +0,0 @@
-"""
-LiteLLM currently have an issue where HttpHandlers are being created but not
-closed. We have submitted a PR to them, (https://github.com/BerriAI/litellm/pull/8711)
-and their dev team say they are in the process of a refactor that will fix this, but
-in the meantime, we need to manage the lifecycle of the httpx.Client manually.
-
-We can't simply pass in our own client object, because all the different implementations use
-different types of client object.
-
-So we monkey patch the httpx.Client class to track newly created instances and close these
-when the operations complete. (This is relatively safe, as if the client is reused after this
-then is will transparently reopen)
-
-Hopefully, this will be fixed soon and we can remove this abomination.
-"""
-
-from dataclasses import dataclass, field
-from functools import wraps
-from typing import Callable
-
-from httpx import Client
-
-
-@dataclass
-class EnsureHttpxClose:
-    clients: list[Client] = field(default_factory=list)
-    original_init: Callable | None = None
-
-    def __enter__(self):
-        self.original_init = Client.__init__
-
-        @wraps(Client.__init__)
-        def init_wrapper(*args, **kwargs):
-            self.clients.append(args[0])
-            return self.original_init(*args, **kwargs)  # type: ignore
-
-        Client.__init__ = init_wrapper
-
-    def __exit__(self, type, value, traceback):
-        Client.__init__ = self.original_init
-        while self.clients:
-            client = self.clients.pop()
-            client.close()
--- a/openhands/utils/http_session.py
+++ b/openhands/utils/http_session.py
@@ -1,8 +1,6 @@
 from dataclasses import dataclass, field
-from typing import Any, cast

 import requests
-from requests.structures import CaseInsensitiveDict

 from openhands.core.logger import openhands_logger as logger

@@ -17,25 +15,13 @@ class HttpSession:

    session: requests.Session | None = field(default_factory=requests.Session)

-    def __getattr__(self, name: str) -> Any:
+    def __getattr__(self, name):
        if self.session is None:
            logger.error(
                'Session is being used after close!', stack_info=True, exc_info=True
            )
-            raise RuntimeError('Session is being used after close!')
-        return getattr(self.session, name)
+        return object.__getattribute__(self.session, name)

-    @property
-    def headers(self) -> CaseInsensitiveDict[str]:
-        if self.session is None:
-            logger.error(
-                'Session is being used after close!', stack_info=True, exc_info=True
-            )
-            raise RuntimeError('Session is being used after close!')
-        # Cast to CaseInsensitiveDict[str] since mypy doesn't know the exact type
-        return cast(CaseInsensitiveDict[str], self.session.headers)
-
-    def close(self) -> None:
+    def close(self):
        if self.session is not None:
            self.session.close()
-            self.session = None
--- a/openhands/utils/prompt.py
+++ b/openhands/utils/prompt.py
@@ -5,7 +5,6 @@ from itertools import islice
 from jinja2 import Template

 from openhands.controller.state.state import State
-from openhands.core.logger import openhands_logger
 from openhands.core.message import Message, TextContent
 from openhands.microagent import (
    BaseMicroAgent,
@@ -110,12 +109,11 @@ class PromptManager:
                if name not in self.disabled_microagents:
                    self.repo_microagents[name] = microagent

-    def load_microagents(self, microagents: list[BaseMicroAgent]) -> None:
+    def load_microagents(self, microagents: list[BaseMicroAgent]):
        """Load microagents from a list of BaseMicroAgents.

        This is typically used when loading microagents from inside a repo.
        """
-        openhands_logger.info('Loading microagents: %s', [m.name for m in microagents])
        # Only keep KnowledgeMicroAgents and RepoMicroAgents
        for microagent in microagents:
            if microagent.name in self.disabled_microagents:
@@ -137,7 +135,7 @@ class PromptManager:
    def get_system_message(self) -> str:
        return self.system_template.render().strip()

-    def set_runtime_info(self, runtime: Runtime) -> None:
+    def set_runtime_info(self, runtime: Runtime):
        self.runtime_info.available_hosts = runtime.web_hosts

    def set_repository_info(
@@ -181,11 +179,6 @@ class PromptManager:
        for microagent in self.knowledge_microagents.values():
            trigger = microagent.match_trigger(message_content)
            if trigger:
-                openhands_logger.info(
-                    "Microagent '%s' triggered by keyword '%s'",
-                    microagent.name,
-                    trigger,
-                )
                micro_text = f'<extra_info>\nThe following information has been included based on a keyword match for "{trigger}". It may or may not be relevant to the user\'s request.'
                micro_text += '\n\n' + microagent.content
                micro_text += '\n</extra_info>'
--- a/openhands/utils/shutdown_listener.py
+++ b/openhands/utils/shutdown_listener.py
@@ -19,10 +19,10 @@ _should_exit = None
 _shutdown_listeners: dict[UUID, Callable] = {}


-def _register_signal_handler(sig: signal.Signals) -> None:
+def _register_signal_handler(sig: signal.Signals):
    original_handler = None

-    def handler(sig_: int, frame: FrameType | None) -> None:
+    def handler(sig_: int, frame: FrameType | None):
        logger.debug(f'shutdown_signal:{sig_}')
        global _should_exit
        if not _should_exit:
@@ -39,7 +39,7 @@ def _register_signal_handler(sig: signal.Signals) -> None:
    original_handler = signal.signal(sig, handler)


-def _register_signal_handlers() -> None:
+def _register_signal_handlers():
    global _should_exit
    if _should_exit is not None:
        return
@@ -66,7 +66,7 @@ def should_continue() -> bool:
    return not _should_exit


-def sleep_if_should_continue(timeout: float) -> None:
+def sleep_if_should_continue(timeout: float):
    if timeout <= 1:
        time.sleep(timeout)
        return
@@ -75,7 +75,7 @@ def sleep_if_should_continue(timeout: float) -> None:
        time.sleep(1)


-async def async_sleep_if_should_continue(timeout: float) -> None:
+async def async_sleep_if_should_continue(timeout: float):
    if timeout <= 1:
        await asyncio.sleep(timeout)
        return
--- a/openhands/utils/tenacity_stop.py
+++ b/openhands/utils/tenacity_stop.py
@@ -8,4 +8,4 @@ class stop_if_should_exit(stop_base):
    """Stop if the should_exit flag is set."""

    def __call__(self, retry_state: 'RetryCallState') -> bool:
-        return bool(should_exit())
+        return should_exit()
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand.

 [[package]]
 name = "aiohappyeyeballs"
@@ -181,14 +181,14 @@ files = [

 [[package]]
 name = "anthropic"
-version = "0.46.0"
+version = "0.45.2"
 description = "The official Python library for the anthropic API"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "anthropic-0.46.0-py3-none-any.whl", hash = "sha256:1445ec9be78d2de7ea51b4d5acd3574e414aea97ef903d0ecbb57bec806aaa49"},
-    {file = "anthropic-0.46.0.tar.gz", hash = "sha256:eac3d43271d02321a57c3ca68aca84c3d58873e8e72d1433288adee2d46b745b"},
+    {file = "anthropic-0.45.2-py3-none-any.whl", hash = "sha256:ecd746f7274451dfcb7e1180571ead624c7e1195d1d46cb7c70143d2aedb4d35"},
+    {file = "anthropic-0.45.2.tar.gz", hash = "sha256:32a18b9ecd12c91b2be4cae6ca2ab46a06937b5aa01b21308d97a6d29794fb5e"},
 ]

 [package.dependencies]
@@ -601,18 +601,18 @@ files = [

 [[package]]
 name = "boto3"
-version = "1.36.25"
+version = "1.36.22"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "boto3-1.36.25-py3-none-any.whl", hash = "sha256:41fb90a516995946563ec91b9d891e2516c58617e9556d5e86dfa62da3fdebe6"},
-    {file = "boto3-1.36.25.tar.gz", hash = "sha256:a057c19adffb48737c192bdb10f9d85e0d9dcecd21327f51520c15db9022a835"},
+    {file = "boto3-1.36.22-py3-none-any.whl", hash = "sha256:39957eabdce009353d72d131046489fbbfa15891865d5f069f1e8bfa414e6b81"},
+    {file = "boto3-1.36.22.tar.gz", hash = "sha256:768c8a4d4a6227fe2258105efa086f1424cba5ca915a5eb2305b2cd979306ad1"},
 ]

 [package.dependencies]
-botocore = ">=1.36.25,<1.37.0"
+botocore = ">=1.36.22,<1.37.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.11.0,<0.12.0"

@@ -621,14 +621,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

 [[package]]
 name = "botocore"
-version = "1.36.25"
+version = "1.36.22"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "botocore-1.36.25-py3-none-any.whl", hash = "sha256:04c8ff03531e8d92baa8c98d1850bdf01668a805467f4222b65e5325f94aa8af"},
-    {file = "botocore-1.36.25.tar.gz", hash = "sha256:3b0a857d2621c336fb82a36cb6da4b6e062d346451ac46d110b074e5e5fd7cfc"},
+    {file = "botocore-1.36.22-py3-none-any.whl", hash = "sha256:75d6b34acb0686ee4d54ff6eb285e78ccfe318407428769d1e3e13351714d890"},
+    {file = "botocore-1.36.22.tar.gz", hash = "sha256:59520247d5a479731724f97c995d5a1c2aae3b303b324f39d99efcfad1d3019e"},
 ]

 [package.dependencies]
@@ -1507,47 +1507,6 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0
 torch = ["torch"]
 vision = ["Pillow (>=9.4.0)"]

-[[package]]
-name = "daytona-api-client"
-version = "0.13.0"
-description = "Daytona Workspaces"
-optional = false
-python-versions = "*"
-groups = ["main"]
-files = [
-    {file = "daytona_api_client-0.13.0-py3-none-any.whl", hash = "sha256:c4d0dcb89a328c4d0a97d8f076eaf9a00ccc54a8b9f862f4b3302ae887d03c8f"},
-    {file = "daytona_api_client-0.13.0.tar.gz", hash = "sha256:d62b7cb14361b2706df192d2da7dc2b5d02be6fd4259e9433cf2bfdc5807416d"},
-]
-
-[package.dependencies]
-pydantic = ">=2"
-python-dateutil = ">=2.8.2"
-typing-extensions = ">=4.7.1"
-urllib3 = ">=1.25.3,<3.0.0"
-
-[[package]]
-name = "daytona-sdk"
-version = "0.9.1"
-description = "Python SDK for Daytona"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "daytona_sdk-0.9.1-py3-none-any.whl", hash = "sha256:cce6c90cd3d578747b3c388e24c811cb0b21ad125d34b32836c50059a577a12a"},
-    {file = "daytona_sdk-0.9.1.tar.gz", hash = "sha256:1e2f219f55130fc72d2f14a57d008b8d3e236d45294e0ca51e249106be5ca5de"},
-]
-
-[package.dependencies]
-daytona_api_client = ">=0.13.0,<1.0.0"
-environs = ">=9.5.0,<10.0.0"
-marshmallow = ">=3.19.0,<4.0.0"
-pydantic = ">=2.4.2,<3.0.0"
-python-dateutil = ">=2.8.2,<3.0.0"
-urllib3 = ">=2.0.7,<3.0.0"
-
-[package.extras]
-dev = ["black (>=22.0.0)", "isort (>=5.10.0)", "pydoc-markdown (>=4.8.2)"]
-
 [[package]]
 name = "debugpy"
 version = "1.8.12"
@@ -1772,28 +1731,6 @@ files = [
    {file = "english-words-2.0.1.tar.gz", hash = "sha256:a4105c57493bb757a3d8973fcf8e1dc05e7ca09c836dff467c3fb445f84bc43d"},
 ]

-[[package]]
-name = "environs"
-version = "9.5.0"
-description = "simplified environment variable parsing"
-optional = false
-python-versions = ">=3.6"
-groups = ["main"]
-files = [
-    {file = "environs-9.5.0-py2.py3-none-any.whl", hash = "sha256:1e549569a3de49c05f856f40bce86979e7d5ffbbc4398e7f338574c220189124"},
-    {file = "environs-9.5.0.tar.gz", hash = "sha256:a76307b36fbe856bdca7ee9161e6c466fd7fcffc297109a118c59b54e27e30c9"},
-]
-
-[package.dependencies]
-marshmallow = ">=3.0.0"
-python-dotenv = "*"
-
-[package.extras]
-dev = ["dj-database-url", "dj-email-url", "django-cache-url", "flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)", "pytest", "tox"]
-django = ["dj-database-url", "dj-email-url", "django-cache-url"]
-lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"]
-tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"]
-
 [[package]]
 name = "evaluate"
 version = "0.4.3"
@@ -2534,14 +2471,14 @@ tool = ["click (>=6.0.0)"]

 [[package]]
 name = "google-cloud-aiplatform"
-version = "1.81.0"
+version = "1.80.0"
 description = "Vertex AI API client library"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "google_cloud_aiplatform-1.81.0-py2.py3-none-any.whl", hash = "sha256:e4b6745dfd1f6215d690e9589239d2e7ae2553e39bf9c24c7b7581af0f2f6a68"},
-    {file = "google_cloud_aiplatform-1.81.0.tar.gz", hash = "sha256:1398be33bfc2725dde47555e559b89e8cb3b2d676a47a9802d9f33a89f1630bf"},
+    {file = "google_cloud_aiplatform-1.80.0-py2.py3-none-any.whl", hash = "sha256:45d2a170f22431dae977551eccb740400bdb899807d0c8d4c16c53b2c1dbc6a5"},
+    {file = "google_cloud_aiplatform-1.80.0.tar.gz", hash = "sha256:bcaa4570a6fb56d3d29cb6b8f92588d4d1a1931de5f90cf07761853dab4c76fd"},
 ]

 [package.dependencies]
@@ -2565,8 +2502,8 @@ datasets = ["pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0de
 endpoint = ["requests (>=2.28.1)"]
 evaluation = ["pandas (>=1.0.0)", "scikit-learn", "scikit-learn (<1.6.0)", "tqdm (>=4.23.0)"]
 full = ["docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.114.0)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-vizier (>=0.1.6)", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.16.0)", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pyyaml (>=5.3.1,<7)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "requests (>=2.28.1)", "scikit-learn", "scikit-learn (<1.6.0)", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<2.18.0)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "tqdm (>=4.23.0)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)"]
-langchain = ["langchain (>=0.3,<0.4)", "langchain-core (>=0.3,<0.4)", "langchain-google-vertexai (>=2,<3)", "langgraph (>=0.2.45,<0.3)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)"]
-langchain-testing = ["absl-py", "cloudpickle (>=3.0,<4.0)", "google-cloud-trace (<2)", "langchain (>=0.3,<0.4)", "langchain-core (>=0.3,<0.4)", "langchain-google-vertexai (>=2,<3)", "langgraph (>=0.2.45,<0.3)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "pydantic (>=2.6.3,<3)", "pytest-xdist", "typing-extensions"]
+langchain = ["langchain (>=0.1.16,<0.4)", "langchain-core (<0.4)", "langchain-google-vertexai (<3)", "langgraph (>=0.2.45,<0.3)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)"]
+langchain-testing = ["absl-py", "cloudpickle (>=3.0,<4.0)", "google-cloud-trace (<2)", "langchain (>=0.1.16,<0.4)", "langchain-core (<0.4)", "langchain-google-vertexai (<3)", "langgraph (>=0.2.45,<0.3)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "pydantic (>=2.6.3,<3)", "pytest-xdist", "typing-extensions"]
 lit = ["explainable-ai-sdk (>=1.0.0)", "lit-nlp (==0.4.0)", "pandas (>=1.0.0)", "tensorflow (>=2.3.0,<3.0.0dev)"]
 metadata = ["numpy (>=1.15.0)", "pandas (>=1.0.0)"]
 pipelines = ["pyyaml (>=5.3.1,<7)"]
@@ -3195,14 +3132,14 @@ zstd = ["zstandard (>=0.18.0)"]

 [[package]]
 name = "huggingface-hub"
-version = "0.29.0"
+version = "0.28.1"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
 groups = ["main", "evaluation", "llama-index"]
 files = [
-    {file = "huggingface_hub-0.29.0-py3-none-any.whl", hash = "sha256:c02daa0b6bafbdacb1320fdfd1dc7151d0940825c88c4ef89837fdb1f6ea0afe"},
-    {file = "huggingface_hub-0.29.0.tar.gz", hash = "sha256:64034c852be270cac16c5743fe1f659b14515a9de6342d6f42cbb2ede191fc80"},
+    {file = "huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7"},
+    {file = "huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae"},
 ]

 [package.dependencies]
@@ -3642,14 +3579,14 @@ files = [

 [[package]]
 name = "json-repair"
-version = "0.39.0"
+version = "0.38.0"
 description = "A package to repair broken json strings"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "json_repair-0.39.0-py3-none-any.whl", hash = "sha256:a17838801dc2cbaa967ef3ee69ea8b0379819fec320e7e53fd062fda55080e76"},
-    {file = "json_repair-0.39.0.tar.gz", hash = "sha256:d6fb9817e60d923d887814a78b5c174250b542c4b03ea548071b5acdfa8c1408"},
+    {file = "json_repair-0.38.0-py3-none-any.whl", hash = "sha256:e615ab1bd69c986cdb21d4a2378308ce6f91e0168c036cde7188f69abc6e9459"},
+    {file = "json_repair-0.38.0.tar.gz", hash = "sha256:df9fbe6ba8d02f2eb17010508c9838b9c708557f67b1429829874eed559e7ca3"},
 ]

 [[package]]
@@ -4067,14 +4004,14 @@ files = [

 [[package]]
 name = "kubernetes"
-version = "32.0.1"
+version = "32.0.0"
 description = "Kubernetes python client"
 optional = false
 python-versions = ">=3.6"
 groups = ["llama-index"]
 files = [
-    {file = "kubernetes-32.0.1-py2.py3-none-any.whl", hash = "sha256:35282ab8493b938b08ab5526c7ce66588232df00ef5e1dbe88a419107dc10998"},
-    {file = "kubernetes-32.0.1.tar.gz", hash = "sha256:42f43d49abd437ada79a79a16bd48a604d3471a117a8347e87db693f2ba0ba28"},
+    {file = "kubernetes-32.0.0-py2.py3-none-any.whl", hash = "sha256:60fd8c29e8e43d9c553ca4811895a687426717deba9c0a66fb2dcc3f5ef96692"},
+    {file = "kubernetes-32.0.0.tar.gz", hash = "sha256:319fa840345a482001ac5d6062222daeb66ec4d1bcb3087402aed685adf0aecb"},
 ]

 [package.dependencies]
@@ -4182,14 +4119,14 @@ types-tqdm = "*"

 [[package]]
 name = "litellm"
-version = "1.61.13"
+version = "1.61.8"
 description = "Library to easily interface with LLM API providers"
 optional = false
 python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
 groups = ["main"]
 files = [
-    {file = "litellm-1.61.13-py3-none-any.whl", hash = "sha256:184376f26d39fba6975a0c9167166d857bebd0373939605c7181ff296affd2af"},
-    {file = "litellm-1.61.13.tar.gz", hash = "sha256:569102ae22c3df198dc5903f811582d61bf347951cacf67192511e391a1b3293"},
+    {file = "litellm-1.61.8-py3-none-any.whl", hash = "sha256:da895efefb86b71d2213257d2b57ed38c42c48c590c474c65473cdc4791e8b32"},
+    {file = "litellm-1.61.8.tar.gz", hash = "sha256:efebcafeb014c76ca992a5a49f5f2c6c8a944723c6a91b0cc70442911c3a656f"},
 ]

 [package.dependencies]
@@ -4849,7 +4786,7 @@ version = "3.26.1"
 description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "evaluation", "llama-index"]
+groups = ["evaluation", "llama-index"]
 files = [
    {file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"},
    {file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"},
@@ -4996,14 +4933,14 @@ urllib3 = "*"

 [[package]]
 name = "mistune"
-version = "3.1.2"
+version = "3.1.1"
 description = "A sane and fast Markdown parser with useful plugins and renderers"
 optional = false
 python-versions = ">=3.8"
 groups = ["runtime"]
 files = [
-    {file = "mistune-3.1.2-py3-none-any.whl", hash = "sha256:4b47731332315cdca99e0ded46fc0004001c1299ff773dfb48fbe1fd226de319"},
-    {file = "mistune-3.1.2.tar.gz", hash = "sha256:733bf018ba007e8b5f2d3a9eb624034f6ee26c4ea769a98ec533ee111d504dff"},
+    {file = "mistune-3.1.1-py3-none-any.whl", hash = "sha256:02106ac2aa4f66e769debbfa028509a275069dcffce0dfa578edd7b991ee700a"},
+    {file = "mistune-3.1.1.tar.gz", hash = "sha256:e0740d635f515119f7d1feb6f9b192ee60f0cc649f80a8f944f905706a21654c"},
 ]

 [[package]]
@@ -5107,14 +5044,14 @@ type = ["mypy (==1.14.1)"]

 [[package]]
 name = "modal"
-version = "0.73.61"
+version = "0.73.53"
 description = "Python client library for Modal"
 optional = false
 python-versions = ">=3.9"
 groups = ["main", "evaluation"]
 files = [
-    {file = "modal-0.73.61-py3-none-any.whl", hash = "sha256:64ea22fcf245c48786fd1efa935425cc6de5cee34fc0cc7cdc6bc6543259bfca"},
-    {file = "modal-0.73.61.tar.gz", hash = "sha256:624e1871b7c2af7a07fb587c1e02bc74fa39fed7c62e9db4996081e0d552e4ab"},
+    {file = "modal-0.73.53-py3-none-any.whl", hash = "sha256:64f0307cf07220878c799f193b340447e701cbe6d29b6ce506c286e8ca812ed9"},
+    {file = "modal-0.73.53.tar.gz", hash = "sha256:a50b281c63028bfeb1d971c698d3d3624bbac42d27ed25b34831b77262132aeb"},
 ]

 [package.dependencies]
@@ -6841,54 +6778,54 @@ files = [

 [[package]]
 name = "pyarrow"
-version = "19.0.1"
+version = "19.0.0"
 description = "Python library for Apache Arrow"
 optional = false
 python-versions = ">=3.9"
 groups = ["main", "evaluation"]
 files = [
-    {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"},
-    {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"},
-    {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76aef7f5f7e4a757fddcdcf010a8290958f09e3470ea458c80d26f4316ae89"},
-    {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d03c9d6f2a3dffbd62671ca070f13fc527bb1867b4ec2b98c7eeed381d4f389a"},
-    {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:65cf9feebab489b19cdfcfe4aa82f62147218558d8d3f0fc1e9dea0ab8e7905a"},
-    {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:41f9706fbe505e0abc10e84bf3a906a1338905cbbcf1177b71486b03e6ea6608"},
-    {file = "pyarrow-19.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6cb2335a411b713fdf1e82a752162f72d4a7b5dbc588e32aa18383318b05866"},
-    {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc55d71898ea30dc95900297d191377caba257612f384207fe9f8293b5850f90"},
-    {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7a544ec12de66769612b2d6988c36adc96fb9767ecc8ee0a4d270b10b1c51e00"},
-    {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0148bb4fc158bfbc3d6dfe5001d93ebeed253793fff4435167f6ce1dc4bddeae"},
-    {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f24faab6ed18f216a37870d8c5623f9c044566d75ec586ef884e13a02a9d62c5"},
-    {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4982f8e2b7afd6dae8608d70ba5bd91699077323f812a0448d8b7abdff6cb5d3"},
-    {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:49a3aecb62c1be1d822f8bf629226d4a96418228a42f5b40835c1f10d42e4db6"},
-    {file = "pyarrow-19.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:008a4009efdb4ea3d2e18f05cd31f9d43c388aad29c636112c2966605ba33466"},
-    {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:80b2ad2b193e7d19e81008a96e313fbd53157945c7be9ac65f44f8937a55427b"},
-    {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ee8dec072569f43835932a3b10c55973593abc00936c202707a4ad06af7cb294"},
-    {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5d1ec7ec5324b98887bdc006f4d2ce534e10e60f7ad995e7875ffa0ff9cb14"},
-    {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ad4c0eb4e2a9aeb990af6c09e6fa0b195c8c0e7b272ecc8d4d2b6574809d34"},
-    {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d383591f3dcbe545f6cc62daaef9c7cdfe0dff0fb9e1c8121101cabe9098cfa6"},
-    {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b4c4156a625f1e35d6c0b2132635a237708944eb41df5fbe7d50f20d20c17832"},
-    {file = "pyarrow-19.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:5bd1618ae5e5476b7654c7b55a6364ae87686d4724538c24185bbb2952679960"},
-    {file = "pyarrow-19.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e45274b20e524ae5c39d7fc1ca2aa923aab494776d2d4b316b49ec7572ca324c"},
-    {file = "pyarrow-19.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d9dedeaf19097a143ed6da37f04f4051aba353c95ef507764d344229b2b740ae"},
-    {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ebfb5171bb5f4a52319344ebbbecc731af3f021e49318c74f33d520d31ae0c4"},
-    {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a21d39fbdb948857f67eacb5bbaaf36802de044ec36fbef7a1c8f0dd3a4ab2"},
-    {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:99bc1bec6d234359743b01e70d4310d0ab240c3d6b0da7e2a93663b0158616f6"},
-    {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1b93ef2c93e77c442c979b0d596af45e4665d8b96da598db145b0fec014b9136"},
-    {file = "pyarrow-19.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d9d46e06846a41ba906ab25302cf0fd522f81aa2a85a71021826f34639ad31ef"},
-    {file = "pyarrow-19.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c0fe3dbbf054a00d1f162fda94ce236a899ca01123a798c561ba307ca38af5f0"},
-    {file = "pyarrow-19.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:96606c3ba57944d128e8a8399da4812f56c7f61de8c647e3470b417f795d0ef9"},
-    {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f04d49a6b64cf24719c080b3c2029a3a5b16417fd5fd7c4041f94233af732f3"},
-    {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a9137cf7e1640dce4c190551ee69d478f7121b5c6f323553b319cac936395f6"},
-    {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7c1bca1897c28013db5e4c83944a2ab53231f541b9e0c3f4791206d0c0de389a"},
-    {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8"},
-    {file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b9766a47a9cb56fefe95cb27f535038b5a195707a08bf61b180e642324963b46"},
-    {file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:6c5941c1aac89a6c2f2b16cd64fe76bcdb94b2b1e99ca6459de4e6f07638d755"},
-    {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd44d66093a239358d07c42a91eebf5015aa54fccba959db899f932218ac9cc8"},
-    {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:335d170e050bcc7da867a1ed8ffb8b44c57aaa6e0843b156a501298657b1e972"},
-    {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:1c7556165bd38cf0cd992df2636f8bcdd2d4b26916c6b7e646101aff3c16f76f"},
-    {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:699799f9c80bebcf1da0983ba86d7f289c5a2a5c04b945e2f2bcf7e874a91911"},
-    {file = "pyarrow-19.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8464c9fbe6d94a7fe1599e7e8965f350fd233532868232ab2596a71586c5a429"},
-    {file = "pyarrow-19.0.1.tar.gz", hash = "sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e"},
+    {file = "pyarrow-19.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c318eda14f6627966997a7d8c374a87d084a94e4e38e9abbe97395c215830e0c"},
+    {file = "pyarrow-19.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:62ef8360ff256e960f57ce0299090fb86423afed5e46f18f1225f960e05aae3d"},
+    {file = "pyarrow-19.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2795064647add0f16563e57e3d294dbfc067b723f0fd82ecd80af56dad15f503"},
+    {file = "pyarrow-19.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a218670b26fb1bc74796458d97bcab072765f9b524f95b2fccad70158feb8b17"},
+    {file = "pyarrow-19.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:66732e39eaa2247996a6b04c8aa33e3503d351831424cdf8d2e9a0582ac54b34"},
+    {file = "pyarrow-19.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:e675a3ad4732b92d72e4d24009707e923cab76b0d088e5054914f11a797ebe44"},
+    {file = "pyarrow-19.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:f094742275586cdd6b1a03655ccff3b24b2610c3af76f810356c4c71d24a2a6c"},
+    {file = "pyarrow-19.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:8e3a839bf36ec03b4315dc924d36dcde5444a50066f1c10f8290293c0427b46a"},
+    {file = "pyarrow-19.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:ce42275097512d9e4e4a39aade58ef2b3798a93aa3026566b7892177c266f735"},
+    {file = "pyarrow-19.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9348a0137568c45601b031a8d118275069435f151cbb77e6a08a27e8125f59d4"},
+    {file = "pyarrow-19.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a0144a712d990d60f7f42b7a31f0acaccf4c1e43e957f7b1ad58150d6f639c1"},
+    {file = "pyarrow-19.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2a1a109dfda558eb011e5f6385837daffd920d54ca00669f7a11132d0b1e6042"},
+    {file = "pyarrow-19.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:be686bf625aa7b9bada18defb3a3ea3981c1099697239788ff111d87f04cd263"},
+    {file = "pyarrow-19.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:239ca66d9a05844bdf5af128861af525e14df3c9591bcc05bac25918e650d3a2"},
+    {file = "pyarrow-19.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:a7bbe7109ab6198688b7079cbad5a8c22de4d47c4880d8e4847520a83b0d1b68"},
+    {file = "pyarrow-19.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:4624c89d6f777c580e8732c27bb8e77fd1433b89707f17c04af7635dd9638351"},
+    {file = "pyarrow-19.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b6d3ce4288793350dc2d08d1e184fd70631ea22a4ff9ea5c4ff182130249d9b"},
+    {file = "pyarrow-19.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:450a7d27e840e4d9a384b5c77199d489b401529e75a3b7a3799d4cd7957f2f9c"},
+    {file = "pyarrow-19.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a08e2a8a039a3f72afb67a6668180f09fddaa38fe0d21f13212b4aba4b5d2451"},
+    {file = "pyarrow-19.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f43f5aef2a13d4d56adadae5720d1fed4c1356c993eda8b59dace4b5983843c1"},
+    {file = "pyarrow-19.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:2f672f5364b2d7829ef7c94be199bb88bf5661dd485e21d2d37de12ccb78a136"},
+    {file = "pyarrow-19.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:cf3bf0ce511b833f7bc5f5bb3127ba731e97222023a444b7359f3a22e2a3b463"},
+    {file = "pyarrow-19.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:4d8b0c0de0a73df1f1bf439af1b60f273d719d70648e898bc077547649bb8352"},
+    {file = "pyarrow-19.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92aff08e23d281c69835e4a47b80569242a504095ef6a6223c1f6bb8883431d"},
+    {file = "pyarrow-19.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3b78eff5968a1889a0f3bc81ca57e1e19b75f664d9c61a42a604bf9d8402aae"},
+    {file = "pyarrow-19.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:b34d3bde38eba66190b215bae441646330f8e9da05c29e4b5dd3e41bde701098"},
+    {file = "pyarrow-19.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5418d4d0fab3a0ed497bad21d17a7973aad336d66ad4932a3f5f7480d4ca0c04"},
+    {file = "pyarrow-19.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e82c3d5e44e969c217827b780ed8faf7ac4c53f934ae9238872e749fa531f7c9"},
+    {file = "pyarrow-19.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f208c3b58a6df3b239e0bb130e13bc7487ed14f39a9ff357b6415e3f6339b560"},
+    {file = "pyarrow-19.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:c751c1c93955b7a84c06794df46f1cec93e18610dcd5ab7d08e89a81df70a849"},
+    {file = "pyarrow-19.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b903afaa5df66d50fc38672ad095806443b05f202c792694f3a604ead7c6ea6e"},
+    {file = "pyarrow-19.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a22a4bc0937856263df8b94f2f2781b33dd7f876f787ed746608e06902d691a5"},
+    {file = "pyarrow-19.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:5e8a28b918e2e878c918f6d89137386c06fe577cd08d73a6be8dafb317dc2d73"},
+    {file = "pyarrow-19.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:29cd86c8001a94f768f79440bf83fee23963af5e7bc68ce3a7e5f120e17edf89"},
+    {file = "pyarrow-19.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:c0423393e4a07ff6fea08feb44153302dd261d0551cc3b538ea7a5dc853af43a"},
+    {file = "pyarrow-19.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:718947fb6d82409013a74b176bf93e0f49ef952d8a2ecd068fecd192a97885b7"},
+    {file = "pyarrow-19.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c1c162c4660e0978411a4761f91113dde8da3433683efa473501254563dcbe8"},
+    {file = "pyarrow-19.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c73268cf557e688efb60f1ccbc7376f7e18cd8e2acae9e663e98b194c40c1a2d"},
+    {file = "pyarrow-19.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:edfe6d3916e915ada9acc4e48f6dafca7efdbad2e6283db6fd9385a1b23055f1"},
+    {file = "pyarrow-19.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:da410b70a7ab8eb524112f037a7a35da7128b33d484f7671a264a4c224ac131d"},
+    {file = "pyarrow-19.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:597360ffc71fc8cceea1aec1fb60cb510571a744fffc87db33d551d5de919bec"},
+    {file = "pyarrow-19.0.0.tar.gz", hash = "sha256:8d47c691765cf497aaeed4954d226568563f1b3b74ff61139f2d77876717084b"},
 ]

 [package.extras]
@@ -8334,42 +8271,42 @@ pyasn1 = ">=0.1.3"

 [[package]]
 name = "ruff"
-version = "0.9.7"
+version = "0.9.6"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 groups = ["dev", "evaluation"]
 files = [
-    {file = "ruff-0.9.7-py3-none-linux_armv6l.whl", hash = "sha256:99d50def47305fe6f233eb8dabfd60047578ca87c9dcb235c9723ab1175180f4"},
-    {file = "ruff-0.9.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d59105ae9c44152c3d40a9c40d6331a7acd1cdf5ef404fbe31178a77b174ea66"},
-    {file = "ruff-0.9.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f313b5800483770bd540cddac7c90fc46f895f427b7820f18fe1822697f1fec9"},
-    {file = "ruff-0.9.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:042ae32b41343888f59c0a4148f103208bf6b21c90118d51dc93a68366f4e903"},
-    {file = "ruff-0.9.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87862589373b33cc484b10831004e5e5ec47dc10d2b41ba770e837d4f429d721"},
-    {file = "ruff-0.9.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a17e1e01bee0926d351a1ee9bc15c445beae888f90069a6192a07a84af544b6b"},
-    {file = "ruff-0.9.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7c1f880ac5b2cbebd58b8ebde57069a374865c73f3bf41f05fe7a179c1c8ef22"},
-    {file = "ruff-0.9.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e63fc20143c291cab2841dbb8260e96bafbe1ba13fd3d60d28be2c71e312da49"},
-    {file = "ruff-0.9.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91ff963baed3e9a6a4eba2a02f4ca8eaa6eba1cc0521aec0987da8d62f53cbef"},
-    {file = "ruff-0.9.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88362e3227c82f63eaebf0b2eff5b88990280fb1ecf7105523883ba8c3aaf6fb"},
-    {file = "ruff-0.9.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0372c5a90349f00212270421fe91874b866fd3626eb3b397ede06cd385f6f7e0"},
-    {file = "ruff-0.9.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d76b8ab60e99e6424cd9d3d923274a1324aefce04f8ea537136b8398bbae0a62"},
-    {file = "ruff-0.9.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0c439bdfc8983e1336577f00e09a4e7a78944fe01e4ea7fe616d00c3ec69a3d0"},
-    {file = "ruff-0.9.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:115d1f15e8fdd445a7b4dc9a30abae22de3f6bcabeb503964904471691ef7606"},
-    {file = "ruff-0.9.7-py3-none-win32.whl", hash = "sha256:e9ece95b7de5923cbf38893f066ed2872be2f2f477ba94f826c8defdd6ec6b7d"},
-    {file = "ruff-0.9.7-py3-none-win_amd64.whl", hash = "sha256:3770fe52b9d691a15f0b87ada29c45324b2ace8f01200fb0c14845e499eb0c2c"},
-    {file = "ruff-0.9.7-py3-none-win_arm64.whl", hash = "sha256:b075a700b2533feb7a01130ff656a4ec0d5f340bb540ad98759b8401c32c2037"},
-    {file = "ruff-0.9.7.tar.gz", hash = "sha256:643757633417907510157b206e490c3aa11cab0c087c912f60e07fbafa87a4c6"},
+    {file = "ruff-0.9.6-py3-none-linux_armv6l.whl", hash = "sha256:2f218f356dd2d995839f1941322ff021c72a492c470f0b26a34f844c29cdf5ba"},
+    {file = "ruff-0.9.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b908ff4df65dad7b251c9968a2e4560836d8f5487c2f0cc238321ed951ea0504"},
+    {file = "ruff-0.9.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b109c0ad2ececf42e75fa99dc4043ff72a357436bb171900714a9ea581ddef83"},
+    {file = "ruff-0.9.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1de4367cca3dac99bcbd15c161404e849bb0bfd543664db39232648dc00112dc"},
+    {file = "ruff-0.9.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac3ee4d7c2c92ddfdaedf0bf31b2b176fa7aa8950efc454628d477394d35638b"},
+    {file = "ruff-0.9.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5dc1edd1775270e6aa2386119aea692039781429f0be1e0949ea5884e011aa8e"},
+    {file = "ruff-0.9.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4a091729086dffa4bd070aa5dab7e39cc6b9d62eb2bef8f3d91172d30d599666"},
+    {file = "ruff-0.9.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1bbc6808bf7b15796cef0815e1dfb796fbd383e7dbd4334709642649625e7c5"},
+    {file = "ruff-0.9.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:589d1d9f25b5754ff230dce914a174a7c951a85a4e9270613a2b74231fdac2f5"},
+    {file = "ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc61dd5131742e21103fbbdcad683a8813be0e3c204472d520d9a5021ca8b217"},
+    {file = "ruff-0.9.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5e2d9126161d0357e5c8f30b0bd6168d2c3872372f14481136d13de9937f79b6"},
+    {file = "ruff-0.9.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:68660eab1a8e65babb5229a1f97b46e3120923757a68b5413d8561f8a85d4897"},
+    {file = "ruff-0.9.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c4cae6c4cc7b9b4017c71114115db0445b00a16de3bcde0946273e8392856f08"},
+    {file = "ruff-0.9.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:19f505b643228b417c1111a2a536424ddde0db4ef9023b9e04a46ed8a1cb4656"},
+    {file = "ruff-0.9.6-py3-none-win32.whl", hash = "sha256:194d8402bceef1b31164909540a597e0d913c0e4952015a5b40e28c146121b5d"},
+    {file = "ruff-0.9.6-py3-none-win_amd64.whl", hash = "sha256:03482d5c09d90d4ee3f40d97578423698ad895c87314c4de39ed2af945633caa"},
+    {file = "ruff-0.9.6-py3-none-win_arm64.whl", hash = "sha256:0e2bb706a2be7ddfea4a4af918562fdc1bcb16df255e5fa595bbd800ce322a5a"},
+    {file = "ruff-0.9.6.tar.gz", hash = "sha256:81761592f72b620ec8fa1068a6fd00e98a5ebee342a3642efd84454f3031dca9"},
 ]

 [[package]]
 name = "runloop-api-client"
-version = "0.24.0"
+version = "0.23.0"
 description = "The official Python library for the runloop API"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "runloop_api_client-0.24.0-py3-none-any.whl", hash = "sha256:9b7a061b0e110e1b6b66e13cba9fd17ed4cfae2e4846928b3003c6aded76835b"},
-    {file = "runloop_api_client-0.24.0.tar.gz", hash = "sha256:1d47aee8996e18136aad104c5934979c03c56f9df589bd7458c909d0ad92c663"},
+    {file = "runloop_api_client-0.23.0-py3-none-any.whl", hash = "sha256:ee42c46385a986648a6c7bdf49833ec9010a1ffdf1a58c4957940f150606e3ac"},
+    {file = "runloop_api_client-0.23.0.tar.gz", hash = "sha256:93b2915d78c3258eba0924a2f1db246b586fa92bb318148ffd5d45fcb60adb3e"},
 ]

 [package.dependencies]
@@ -8454,34 +8391,33 @@ pathspec = ">=0.10.1"

 [[package]]
 name = "scikit-image"
-version = "0.25.2"
+version = "0.25.1"
 description = "Image processing in Python"
 optional = false
 python-versions = ">=3.10"
 groups = ["evaluation"]
 files = [
-    {file = "scikit_image-0.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d3278f586793176599df6a4cf48cb6beadae35c31e58dc01a98023af3dc31c78"},
-    {file = "scikit_image-0.25.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5c311069899ce757d7dbf1d03e32acb38bb06153236ae77fcd820fd62044c063"},
-    {file = "scikit_image-0.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be455aa7039a6afa54e84f9e38293733a2622b8c2fb3362b822d459cc5605e99"},
-    {file = "scikit_image-0.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c464b90e978d137330be433df4e76d92ad3c5f46a22f159520ce0fdbea8a09"},
-    {file = "scikit_image-0.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:60516257c5a2d2f74387c502aa2f15a0ef3498fbeaa749f730ab18f0a40fd054"},
-    {file = "scikit_image-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f4bac9196fb80d37567316581c6060763b0f4893d3aca34a9ede3825bc035b17"},
-    {file = "scikit_image-0.25.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d989d64ff92e0c6c0f2018c7495a5b20e2451839299a018e0e5108b2680f71e0"},
-    {file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2cfc96b27afe9a05bc92f8c6235321d3a66499995675b27415e0d0c76625173"},
-    {file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24cc986e1f4187a12aa319f777b36008764e856e5013666a4a83f8df083c2641"},
-    {file = "scikit_image-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:b4f6b61fc2db6340696afe3db6b26e0356911529f5f6aee8c322aa5157490c9b"},
-    {file = "scikit_image-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8db8dd03663112783221bf01ccfc9512d1cc50ac9b5b0fe8f4023967564719fb"},
-    {file = "scikit_image-0.25.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:483bd8cc10c3d8a7a37fae36dfa5b21e239bd4ee121d91cad1f81bba10cfb0ed"},
-    {file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d1e80107bcf2bf1291acfc0bf0425dceb8890abe9f38d8e94e23497cbf7ee0d"},
-    {file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17e17eb8562660cc0d31bb55643a4da996a81944b82c54805c91b3fe66f4824"},
-    {file = "scikit_image-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:bdd2b8c1de0849964dbc54037f36b4e9420157e67e45a8709a80d727f52c7da2"},
-    {file = "scikit_image-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7efa888130f6c548ec0439b1a7ed7295bc10105458a421e9bf739b457730b6da"},
-    {file = "scikit_image-0.25.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dd8011efe69c3641920614d550f5505f83658fe33581e49bed86feab43a180fc"},
-    {file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28182a9d3e2ce3c2e251383bdda68f8d88d9fff1a3ebe1eb61206595c9773341"},
-    {file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8abd3c805ce6944b941cfed0406d88faeb19bab3ed3d4b50187af55cf24d147"},
-    {file = "scikit_image-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:64785a8acefee460ec49a354706db0b09d1f325674107d7fa3eadb663fb56d6f"},
-    {file = "scikit_image-0.25.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:330d061bd107d12f8d68f1d611ae27b3b813b8cdb0300a71d07b1379178dd4cd"},
-    {file = "scikit_image-0.25.2.tar.gz", hash = "sha256:e5a37e6cd4d0c018a7a55b9d601357e3382826d3888c10d0213fc63bff977dde"},
+    {file = "scikit_image-0.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40763a3a089617e6f00f92d46b3475368b9783588a165c2aa854da95b66bb4ff"},
+    {file = "scikit_image-0.25.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:7c6b69f33e5512ee7fc53361b064430f146583f08dc75317667e81d5f8fcd0c6"},
+    {file = "scikit_image-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9187347d115776ff0ddba3e5d2a04638d291b1a62e3c315d17b71eea351cde8"},
+    {file = "scikit_image-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdfca713979ad1873a4b55d94bb1eb4bc713f0c10165b261bf6f7e606f44a00c"},
+    {file = "scikit_image-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:167fb146de80bb2a1493d1a760a9ac81644a8a5de254c3dd12a95d1b662d819c"},
+    {file = "scikit_image-0.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c1bde2d5f1dfb23b3c72ef9fcdb2dd5f42fa353e8bd606aea63590eba5e79565"},
+    {file = "scikit_image-0.25.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5112d95cccaa45c434e57efc20c1f721ab439e516e2ed49709ddc2afb7c15c70"},
+    {file = "scikit_image-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f5e313b028f5d7a9f3888ad825ddf4fb78913d7762891abb267b99244b4dd31"},
+    {file = "scikit_image-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39ad76aeff754048dabaff83db752aa0655dee425f006678d14485471bdb459d"},
+    {file = "scikit_image-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:8dc8b06176c1a2316fa8bc539fd7e96155721628ae5cf51bc1a2c62cb9786581"},
+    {file = "scikit_image-0.25.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ebf83699d60134909647395a0bf07db3859646de7192b088e656deda6bc15e95"},
+    {file = "scikit_image-0.25.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:408086520eed036340e634ab7e4f648e00238f711bac61ab933efeb11464a238"},
+    {file = "scikit_image-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bd709faa87795869ccd21f32490c37989ca5846571495822f4b9430fb42c34c"},
+    {file = "scikit_image-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b15c0265c072a46ff4720784d756d8f8e5d63567639aa8451f6673994d6846"},
+    {file = "scikit_image-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:a689a0d091e0bd97d7767309abdeb27c43be210d075abb34e71657add920c22b"},
+    {file = "scikit_image-0.25.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f070f899d6572a125ab106c4b26d1a5fb784dc60ba6dea45c7816f08c3a4fb4d"},
+    {file = "scikit_image-0.25.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:cc9538d8db7670878aa68ea79c0b1796b6c771085e8d50f5408ee617da3281b6"},
+    {file = "scikit_image-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caa08d4fa851e1f421fcad8eac24d32f2810971dc61f1d72dc950ca9e9ec39b1"},
+    {file = "scikit_image-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9923aa898b7921fbcf503d32574d48ed937a7cff45ce8587be4868b39676e18"},
+    {file = "scikit_image-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:6c7bba6773ab8c39ee8b1cbb17c7f98965bacdb8cd8da337942be6acc38fc562"},
+    {file = "scikit_image-0.25.1.tar.gz", hash = "sha256:d4ab30540d114d37c35fe5c837f89b94aaba2a7643afae8354aa353319e9bbbb"},
 ]

 [package.dependencies]
@@ -8491,16 +8427,16 @@ networkx = ">=3.0"
 numpy = ">=1.24"
 packaging = ">=21"
 pillow = ">=10.1"
-scipy = ">=1.11.4"
+scipy = ">=1.11.2"
 tifffile = ">=2022.8.12"

 [package.extras]
-build = ["Cython (>=3.0.8)", "build (>=1.2.1)", "meson-python (>=0.16)", "ninja (>=1.11.1.1)", "numpy (>=2.0)", "pythran (>=0.16)", "spin (==0.13)"]
+build = ["Cython (>=3.0.8)", "build (>=1.2.1)", "meson-python (>=0.16)", "ninja (>=1.11.1.1)", "numpy (>=2.0)", "pythran (>=0.16)", "setuptools (>=68)", "spin (==0.13)"]
 data = ["pooch (>=1.6.0)"]
 developer = ["ipython", "pre-commit", "tomli"]
-docs = ["PyWavelets (>=1.6)", "dask[array] (>=2023.2.0)", "intersphinx-registry (>=0.2411.14)", "ipykernel", "ipywidgets", "kaleido (==0.2.1)", "matplotlib (>=3.7)", "myst-parser", "numpydoc (>=1.7)", "pandas (>=2.0)", "plotly (>=5.20)", "pooch (>=1.6)", "pydata-sphinx-theme (>=0.16)", "pytest-doctestplus", "scikit-learn (>=1.2)", "seaborn (>=0.11)", "sphinx (>=8.0)", "sphinx-copybutton", "sphinx-gallery[parallel] (>=0.18)", "sphinx_design (>=0.5)", "tifffile (>=2022.8.12)"]
-optional = ["PyWavelets (>=1.6)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=1.1.1)", "dask[array] (>=2023.2.0)", "matplotlib (>=3.7)", "pooch (>=1.6.0)", "pyamg (>=5.2)", "scikit-learn (>=1.2)"]
-test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=8)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"]
+docs = ["PyWavelets (>=1.6)", "dask[array] (>=2022.9.2)", "intersphinx-registry (>=0.2411.14)", "ipykernel", "ipywidgets", "kaleido (==0.2.1)", "matplotlib (>=3.7)", "myst-parser", "numpydoc (>=1.7)", "pandas (>=2.0)", "plotly (>=5.20)", "pooch (>=1.6)", "pydata-sphinx-theme (>=0.16)", "pytest-doctestplus", "scikit-learn (>=1.2)", "seaborn (>=0.11)", "sphinx (>=8.0)", "sphinx-copybutton", "sphinx-gallery[parallel] (>=0.18)", "sphinx_design (>=0.5)", "tifffile (>=2022.8.12)"]
+optional = ["PyWavelets (>=1.6)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=0.2.1)", "dask[array] (>=2021.1.0,!=2024.8.0)", "matplotlib (>=3.7)", "pooch (>=1.6.0)", "pyamg (>=5.2)", "scikit-learn (>=1.2)"]
+test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=7.0)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"]

 [[package]]
 name = "scikit-learn"
@@ -9025,14 +8961,14 @@ full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart

 [[package]]
 name = "streamlit"
-version = "1.42.2"
+version = "1.42.1"
 description = "A faster way to build and share data apps"
 optional = false
 python-versions = "!=3.9.7,>=3.9"
 groups = ["evaluation"]
 files = [
-    {file = "streamlit-1.42.2-py2.py3-none-any.whl", hash = "sha256:e2516c7fcd17a11a85cc1999fae58ace0a6458e2b4c1a411ed3d75b1aee2eb93"},
-    {file = "streamlit-1.42.2.tar.gz", hash = "sha256:62026dbdcb482790933f658b096d7dd58fa70da89c1f06fbc3658b91dcd4dab2"},
+    {file = "streamlit-1.42.1-py2.py3-none-any.whl", hash = "sha256:f995642dfe14f5e93acf1956a909f17aa3c395d5aa0bc580461d7fc9a452bbb2"},
+    {file = "streamlit-1.42.1.tar.gz", hash = "sha256:4e13c47b0385bd633cc2777cb5da277129edb774db417ee77d8f349e9c7357ba"},
 ]

 [package.dependencies]
@@ -9270,14 +9206,14 @@ files = [

 [[package]]
 name = "tifffile"
-version = "2025.2.18"
+version = "2025.1.10"
 description = "Read and write TIFF files"
 optional = false
 python-versions = ">=3.10"
 groups = ["evaluation"]
 files = [
-    {file = "tifffile-2025.2.18-py3-none-any.whl", hash = "sha256:54b36c4d5e5b8d8920134413edfe5a7cfb1c7617bb50cddf7e2772edb7149043"},
-    {file = "tifffile-2025.2.18.tar.gz", hash = "sha256:8d731789e691b468746c1615d989bc550ac93cf753e9210865222e90a5a95d11"},
+    {file = "tifffile-2025.1.10-py3-none-any.whl", hash = "sha256:ed24cf4c99fb13b4f5fb29f8a0d5605e60558c950bccbdca2a6470732a27cfb3"},
+    {file = "tifffile-2025.1.10.tar.gz", hash = "sha256:baaf0a3b87bf7ec375fa1537503353f70497eabe1bdde590f2e41cc0346e612f"},
 ]

 [package.dependencies]
@@ -10853,4 +10789,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "39e0f069346a4d1e52193899989b79ea3e02f81d67fbb2ac0fdc87e70bd1008f"
+content-hash = "583a46735c1704428110581400333ce35cd7f6a3f22e5831beb7315ebeeb377a"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ python-multipart = "*"
 boto3 = "*"
 minio = "^7.2.8"
 gevent = "^24.2.1"
-pyarrow = "19.0.1" # transitive dependency, pinned here to avoid conflicts
+pyarrow = "19.0.0" # transitive dependency, pinned here to avoid conflicts
 tenacity = ">=8.5,<10.0"
 zope-interface = "7.2"
 pathspec = "^0.12.1"
@@ -63,7 +63,7 @@ protobuf = "^4.21.6,<5.0.0" # chromadb currently fails on 5.0+
 opentelemetry-api = "1.25.0"
 opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
 modal = ">=0.66.26,<0.74.0"
-runloop-api-client = "0.24.0"
+runloop-api-client = "0.23.0"
 libtmux = ">=0.37,<0.40"
 pygithub = "^2.5.0"
 joblib = "*"
@@ -76,7 +76,6 @@ stripe = "^11.5.0"
 ipywidgets = "^8.1.5"
 qtconsole = "^5.6.1"
 memory-profiler = "^0.61.0"
-daytona-sdk = "0.9.1"

 [tool.poetry.group.llama-index.dependencies]
 llama-index = "*"
@@ -90,7 +89,7 @@ voyageai = "*"
 llama-index-embeddings-voyageai = "*"

 [tool.poetry.group.dev.dependencies]
-ruff = "0.9.7"
+ruff = "0.9.6"
 mypy = "1.15.0"
 pre-commit = "4.1.0"
 build = "*"
@@ -109,7 +108,6 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]

-
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -138,7 +136,6 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"

-
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"
--- a/tests/runtime/conftest.py
+++ b/tests/runtime/conftest.py
@@ -11,7 +11,6 @@ from openhands.core.config import AppConfig, load_app_config
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.runtime.base import Runtime
-from openhands.runtime.impl.daytona.daytona_runtime import DaytonaRuntime
 from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
 from openhands.runtime.impl.local.local_runtime import LocalRuntime
 from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
@@ -131,8 +130,6 @@ def get_runtime_classes() -> list[type[Runtime]]:
        return [RemoteRuntime]
    elif runtime.lower() == 'runloop':
        return [RunloopRuntime]
-    elif runtime.lower() == 'daytona':
-        return [DaytonaRuntime]
    else:
        raise ValueError(f'Invalid runtime: {runtime}')

--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -9,7 +9,6 @@ from openhands.controller.agent import Agent
 from openhands.controller.agent_controller import AgentController
 from openhands.controller.state.state import State, TrafficControlState
 from openhands.core.config import AppConfig
-from openhands.core.config.agent_config import AgentConfig
 from openhands.core.main import run_controller
 from openhands.core.schema import AgentState
 from openhands.events import Event, EventSource, EventStream, EventStreamSubscriber
@@ -606,7 +605,6 @@ async def test_context_window_exceeded_error_handling(mock_agent, mock_event_str

    state = StepState()
    mock_agent.step = state.step
-    mock_agent.config = AgentConfig()

    controller = AgentController(
        agent=mock_agent,
@@ -629,10 +627,8 @@ async def test_context_window_exceeded_error_handling(mock_agent, mock_event_str


@pytest.mark.asyncio
-async def test_run_controller_with_context_window_exceeded_with_truncation(
-    mock_agent, mock_runtime
-):
-    """Tests that the controller can make progress after handling context window exceeded errors, as long as enable_history_truncation is ON"""
+async def test_run_controller_with_context_window_exceeded(mock_agent, mock_runtime):
+    """Tests that the controller can make progress after handling context window exceeded errors."""

    class StepState:
        def __init__(self):
@@ -654,7 +650,6 @@ async def test_run_controller_with_context_window_exceeded_with_truncation(

    step_state = StepState()
    mock_agent.step = step_state.step
-    mock_agent.config = AgentConfig()

    try:
        state = await asyncio.wait_for(
@@ -687,65 +682,3 @@ async def test_run_controller_with_context_window_exceeded_with_truncation(

    # Check that the context window exceeded error was raised during the run
    assert step_state.has_errored
-
-
-@pytest.mark.asyncio
-async def test_run_controller_with_context_window_exceeded_without_truncation(
-    mock_agent, mock_runtime
-):
-    """Tests that the controller would quit upon context window exceeded errors without enable_history_truncation ON."""
-
-    class StepState:
-        def __init__(self):
-            self.has_errored = False
-
-        def step(self, state: State):
-            # If the state has more than one message and we haven't errored yet,
-            # throw the context window exceeded error
-            if len(state.history) > 1 and not self.has_errored:
-                error = ContextWindowExceededError(
-                    message='prompt is too long: 233885 tokens > 200000 maximum',
-                    model='',
-                    llm_provider='',
-                )
-                self.has_errored = True
-                raise error
-
-            return MessageAction(content=f'STEP {len(state.history)}')
-
-    step_state = StepState()
-    mock_agent.step = step_state.step
-    mock_agent.config = AgentConfig()
-    mock_agent.config.enable_history_truncation = False
-
-    try:
-        state = await asyncio.wait_for(
-            run_controller(
-                config=AppConfig(max_iterations=3),
-                initial_user_action=MessageAction(content='INITIAL'),
-                runtime=mock_runtime,
-                sid='test',
-                agent=mock_agent,
-                fake_user_response_fn=lambda _: 'repeat',
-            ),
-            timeout=10,
-        )
-
-    # A timeout error indicates the run_controller entrypoint is not making
-    # progress
-    except asyncio.TimeoutError as e:
-        raise AssertionError(
-            'The run_controller function did not complete in time.'
-        ) from e
-
-    # Hitting the iteration limit indicates the controller is failing for the
-    # expected reason
-    assert state.iteration == 2
-    assert state.agent_state == AgentState.ERROR
-    assert (
-        state.last_error
-        == 'LLMContextWindowExceedError: Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error'
-    )
-
-    # Check that the context window exceeded error was raised during the run
-    assert step_state.has_errored
--- a/tests/unit/test_cli_sid.py
+++ b/tests/unit/test_cli_sid.py
@@ -1,101 +0,0 @@
-import asyncio
-from argparse import Namespace
-from pathlib import Path
-from unittest.mock import AsyncMock, patch
-
-import pytest
-
-from openhands.core.cli import main
-from openhands.core.config import AppConfig
-from openhands.core.schema import AgentState
-from openhands.events.event import EventSource
-from openhands.events.observation import AgentStateChangedObservation
-
-
-@pytest.fixture
-def mock_runtime():
-    with patch('openhands.core.cli.create_runtime') as mock_create_runtime:
-        mock_runtime_instance = AsyncMock()
-        # Mock the event stream with proper async methods
-        mock_runtime_instance.event_stream = AsyncMock()
-        mock_runtime_instance.event_stream.subscribe = AsyncMock()
-        mock_runtime_instance.event_stream.add_event = AsyncMock()
-        # Mock connect method to return immediately
-        mock_runtime_instance.connect = AsyncMock()
-        # Ensure status_callback is None
-        mock_runtime_instance.status_callback = None
-        mock_create_runtime.return_value = mock_runtime_instance
-        yield mock_runtime_instance
-
-
-@pytest.fixture
-def mock_agent():
-    with patch('openhands.core.cli.create_agent') as mock_create_agent:
-        mock_agent_instance = AsyncMock()
-        mock_create_agent.return_value = mock_agent_instance
-        yield mock_agent_instance
-
-
-@pytest.fixture
-def mock_controller():
-    with patch('openhands.core.cli.create_controller') as mock_create_controller:
-        mock_controller_instance = AsyncMock()
-        # Mock run_until_done to finish immediately
-        mock_controller_instance.run_until_done = AsyncMock(return_value=None)
-        mock_create_controller.return_value = (mock_controller_instance, None)
-        yield mock_controller_instance
-
-
-@pytest.fixture
-def task_file(tmp_path: Path) -> Path:
-    # Create a temporary file with our task
-    task_file = tmp_path / 'task.txt'
-    task_file.write_text('Ask me what your task is')
-    return task_file
-
-
-@pytest.fixture
-def mock_config(task_file: Path):
-    with patch('openhands.core.cli.parse_arguments') as mock_parse_args:
-        # Create a proper Namespace with our temporary task file
-        args = Namespace(file=str(task_file), task=None, directory=None)
-        mock_parse_args.return_value = args
-        with patch('openhands.core.cli.setup_config_from_args') as mock_setup_config:
-            mock_config = AppConfig()
-            mock_setup_config.return_value = mock_config
-            yield mock_config
-
-
-@pytest.mark.asyncio
-async def test_cli_session_id_output(
-    mock_runtime, mock_agent, mock_controller, mock_config, capsys
-):
-    # status_callback is set when initializing the runtime
-    mock_controller.status_callback = None
-
-    # Use input patch just for the exit command
-    with patch('builtins.input', return_value='exit'):
-        # Create a task for main
-        main_task = asyncio.create_task(main(asyncio.get_event_loop()))
-
-        # Give it a moment to display the session ID
-        await asyncio.sleep(0.1)
-
-        # Trigger agent state change to STOPPED to end the main loop
-        event = AgentStateChangedObservation(
-            content='Stop', agent_state=AgentState.STOPPED
-        )
-        event._source = EventSource.AGENT
-        await mock_runtime.event_stream.add_event(event)
-
-        # Wait for main to finish with a timeout
-        try:
-            await asyncio.wait_for(main_task, timeout=1.0)
-        except asyncio.TimeoutError:
-            main_task.cancel()
-
-        # Check the output
-        captured = capsys.readouterr()
-        assert 'Session ID:' in captured.out
-        # Also verify that our task message was processed
-        assert 'Ask me what your task is' in str(mock_runtime.mock_calls)
--- a/tests/unit/test_condenser.py
+++ b/tests/unit/test_condenser.py
@@ -38,7 +38,7 @@ def create_test_event(
    event = Event()
    event._message = message
    event.timestamp = timestamp if timestamp else datetime.now()
-    if id is not None:
+    if id:
        event._id = id
    event._source = EventSource.USER
    return event
@@ -186,14 +186,13 @@ def test_recent_events_condenser():
    assert result == events

    # If the max_events are smaller than the number of events, only keep the last few.
-    max_events = 3
+    max_events = 2
    condenser = RecentEventsCondenser(max_events=max_events)
    result = condenser.condensed_history(mock_state)

    assert len(result) == max_events
-    assert result[0]._message == 'Event 1'  # kept from keep_first
-    assert result[1]._message == 'Event 4'  # kept from max_events
-    assert result[2]._message == 'Event 5'  # kept from max_events
+    assert result[0]._message == 'Event 4'
+    assert result[1]._message == 'Event 5'

    # If the keep_first flag is set, the first event will always be present.
    keep_first = 1
@@ -212,9 +211,9 @@ def test_recent_events_condenser():
    result = condenser.condensed_history(mock_state)

    assert len(result) == max_events
-    assert result[0]._message == 'Event 1'  # kept from keep_first
-    assert result[1]._message == 'Event 2'  # kept from keep_first
-    assert result[2]._message == 'Event 5'  # kept from max_events
+    assert result[0]._message == 'Event 1'
+    assert result[1]._message == 'Event 2'
+    assert result[2]._message == 'Event 5'


 def test_llm_summarization_condenser_from_config():
@@ -540,7 +539,7 @@ def test_llm_attention_condenser_forgets_when_larger_than_max_size(
 ):
    """Test that the LLMAttentionCondenser forgets events when the context grows too large."""
    max_size = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)

    for i in range(max_size * 10):
        event = create_test_event(f'Event {i}', id=i)
@@ -561,7 +560,7 @@ def test_llm_attention_condenser_forgets_when_larger_than_max_size(
 def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_state):
    """Test that the LLMAttentionCondenser handles event IDs that aren't from the event history."""
    max_size = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)

    for i in range(max_size * 10):
        event = create_test_event(f'Event {i}', id=i)
@@ -581,7 +580,7 @@ def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_s
 def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state):
    """Test that the LLMAttentionCondenser handles when the response contains too many event IDs."""
    max_size = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)

    for i in range(max_size * 10):
        event = create_test_event(f'Event {i}', id=i)
@@ -601,9 +600,7 @@ def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state):
 def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state):
    """Test that the LLMAttentionCondenser handles when the response contains too few event IDs."""
    max_size = 2
-    # Developer note: We must specify keep_first=0 because
-    # keep_first (1) >= max_size//2 (1) is invalid.
-    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
+    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)

    for i in range(max_size * 10):
        event = create_test_event(f'Event {i}', id=i)
@@ -617,33 +614,3 @@ def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state):

        # The number of results should bounce back and forth between 1, 2, 1, 2, ...
        assert len(results) == (i % 2) + 1
-
-    # Add a new test verifying that keep_first=1 works with max_size > 2
-
-
-def test_llm_attention_condenser_handles_keep_first_for_larger_max_size(
-    mock_llm, mock_state
-):
-    """Test that LLMAttentionCondenser works when keep_first=1 is allowed (must be less than half of max_size)."""
-    max_size = 4  # so keep_first=1 < (max_size // 2) = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=1, llm=mock_llm)
-
-    for i in range(max_size * 2):
-        # We append new events, then ensure some are pruned.
-        event = create_test_event(f'Event {i}', id=i)
-        mock_state.history.append(event)
-
-        mock_llm.set_mock_response_content(
-            ImportantEventSelection(ids=[]).model_dump_json()
-        )
-
-        results = condenser.condensed_history(mock_state)
-
-        # We expect that the first event is always kept, and the tail grows until max_size
-        if len(mock_state.history) <= max_size:
-            # No condensation needed yet
-            assert len(results) == len(mock_state.history)
-        else:
-            # The first event is kept, plus some from the tail
-            assert results[0].id == 0
-            assert len(results) <= max_size
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -686,7 +686,6 @@ def test_api_keys_repr_str():
        modal_api_token_id='my_modal_api_token_id',
        modal_api_token_secret='my_modal_api_token_secret',
        runloop_api_key='my_runloop_api_key',
-        daytona_api_key='my_daytona_api_key',
    )
    assert 'my_e2b_api_key' not in repr(app_config)
    assert 'my_e2b_api_key' not in str(app_config)
@@ -698,8 +697,6 @@ def test_api_keys_repr_str():
    assert 'my_modal_api_token_secret' not in str(app_config)
    assert 'my_runloop_api_key' not in repr(app_config)
    assert 'my_runloop_api_key' not in str(app_config)
-    assert 'my_daytona_api_key' not in repr(app_config)
-    assert 'my_daytona_api_key' not in str(app_config)

    # Check that no other attrs in AppConfig have 'key' or 'token' in their name
    # This will fail when new attrs are added, and attract attention
@@ -708,7 +705,6 @@ def test_api_keys_repr_str():
        'modal_api_token_id',
        'modal_api_token_secret',
        'runloop_api_key',
-        'daytona_api_key',
    ]
    for attr_name in AppConfig.model_fields.keys():
        if (
--- a/tests/unit/test_ensure_httpx_close.py
+++ b/tests/unit/test_ensure_httpx_close.py
@@ -1,84 +0,0 @@
-from httpx import Client
-
-from openhands.utils.ensure_httpx_close import EnsureHttpxClose
-
-
-def test_ensure_httpx_close_basic():
-    """Test basic functionality of EnsureHttpxClose."""
-    clients = []
-    ctx = EnsureHttpxClose()
-    with ctx:
-        # Create a client - should be tracked
-        client = Client()
-        assert client in ctx.clients
-        assert len(ctx.clients) == 1
-        clients.append(client)
-
-    # After context exit, client should be closed
-    assert client.is_closed
-
-
-def test_ensure_httpx_close_multiple_clients():
-    """Test EnsureHttpxClose with multiple clients."""
-    ctx = EnsureHttpxClose()
-    with ctx:
-        client1 = Client()
-        client2 = Client()
-        assert len(ctx.clients) == 2
-        assert client1 in ctx.clients
-        assert client2 in ctx.clients
-
-    assert client1.is_closed
-    assert client2.is_closed
-
-
-def test_ensure_httpx_close_nested():
-    """Test nested usage of EnsureHttpxClose."""
-    outer_ctx = EnsureHttpxClose()
-    with outer_ctx:
-        client1 = Client()
-        assert client1 in outer_ctx.clients
-
-        inner_ctx = EnsureHttpxClose()
-        with inner_ctx:
-            client2 = Client()
-            assert client2 in inner_ctx.clients
-            # Since both contexts are using the same monkey-patched __init__,
-            # both contexts will track all clients created while they are active
-            assert client2 in outer_ctx.clients
-
-        # After inner context, client2 should be closed
-        assert client2.is_closed
-        # client1 should still be open since outer context is still active
-        assert not client1.is_closed
-
-    # After outer context, both clients should be closed
-    assert client1.is_closed
-    assert client2.is_closed
-
-
-def test_ensure_httpx_close_exception():
-    """Test EnsureHttpxClose when an exception occurs."""
-    client = None
-    ctx = EnsureHttpxClose()
-    try:
-        with ctx:
-            client = Client()
-            raise ValueError('Test exception')
-    except ValueError:
-        pass
-
-    # Client should be closed even if an exception occurred
-    assert client is not None
-    assert client.is_closed
-
-
-def test_ensure_httpx_close_restore_init():
-    """Test that the original __init__ is restored after context exit."""
-    original_init = Client.__init__
-    ctx = EnsureHttpxClose()
-    with ctx:
-        assert Client.__init__ != original_init
-
-    # Original __init__ should be restored
-    assert Client.__init__ == original_init
--- a/tests/unit/test_git_lfs.py
+++ b/tests/unit/test_git_lfs.py
@@ -0,0 +1,91 @@
+import os
+import subprocess
+import tempfile
+from unittest import mock
+
+import pytest
+
+from openhands.resolver.resolve_issue import resolve_issue
+from openhands.resolver.utils import Platform
+
+
+@pytest.mark.asyncio
+async def test_git_lfs_skip_smudge():
+    # Create a temporary directory for the test
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Mock environment variables
+        with mock.patch.dict(os.environ, {'GIT_LFS_SKIP_SMUDGE': '1'}):
+            # Mock subprocess.check_output to verify git config is called
+            with mock.patch('subprocess.check_output') as mock_check_output:
+                # Mock issue handler
+                mock_handler = mock.MagicMock()
+                mock_handler.get_clone_url.return_value = 'https://github.com/test/repo.git'
+                mock_handler.get_converted_issues.return_value = [mock.MagicMock()]
+
+                # Mock issue_handler_factory to return our mock handler
+                with mock.patch('openhands.resolver.resolve_issue.issue_handler_factory', return_value=mock_handler):
+                    # Call resolve_issue with test parameters
+                    await resolve_issue(
+                        owner='test',
+                        repo='repo',
+                        token='token',
+                        username='username',
+                        platform=Platform.GITHUB,
+                        max_iterations=1,
+                        output_dir=temp_dir,
+                        llm_config=mock.MagicMock(),
+                        runtime_container_image=None,
+                        prompt_template='',
+                        issue_type='issue',
+                        repo_instruction=None,
+                        issue_number=1,
+                        comment_id=None,
+                    )
+
+                    # Verify git config was called with correct parameters
+                    mock_check_output.assert_any_call(['git', 'config', '--global', 'filter.lfs.smudge', 'git-lfs smudge --skip'])
+                    mock_check_output.assert_any_call(['git', 'config', '--global', 'filter.lfs.process', 'git-lfs filter-process --skip'])
+
+
+@pytest.mark.asyncio
+async def test_git_clone_depth():
+    # Create a temporary directory for the test
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Mock environment variables
+        with mock.patch.dict(os.environ, {'GIT_CLONE_DEPTH': '1'}):
+            # Mock subprocess.check_output to verify git clone is called with --depth
+            with mock.patch('subprocess.check_output') as mock_check_output:
+                # Mock issue handler
+                mock_handler = mock.MagicMock()
+                mock_handler.get_clone_url.return_value = 'https://github.com/test/repo.git'
+                mock_handler.get_converted_issues.return_value = [mock.MagicMock()]
+
+                # Mock issue_handler_factory to return our mock handler
+                with mock.patch('openhands.resolver.resolve_issue.issue_handler_factory', return_value=mock_handler):
+                    # Call resolve_issue with test parameters
+                    await resolve_issue(
+                        owner='test',
+                        repo='repo',
+                        token='token',
+                        username='username',
+                        platform=Platform.GITHUB,
+                        max_iterations=1,
+                        output_dir=temp_dir,
+                        llm_config=mock.MagicMock(),
+                        runtime_container_image=None,
+                        prompt_template='',
+                        issue_type='issue',
+                        repo_instruction=None,
+                        issue_number=1,
+                        comment_id=None,
+                    )
+
+                    # Verify git clone was called with --depth
+                    mock_check_output.assert_any_call([
+                        'git',
+                        'clone',
+                        '--depth',
+                        '1',
+                        'https://github.com/test/repo.git',
+                        f'{temp_dir}/repo',
+                    ])
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -1,10 +1,7 @@
 import copy
-import tempfile
-from pathlib import Path
 from unittest.mock import MagicMock, patch

 import pytest
-from litellm import PromptTokensDetails
 from litellm.exceptions import (
    RateLimitError,
 )
@@ -432,86 +429,3 @@ def test_get_token_count_error_handling(
    mock_logger.error.assert_called_once_with(
        'Error getting token count for\n model gpt-4o\nToken counting failed'
    )
-
-
-@patch('openhands.llm.llm.litellm_completion')
-def test_llm_token_usage(mock_litellm_completion, default_config):
-    # This mock response includes usage details with prompt_tokens,
-    # completion_tokens, prompt_tokens_details.cached_tokens, and model_extra.cache_creation_input_tokens
-    mock_response_1 = {
-        'id': 'test-response-usage',
-        'choices': [{'message': {'content': 'Usage test response'}}],
-        'usage': {
-            'prompt_tokens': 12,
-            'completion_tokens': 3,
-            'prompt_tokens_details': PromptTokensDetails(cached_tokens=2),
-            'model_extra': {'cache_creation_input_tokens': 5},
-        },
-    }
-
-    # Create a second usage scenario to test accumulation and a different response_id
-    mock_response_2 = {
-        'id': 'test-response-usage-2',
-        'choices': [{'message': {'content': 'Second usage test response'}}],
-        'usage': {
-            'prompt_tokens': 7,
-            'completion_tokens': 2,
-            'prompt_tokens_details': PromptTokensDetails(cached_tokens=1),
-            'model_extra': {'cache_creation_input_tokens': 3},
-        },
-    }
-
-    # We'll make mock_litellm_completion return these responses in sequence
-    mock_litellm_completion.side_effect = [mock_response_1, mock_response_2]
-
-    llm = LLM(config=default_config)
-
-    # First call
-    llm.completion(messages=[{'role': 'user', 'content': 'Hello usage!'}])
-
-    # Verify we have exactly one usage record after first call
-    token_usage_list = llm.metrics.get()['token_usages']
-    assert len(token_usage_list) == 1
-    usage_entry_1 = token_usage_list[0]
-    assert usage_entry_1['prompt_tokens'] == 12
-    assert usage_entry_1['completion_tokens'] == 3
-    assert usage_entry_1['cache_read_tokens'] == 2
-    assert usage_entry_1['cache_write_tokens'] == 5
-    assert usage_entry_1['response_id'] == 'test-response-usage'
-
-    # Second call
-    llm.completion(messages=[{'role': 'user', 'content': 'Hello again!'}])
-
-    # Now we expect two usage records total
-    token_usage_list = llm.metrics.get()['token_usages']
-    assert len(token_usage_list) == 2
-    usage_entry_2 = token_usage_list[-1]
-    assert usage_entry_2['prompt_tokens'] == 7
-    assert usage_entry_2['completion_tokens'] == 2
-    assert usage_entry_2['cache_read_tokens'] == 1
-    assert usage_entry_2['cache_write_tokens'] == 3
-    assert usage_entry_2['response_id'] == 'test-response-usage-2'
-
-
-@patch('openhands.llm.llm.litellm_completion')
-def test_completion_with_log_completions(mock_litellm_completion, default_config):
-    with tempfile.TemporaryDirectory() as temp_dir:
-        default_config.log_completions = True
-        default_config.log_completions_folder = temp_dir
-        mock_response = {
-            'choices': [{'message': {'content': 'This is a mocked response.'}}]
-        }
-        mock_litellm_completion.return_value = mock_response
-
-        test_llm = LLM(config=default_config)
-        response = test_llm.completion(
-            messages=[{'role': 'user', 'content': 'Hello!'}],
-            stream=False,
-            drop_params=True,
-        )
-        assert (
-            response['choices'][0]['message']['content'] == 'This is a mocked response.'
-        )
-        files = list(Path(temp_dir).iterdir())
-        # Expect a log to be generated
-        assert len(files) == 1
--- a/tests/unit/test_long_term_memory.py
+++ b/tests/unit/test_long_term_memory.py
@@ -7,7 +7,7 @@ import pytest
 from openhands.core.config import AgentConfig, LLMConfig
 from openhands.events.event import Event, EventSource
 from openhands.events.stream import EventStream
-from openhands.memory.long_term_memory import LongTermMemory
+from openhands.memory.memory import LongTermMemory
 from openhands.storage.files import FileStore


@@ -154,7 +154,7 @@ def test_load_events_into_index_with_invalid_json(
    """Test loading events with malformed event data."""
    # Simulate an event that causes event_to_memory to raise a JSONDecodeError
    with patch(
-        'openhands.memory.long_term_memory.event_to_memory',
+        'openhands.memory.memory.event_to_memory',
        side_effect=json.JSONDecodeError('Expecting value', '', 0),
    ):
        event = _create_action_event('invalid_action')
@@ -190,8 +190,7 @@ def test_search_returns_correct_results(long_term_memory: LongTermMemory):
        MagicMock(get_text=MagicMock(return_value='result2')),
    ]
    with patch(
-        'openhands.memory.long_term_memory.VectorIndexRetriever',
-        return_value=mock_retriever,
+        'openhands.memory.memory.VectorIndexRetriever', return_value=mock_retriever
    ):
        results = long_term_memory.search(query='test query', k=2)
        assert results == ['result1', 'result2']
@@ -202,8 +201,7 @@ def test_search_with_no_results(long_term_memory: LongTermMemory):
    mock_retriever = MagicMock()
    mock_retriever.retrieve.return_value = []
    with patch(
-        'openhands.memory.long_term_memory.VectorIndexRetriever',
-        return_value=mock_retriever,
+        'openhands.memory.memory.VectorIndexRetriever', return_value=mock_retriever
    ):
        results = long_term_memory.search(query='no results', k=5)
        assert results == []
--- a/tests/unit/test_message_utils.py
+++ b/tests/unit/test_message_utils.py
@@ -3,18 +3,13 @@ from unittest.mock import Mock
 import pytest

 from openhands.core.message import ImageContent, TextContent
-from openhands.core.message_utils import (
-    get_action_message,
-    get_observation_message,
-    get_token_usage_for_event,
-    get_token_usage_for_event_id,
-)
+from openhands.core.message_utils import get_action_message, get_observation_message
 from openhands.events.action import (
    AgentFinishAction,
    CmdRunAction,
    MessageAction,
 )
-from openhands.events.event import Event, EventSource, FileEditSource, FileReadSource
+from openhands.events.event import EventSource, FileEditSource, FileReadSource
 from openhands.events.observation.browse import BrowserOutputObservation
 from openhands.events.observation.commands import (
    CmdOutputMetadata,
@@ -26,7 +21,6 @@ from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.files import FileEditObservation, FileReadObservation
 from openhands.events.observation.reject import UserRejectObservation
 from openhands.events.tool import ToolCallMetadata
-from openhands.llm.metrics import Metrics, TokenUsage


 def test_cmd_output_observation_message():
@@ -275,113 +269,3 @@ def test_agent_finish_action_with_tool_metadata():
    assert len(result.content) == 1
    assert isinstance(result.content[0], TextContent)
    assert 'Initial thought\nTask completed' in result.content[0].text
-
-
-def test_get_token_usage_for_event():
-    """Test that we get the single matching usage record (if any) based on the event's model_response.id."""
-    metrics = Metrics(model_name='test-model')
-    usage_record = TokenUsage(
-        model='test-model',
-        prompt_tokens=10,
-        completion_tokens=5,
-        cache_read_tokens=2,
-        cache_write_tokens=1,
-        response_id='test-response-id',
-    )
-    metrics.add_token_usage(
-        prompt_tokens=usage_record.prompt_tokens,
-        completion_tokens=usage_record.completion_tokens,
-        cache_read_tokens=usage_record.cache_read_tokens,
-        cache_write_tokens=usage_record.cache_write_tokens,
-        response_id=usage_record.response_id,
-    )
-
-    # Create an event referencing that response_id
-    event = Event()
-    mock_tool_call_metadata = ToolCallMetadata(
-        tool_call_id='test-tool-call',
-        function_name='fake_function',
-        model_response={'id': 'test-response-id'},
-        total_calls_in_response=1,
-    )
-    event._tool_call_metadata = (
-        mock_tool_call_metadata  # normally you'd do event.tool_call_metadata = ...
-    )
-
-    # We should find that usage record
-    found = get_token_usage_for_event(event, metrics)
-    assert found is not None
-    assert found.prompt_tokens == 10
-    assert found.response_id == 'test-response-id'
-
-    # If we change the event's response ID, we won't find anything
-    mock_tool_call_metadata.model_response.id = 'some-other-id'
-    found2 = get_token_usage_for_event(event, metrics)
-    assert found2 is None
-
-    # If the event has no tool_call_metadata, also returns None
-    event._tool_call_metadata = None
-    found3 = get_token_usage_for_event(event, metrics)
-    assert found3 is None
-
-
-def test_get_token_usage_for_event_id():
-    """
-    Test that we search backward from the event with the given id,
-    finding the first usage record that matches a response_id in that or previous events.
-    """
-    metrics = Metrics(model_name='test-model')
-    usage_1 = TokenUsage(
-        model='test-model',
-        prompt_tokens=12,
-        completion_tokens=3,
-        cache_read_tokens=2,
-        cache_write_tokens=5,
-        response_id='resp-1',
-    )
-    usage_2 = TokenUsage(
-        model='test-model',
-        prompt_tokens=7,
-        completion_tokens=2,
-        cache_read_tokens=1,
-        cache_write_tokens=3,
-        response_id='resp-2',
-    )
-    metrics._token_usages.append(usage_1)
-    metrics._token_usages.append(usage_2)
-
-    # Build a list of events
-    events = []
-    for i in range(5):
-        e = Event()
-        e._id = i
-        # We'll attach usage_1 to event 1, usage_2 to event 3
-        if i == 1:
-            e._tool_call_metadata = ToolCallMetadata(
-                tool_call_id='tid1',
-                function_name='fn1',
-                model_response={'id': 'resp-1'},
-                total_calls_in_response=1,
-            )
-        elif i == 3:
-            e._tool_call_metadata = ToolCallMetadata(
-                tool_call_id='tid2',
-                function_name='fn2',
-                model_response={'id': 'resp-2'},
-                total_calls_in_response=1,
-            )
-        events.append(e)
-
-    # If we ask for event_id=3, we find usage_2 immediately
-    found_3 = get_token_usage_for_event_id(events, 3, metrics)
-    assert found_3 is not None
-    assert found_3.response_id == 'resp-2'
-
-    # If we ask for event_id=2, no usage in event2, so we check event1 -> usage_1 found
-    found_2 = get_token_usage_for_event_id(events, 2, metrics)
-    assert found_2 is not None
-    assert found_2.response_id == 'resp-1'
-
-    # If we ask for event_id=0, no usage in event0 or earlier, so return None
-    found_0 = get_token_usage_for_event_id(events, 0, metrics)
-    assert found_0 is None
--- a/tests/unit/test_prompt_caching.py
+++ b/tests/unit/test_prompt_caching.py
@@ -84,12 +84,12 @@ def test_get_messages(codeact_agent: CodeActAgent):
    assert messages[0].content[0].cache_prompt  # system message
    assert messages[1].role == 'user'
    assert messages[1].content[0].text.endswith('Initial user message')
-    # we add cache breakpoint to only the last user message
-    assert not messages[1].content[0].cache_prompt
+    # we add cache breakpoint to the last 3 user messages
+    assert messages[1].content[0].cache_prompt

    assert messages[3].role == 'user'
    assert messages[3].content[0].text == ('Hello, agent!')
-    assert not messages[3].content[0].cache_prompt
+    assert messages[3].content[0].cache_prompt
    assert messages[4].role == 'assistant'
    assert messages[4].content[0].text == 'Hello, user!'
    assert not messages[4].content[0].cache_prompt
@@ -121,9 +121,10 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
        if msg.role in ('user', 'system') and msg.content[0].cache_prompt
    ]
    assert (
-        len(cached_user_messages) == 2
-    )  # Including the initial system+user + last user message
+        len(cached_user_messages) == 4
+    )  # Including the initial system + user messages and the last 2 user messages

-    # Verify that these are indeed the last user message (from start)
+    # Verify that these are indeed the last two user messages (from start)
    assert cached_user_messages[0].content[0].text.startswith('You are OpenHands agent')
-    assert cached_user_messages[1].content[0].text.startswith('User message 14')
+    assert cached_user_messages[2].content[0].text.startswith('User message 1')
+    assert cached_user_messages[3].content[0].text.startswith('User message 1')
--- a/tests/unit/test_truncation.py
+++ b/tests/unit/test_truncation.py
@@ -1,4 +1,3 @@
-import asyncio
 from unittest.mock import MagicMock

 import pytest
@@ -73,53 +72,6 @@ class TestTruncation:
            if isinstance(event, CmdOutputObservation):
                assert any(e._id == event._cause for e in truncated[: i + 1])

-    def test_truncation_does_not_impact_trajectory(self, mock_event_stream, mock_agent):
-        controller = AgentController(
-            agent=mock_agent,
-            event_stream=mock_event_stream,
-            max_iterations=10,
-            sid='test_truncation',
-            confirmation_mode=False,
-            headless_mode=True,
-        )
-
-        # Create a sequence of events with IDs
-        first_msg = MessageAction(content='Hello, start task', wait_for_response=False)
-        first_msg._source = EventSource.USER
-        first_msg._id = 1
-
-        pairs = 10
-        history_len = 1 + 2 * pairs
-        events = [first_msg]
-        for i in range(pairs):
-            cmd = CmdRunAction(command=f'cmd{i}')
-            cmd._id = i + 2
-            obs = CmdOutputObservation(
-                command=f'cmd{i}', content=f'output{i}', command_id=cmd._id
-            )
-            obs._cause = cmd._id
-            events.extend([cmd, obs])
-
-        # patch events to history for testing purpose
-        controller.state.history = events
-
-        # Update mock event stream
-        mock_event_stream.get_events.return_value = controller.state.history
-
-        assert len(controller.state.history) == history_len
-
-        # Force apply truncation
-        controller._handle_long_context_error()
-
-        # Check that the history has been truncated before closing the controller
-        assert len(controller.state.history) == 13 < history_len
-
-        # Check that after properly closing the controller, history is recovered
-        asyncio.run(controller.close())
-        assert len(controller.event_stream.get_events()) == history_len
-        assert len(controller.state.history) == history_len
-        assert len(controller.get_trajectory()) == history_len
-
    def test_context_window_exceeded_handling(self, mock_event_stream, mock_agent):
        controller = AgentController(
            agent=mock_agent,
Author	SHA1	Message	Date
Rohit Malhotra	164fab0a8d	Add tests for Git LFS and clone depth support	2025-02-20 12:45:25 -05:00
Rohit Malhotra	bddf6674c3	Add Git LFS and clone depth support	2025-02-20 12:45:24 -05:00