Chore: clean up LLM (prompt caching, supports fn calling), leftover renames (#6095)

Engel Nyst
2025-02-01 18:14:08 +01:00
committed by GitHub
parent 3b0bbce54a
commit eb8d1600c3
21 changed files with 119 additions and 187 deletions

View File

@@ -219,7 +219,7 @@ jobs:
exit 1
fi
-# Run unit tests with the EventStream runtime Docker images as root
+# Run unit tests with the Docker runtime Docker images as root
test_runtime_root:
name: RT Unit Tests (Root)
needs: [ghcr_build_runtime]
@@ -286,7 +286,7 @@ jobs:
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
-TEST_RUNTIME=eventstream \
+TEST_RUNTIME=docker \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
@@ -297,7 +297,7 @@ jobs:
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-# Run unit tests with the EventStream runtime Docker images as openhands user
+# Run unit tests with the Docker runtime Docker images as openhands user
test_runtime_oh:
name: RT Unit Tests (openhands)
runs-on: ubuntu-latest
@@ -363,7 +363,7 @@ jobs:
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
-TEST_RUNTIME=eventstream \
+TEST_RUNTIME=docker \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \

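For orientation, these workflow hunks only rename the value that selects the runtime under test: `TEST_RUNTIME=eventstream` becomes `TEST_RUNTIME=docker`. Below is a minimal sketch of how a test helper could map that environment variable to a runtime class; the helper name, the mapping, and the class names are illustrative assumptions, not the repository's actual conftest code.

import os

# Illustrative mapping only: the real test suite resolves TEST_RUNTIME in its
# conftest; the names below are assumptions for this sketch.
_RUNTIME_CLASS_BY_NAME = {
    'docker': 'DockerRuntime',
    'remote': 'RemoteRuntime',
}

def resolve_runtime_class_name(default: str = 'docker') -> str:
    """Return the runtime class name selected by the TEST_RUNTIME variable."""
    selected = os.environ.get('TEST_RUNTIME', default).lower()
    if selected == 'eventstream':
        # Legacy value from before the rename; treat it as the Docker runtime.
        selected = 'docker'
    try:
        return _RUNTIME_CLASS_BY_NAME[selected]
    except KeyError:
        raise ValueError(f'Unknown TEST_RUNTIME value: {selected!r}')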
View File

@@ -1,8 +1,8 @@
-# 📦 Runtime EventStream
+# 📦 Runtime Docker
-Le Runtime EventStream d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA.
+Le Runtime Docker d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA.
Il crée un environnement en bac à sable (sandbox) en utilisant Docker, où du code arbitraire peut être exécuté en toute sécurité sans risquer le système hôte.
## Pourquoi avons-nous besoin d'un runtime en bac à sable ?

View File

@@ -163,7 +163,7 @@ Les options de configuration de base sont définies dans la section `[core]` du
- `runtime`
- Type : `str`
-- Valeur par défaut : `"eventstream"`
+- Valeur par défaut : `"docker"`
- Description : Environnement d'exécution
- `default_agent`

View File

@@ -114,7 +114,7 @@ Pour créer un workflow d'évaluation pour votre benchmark, suivez ces étapes :
def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
config = AppConfig(
default_agent=metadata.agent_class,
-runtime='eventstream',
+runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='your_container_image',

View File

@@ -1,8 +1,8 @@
以下是翻译后的内容:
-# 📦 EventStream 运行时
+# 📦 Docker 运行时
-OpenHands EventStream 运行时是实现 AI 代理操作安全灵活执行的核心组件。
+OpenHands Docker 运行时是实现 AI 代理操作安全灵活执行的核心组件。
它使用 Docker 创建一个沙盒环境,可以安全地运行任意代码而不会危及主机系统。
## 为什么我们需要沙盒运行时?

View File

@@ -162,7 +162,7 @@
- `runtime`
- 类型: `str`
-- 默认值: `"eventstream"`
+- 默认值: `"docker"`
- 描述: 运行时环境
- `default_agent`

View File

@@ -112,7 +112,7 @@ OpenHands 的主要入口点在 `openhands/core/main.py` 中。以下是它的
def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
config = AppConfig(
default_agent=metadata.agent_class,
-runtime='eventstream',
+runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='your_container_image',

View File

@@ -1,6 +1,6 @@
-# 📦 EventStream Runtime
+# 📦 Docker Runtime
-The OpenHands EventStream Runtime is the core component that enables secure and flexible execution of AI agent's action.
+The OpenHands Docker Runtime is the core component that enables secure and flexible execution of AI agent's action.
It creates a sandboxed environment using Docker, where arbitrary code can be run safely without risking the host system.
## Why do we need a sandboxed runtime?

View File

@@ -126,7 +126,7 @@ The core configuration options are defined in the `[core]` section of the `confi
- `runtime`
- Type: `str`
-- Default: `"eventstream"`
+- Default: `"docker"`
- Description: Runtime environment
- `default_agent`

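The `runtime` option documented above is the same setting whose default changes from `"eventstream"` to `"docker"`. As a quick illustration, it can also be set programmatically when building the application config; this is a minimal sketch that assumes `AppConfig` is importable from `openhands.core.config`, consistent with the evaluation snippet shown below.

from openhands.core.config import AppConfig  # import path assumed for this sketch

# Equivalent of putting `runtime = "docker"` under [core] in config.toml.
config = AppConfig(runtime='docker')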
View File

@@ -112,7 +112,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
config = AppConfig(
default_agent=metadata.agent_class,
-runtime='eventstream',
+runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='your_container_image',

View File

@@ -98,12 +98,6 @@ class OperationCancelled(Exception):
super().__init__(message)
-class CloudFlareBlockageError(Exception):
-"""Exception raised when a request is blocked by CloudFlare."""
-pass
# ============================================
# LLM function calling Exceptions
# ============================================

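With `CloudFlareBlockageError` removed (see the `llm.py` hunks below), callers no longer get a dedicated exception for Cloudflare blockages; the underlying litellm `APIError` propagates instead, subject to the usual retry handling. The following is a hedged sketch of how a call site could reproduce the old behaviour if it still needs it; it is not code from the repository.

from litellm.exceptions import APIError

def complete_with_cloudflare_check(llm, messages):
    """Sketch: detect a Cloudflare blockage without the removed custom exception."""
    try:
        return llm.completion(messages=messages)
    except APIError as e:
        if 'Attention Required! | Cloudflare' in str(e):
            # Previously translated into CloudFlareBlockageError; handle in place now.
            raise RuntimeError('Request blocked by Cloudflare') from e
        raise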
View File

@@ -27,7 +27,6 @@ from litellm.exceptions import (
from litellm.types.utils import CostPerToken, ModelResponse, Usage
from litellm.utils import create_pretrained_tokenizer
-from openhands.core.exceptions import CloudFlareBlockageError
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message
from openhands.llm.debug_mixin import DebugMixin
@@ -218,21 +217,14 @@ class LLM(RetryMixin, DebugMixin):
# log the entire LLM prompt
self.log_prompt(messages)
-if self.is_caching_prompt_active():
-# Anthropic-specific prompt caching
-if 'claude-3' in self.config.model:
-kwargs['extra_headers'] = {
-'anthropic-beta': 'prompt-caching-2024-07-31',
-}
# set litellm modify_params to the configured value
# True by default to allow litellm to do transformations like adding a default message, when a message is empty
# NOTE: this setting is global; unlike drop_params, it cannot be overridden in the litellm completion partial
litellm.modify_params = self.config.modify_params
try:
# Record start time for latency measurement
start_time = time.time()
# we don't support streaming here, thus we get a ModelResponse
resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
@@ -305,12 +297,6 @@ class LLM(RetryMixin, DebugMixin):
f.write(json.dumps(_d))
return resp
except APIError as e:
if 'Attention Required! | Cloudflare' in str(e):
raise CloudFlareBlockageError(
'Request blocked by CloudFlare'
) from e
raise
self._completion = wrapper
@@ -414,6 +400,25 @@ class LLM(RetryMixin, DebugMixin):
):
self.config.max_output_tokens = self.model_info['max_tokens']
+# Initialize function calling capability
+# Check if model name is in our supported list
+model_name_supported = (
+self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
+or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
+or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
+)
+# Handle native_tool_calling user-defined configuration
+if self.config.native_tool_calling is None:
+self._function_calling_active = model_name_supported
+elif self.config.native_tool_calling is False:
+self._function_calling_active = False
+else:
+# try to enable native tool calling if supported by the model
+self._function_calling_active = litellm.supports_function_calling(
+model=self.config.model
+)
def vision_is_active(self) -> bool:
with warnings.catch_warnings():
warnings.simplefilter('ignore')
@@ -455,24 +460,11 @@ class LLM(RetryMixin, DebugMixin):
)
def is_function_calling_active(self) -> bool:
-# Check if model name is in our supported list
-model_name_supported = (
-self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
-or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
-or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
-)
-# Handle native_tool_calling user-defined configuration
-if self.config.native_tool_calling is None:
-return model_name_supported
-elif self.config.native_tool_calling is False:
-return False
-else:
-# try to enable native tool calling if supported by the model
-supports_fn_call = litellm.supports_function_calling(
-model=self.config.model
-)
-return supports_fn_call
+"""Returns whether function calling is supported and enabled for this LLM instance.
+The result is cached during initialization for performance.
+"""
+return self._function_calling_active
def _post_completion(self, response: ModelResponse) -> float:
"""Post-process the completion response.

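Taken together, the last two hunks move the function-calling check out of `is_function_calling_active()` and into initialization, caching the result in `self._function_calling_active`; the earlier hunks drop the manual `anthropic-beta` prompt-caching header, presumably because litellm now handles that for Anthropic models. Below is a condensed, properly indented sketch of the new function-calling logic, reassembled from the added lines above; the surrounding `LLM` class and the repository's actual `FUNCTION_CALLING_SUPPORTED_MODELS` list are elided.

import litellm

# Placeholder for the repository's actual allow-list of model names.
FUNCTION_CALLING_SUPPORTED_MODELS: list[str] = []

class FunctionCallingSupportSketch:
    def __init__(self, config) -> None:
        self.config = config
        # Check if the model name is in the supported list (exact, suffix, or substring match).
        model_name_supported = (
            self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
            or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
            or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
        )
        # Handle the user-defined native_tool_calling configuration.
        if self.config.native_tool_calling is None:
            self._function_calling_active = model_name_supported
        elif self.config.native_tool_calling is False:
            self._function_calling_active = False
        else:
            # Explicitly requested: ask litellm whether the model supports it.
            self._function_calling_active = litellm.supports_function_calling(
                model=self.config.model
            )

    def is_function_calling_active(self) -> bool:
        """Cached at initialization; no longer recomputed on every call."""
        return self._function_calling_active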
View File

@@ -24,7 +24,7 @@ class RetryMixin:
A retry decorator with the parameters customizable in configuration.
"""
num_retries = kwargs.get('num_retries')
-retry_exceptions = kwargs.get('retry_exceptions')
+retry_exceptions: tuple = kwargs.get('retry_exceptions', ())
retry_min_wait = kwargs.get('retry_min_wait')
retry_max_wait = kwargs.get('retry_max_wait')
retry_multiplier = kwargs.get('retry_multiplier')
@@ -39,7 +39,9 @@ class RetryMixin:
before_sleep=before_sleep,
stop=stop_after_attempt(num_retries) | stop_if_should_exit(),
reraise=True,
-retry=(retry_if_exception_type(retry_exceptions)),
+retry=(
+retry_if_exception_type(retry_exceptions)
+), # retry only for these types
wait=wait_exponential(
multiplier=retry_multiplier,
min=retry_min_wait,

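The `retry_mixin.py` change gives `retry_exceptions` an empty-tuple default so that `retry_if_exception_type` always receives a valid argument; with an empty tuple, no exception type ever triggers a retry. Here is a small self-contained tenacity sketch of the same shape, with the project-specific `stop_if_should_exit()` condition and `before_sleep` hook omitted.

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

def make_retry_decorator(
    num_retries: int = 3,
    retry_exceptions: tuple = (),  # empty tuple: never retry on an exception type
    retry_min_wait: float = 1,
    retry_max_wait: float = 10,
    retry_multiplier: float = 2,
):
    """Illustrative reduction of RetryMixin.retry_decorator."""
    return retry(
        stop=stop_after_attempt(num_retries),
        reraise=True,
        retry=retry_if_exception_type(retry_exceptions),  # retry only for these types
        wait=wait_exponential(
            multiplier=retry_multiplier,
            min=retry_min_wait,
            max=retry_max_wait,
        ),
    )

Applying `make_retry_decorator(retry_exceptions=(TimeoutError,))` to a flaky function would then retry only on `TimeoutError`, while the empty default retries on nothing.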
View File

@@ -1,4 +1,4 @@
"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
import os
import time

View File

@@ -1,4 +1,4 @@
"""Browsing-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
from conftest import _close_test_runtime, _load_runtime

View File

@@ -1,4 +1,4 @@
"""Edit-related tests for the EventStreamRuntime."""
"""Edit-related tests for the DockerRuntime."""
import os

View File

@@ -1,4 +1,4 @@
"""Env vars related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
"""Env vars related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
import os
from unittest.mock import patch

View File

@@ -1,4 +1,4 @@
"""Image-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
"""Image-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
import pytest
from conftest import _close_test_runtime, _load_runtime

View File

@@ -1,4 +1,4 @@
"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
import asyncio
import os

View File

@@ -389,27 +389,6 @@ def test_completion_with_two_positional_args(mock_litellm_completion, default_co
) # No positional args should be passed to litellm_completion here
-@patch('openhands.llm.llm.litellm_completion')
-def test_llm_cloudflare_blockage(mock_litellm_completion, default_config):
-from litellm.exceptions import APIError
-from openhands.core.exceptions import CloudFlareBlockageError
-llm = LLM(default_config)
-mock_litellm_completion.side_effect = APIError(
-message='Attention Required! | Cloudflare',
-llm_provider='test_provider',
-model='test_model',
-status_code=403,
-)
-with pytest.raises(CloudFlareBlockageError, match='Request blocked by CloudFlare'):
-llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])
-# Ensure the completion was called
-mock_litellm_completion.assert_called_once()
@patch('openhands.llm.llm.litellm.token_counter')
def test_get_token_count_with_dict_messages(mock_token_counter, default_config):
mock_token_counter.return_value = 42

View File

@@ -128,38 +128,3 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
assert cached_user_messages[0].content[0].text.startswith('You are OpenHands agent')
assert cached_user_messages[2].content[0].text.startswith('User message 1')
assert cached_user_messages[3].content[0].text.startswith('User message 1')
-def test_prompt_caching_headers(codeact_agent: CodeActAgent):
-history = list()
-# Setup
-msg1 = MessageAction('Hello, agent!')
-msg1._source = 'user'
-history.append(msg1)
-msg2 = MessageAction('Hello, user!')
-msg2._source = 'agent'
-history.append(msg2)
-mock_state = Mock()
-mock_state.history = history
-mock_state.max_iterations = 5
-mock_state.iteration = 0
-mock_state.extra_data = {}
-codeact_agent.reset()
-# Create a mock for litellm_completion
-def check_headers(**kwargs):
-assert 'extra_headers' in kwargs
-assert 'anthropic-beta' in kwargs['extra_headers']
-assert kwargs['extra_headers']['anthropic-beta'] == 'prompt-caching-2024-07-31'
-return ModelResponse(
-choices=[{'message': {'content': 'Hello! How can I assist you today?'}}]
-)
-codeact_agent.llm._completion_unwrapped = check_headers
-result = codeact_agent.step(mock_state)
-# Assert
-assert isinstance(result, MessageAction)
-assert result.content == 'Hello! How can I assist you today?'
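The deleted test pinned the old behaviour of injecting the `anthropic-beta: prompt-caching-2024-07-31` header; since the `LLM` wrapper no longer adds it, the test has to go. For reference, with recent litellm versions Anthropic prompt caching is requested through `cache_control` entries on message content blocks, and litellm adds any provider-specific plumbing itself. A hedged sketch at the litellm level, with placeholder model name and prompt text:

import litellm

# Sketch only: prompt caching expressed per message block rather than via a
# manually injected beta header. Model and prompt text are placeholders.
response = litellm.completion(
    model='anthropic/claude-3-5-sonnet-20241022',
    messages=[
        {
            'role': 'system',
            'content': [
                {
                    'type': 'text',
                    'text': 'You are OpenHands agent...',
                    'cache_control': {'type': 'ephemeral'},
                }
            ],
        },
        {'role': 'user', 'content': 'Hello, agent!'},
    ],
)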