Chore: clean up LLM (prompt caching, supports fn calling), leftover renames (#6095)
.github/workflows/ghcr-build.yml (8 changed lines, vendored)
@@ -219,7 +219,7 @@ jobs:
             exit 1
           fi

-  # Run unit tests with the EventStream runtime Docker images as root
+  # Run unit tests with the Docker runtime Docker images as root
   test_runtime_root:
     name: RT Unit Tests (Root)
     needs: [ghcr_build_runtime]
@@ -286,7 +286,7 @@ jobs:
           image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
           image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')

-          TEST_RUNTIME=eventstream \
+          TEST_RUNTIME=docker \
           SANDBOX_USER_ID=$(id -u) \
           SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
           TEST_IN_CI=true \
@@ -297,7 +297,7 @@ jobs:
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

-  # Run unit tests with the EventStream runtime Docker images as openhands user
+  # Run unit tests with the Docker runtime Docker images as openhands user
   test_runtime_oh:
     name: RT Unit Tests (openhands)
     runs-on: ubuntu-latest
@@ -363,7 +363,7 @@ jobs:
           image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
           image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')

-          TEST_RUNTIME=eventstream \
+          TEST_RUNTIME=docker \
           SANDBOX_USER_ID=$(id -u) \
           SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
           TEST_IN_CI=true \

@@ -1,8 +1,8 @@


-# 📦 Runtime EventStream
+# 📦 Runtime Docker

-Le Runtime EventStream d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA.
+Le Runtime Docker d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA.
 Il crée un environnement en bac à sable (sandbox) en utilisant Docker, où du code arbitraire peut être exécuté en toute sécurité sans risquer le système hôte.

 ## Pourquoi avons-nous besoin d'un runtime en bac à sable ?

@@ -163,7 +163,7 @@ Les options de configuration de base sont définies dans la section `[core]` du

 - `runtime`
   - Type : `str`
-  - Valeur par défaut : `"eventstream"`
+  - Valeur par défaut : `"docker"`
   - Description : Environnement d'exécution

 - `default_agent`

@@ -114,7 +114,7 @@ Pour créer un workflow d'évaluation pour votre benchmark, suivez ces étapes :
 def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
     config = AppConfig(
         default_agent=metadata.agent_class,
-        runtime='eventstream',
+        runtime='docker',
         max_iterations=metadata.max_iterations,
         sandbox=SandboxConfig(
             base_container_image='your_container_image',

@@ -1,8 +1,8 @@
 以下是翻译后的内容:

-# 📦 EventStream 运行时
+# 📦 Docker 运行时

-OpenHands EventStream 运行时是实现 AI 代理操作安全灵活执行的核心组件。
+OpenHands Docker 运行时是实现 AI 代理操作安全灵活执行的核心组件。
 它使用 Docker 创建一个沙盒环境,可以安全地运行任意代码而不会危及主机系统。

 ## 为什么我们需要沙盒运行时?

@@ -162,7 +162,7 @@

 - `runtime`
   - 类型: `str`
-  - 默认值: `"eventstream"`
+  - 默认值: `"docker"`
   - 描述: 运行时环境

 - `default_agent`

@@ -112,7 +112,7 @@ OpenHands 的主要入口点在 `openhands/core/main.py` 中。以下是它的
 def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
     config = AppConfig(
         default_agent=metadata.agent_class,
-        runtime='eventstream',
+        runtime='docker',
         max_iterations=metadata.max_iterations,
         sandbox=SandboxConfig(
             base_container_image='your_container_image',

@@ -1,6 +1,6 @@
-# 📦 EventStream Runtime
+# 📦 Docker Runtime

-The OpenHands EventStream Runtime is the core component that enables secure and flexible execution of AI agent's action.
+The OpenHands Docker Runtime is the core component that enables secure and flexible execution of AI agent's action.
 It creates a sandboxed environment using Docker, where arbitrary code can be run safely without risking the host system.

 ## Why do we need a sandboxed runtime?

@@ -126,7 +126,7 @@ The core configuration options are defined in the `[core]` section of the `confi

 - `runtime`
   - Type: `str`
-  - Default: `"eventstream"`
+  - Default: `"docker"`
   - Description: Runtime environment

 - `default_agent`

@@ -112,7 +112,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
 def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
     config = AppConfig(
         default_agent=metadata.agent_class,
-        runtime='eventstream',
+        runtime='docker',
         max_iterations=metadata.max_iterations,
         sandbox=SandboxConfig(
             base_container_image='your_container_image',

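The same rename appears twice in the docs above: as the new default of the `runtime` option and as the `runtime=` argument in the evaluation example. A minimal sketch tying the two together, under stated assumptions (the `openhands.core.config` import path and the `CodeActAgent` agent name are illustrative; `your_container_image` is the docs' placeholder):

```python
# Hedged sketch, not part of this commit: selecting the Docker runtime programmatically,
# mirroring the AppConfig usage shown in the evaluation hunk above.
from openhands.core.config import AppConfig, SandboxConfig  # assumed import path

config = AppConfig(
    default_agent='CodeActAgent',  # illustrative agent name
    runtime='docker',              # was 'eventstream' before this change
    max_iterations=10,
    sandbox=SandboxConfig(
        base_container_image='your_container_image',  # placeholder from the docs
    ),
)
```
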
@@ -98,12 +98,6 @@ class OperationCancelled(Exception):
         super().__init__(message)


-class CloudFlareBlockageError(Exception):
-    """Exception raised when a request is blocked by CloudFlare."""
-
-    pass
-
-
 # ============================================
 # LLM function calling Exceptions
 # ============================================

@@ -27,7 +27,6 @@ from litellm.exceptions import (
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer

-from openhands.core.exceptions import CloudFlareBlockageError
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
 from openhands.llm.debug_mixin import DebugMixin
@@ -218,99 +217,86 @@ class LLM(RetryMixin, DebugMixin):
             # log the entire LLM prompt
             self.log_prompt(messages)

-            if self.is_caching_prompt_active():
-                # Anthropic-specific prompt caching
-                if 'claude-3' in self.config.model:
-                    kwargs['extra_headers'] = {
-                        'anthropic-beta': 'prompt-caching-2024-07-31',
-                    }
-
             # set litellm modify_params to the configured value
             # True by default to allow litellm to do transformations like adding a default message, when a message is empty
             # NOTE: this setting is global; unlike drop_params, it cannot be overridden in the litellm completion partial
             litellm.modify_params = self.config.modify_params

-            try:
-                # Record start time for latency measurement
-                start_time = time.time()
-                # we don't support streaming here, thus we get a ModelResponse
-                resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
-
-                # Calculate and record latency
-                latency = time.time() - start_time
-                response_id = resp.get('id', 'unknown')
-                self.metrics.add_response_latency(latency, response_id)
-
-                non_fncall_response = copy.deepcopy(resp)
-                if mock_function_calling:
-                    assert len(resp.choices) == 1
-                    assert mock_fncall_tools is not None
-                    non_fncall_response_message = resp.choices[0].message
-                    fn_call_messages_with_response = (
-                        convert_non_fncall_messages_to_fncall_messages(
-                            messages + [non_fncall_response_message], mock_fncall_tools
-                        )
-                    )
-                    fn_call_response_message = fn_call_messages_with_response[-1]
-                    if not isinstance(fn_call_response_message, LiteLLMMessage):
-                        fn_call_response_message = LiteLLMMessage(
-                            **fn_call_response_message
-                        )
-                    resp.choices[0].message = fn_call_response_message
-
-                message_back: str = resp['choices'][0]['message']['content'] or ''
-                tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][
-                    'message'
-                ].get('tool_calls', [])
-                if tool_calls:
-                    for tool_call in tool_calls:
-                        fn_name = tool_call.function.name
-                        fn_args = tool_call.function.arguments
-                        message_back += f'\nFunction call: {fn_name}({fn_args})'
-
-                # log the LLM response
-                self.log_response(message_back)
-
-                # post-process the response first to calculate cost
-                cost = self._post_completion(resp)
-
-                # log for evals or other scripts that need the raw completion
-                if self.config.log_completions:
-                    assert self.config.log_completions_folder is not None
-                    log_file = os.path.join(
-                        self.config.log_completions_folder,
-                        # use the metric model name (for draft editor)
-                        f'{self.metrics.model_name.replace("/", "__")}-{time.time()}.json',
-                    )
-
-                    # set up the dict to be logged
-                    _d = {
-                        'messages': messages,
-                        'response': resp,
-                        'args': args,
-                        'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'},
-                        'timestamp': time.time(),
-                        'cost': cost,
-                    }
-
-                    # if non-native function calling, save messages/response separately
-                    if mock_function_calling:
-                        # Overwrite response as non-fncall to be consistent with messages
-                        _d['response'] = non_fncall_response
-
-                        # Save fncall_messages/response separately
-                        _d['fncall_messages'] = original_fncall_messages
-                        _d['fncall_response'] = resp
-                    with open(log_file, 'w') as f:
-                        f.write(json.dumps(_d))
-
-                return resp
-            except APIError as e:
-                if 'Attention Required! | Cloudflare' in str(e):
-                    raise CloudFlareBlockageError(
-                        'Request blocked by CloudFlare'
-                    ) from e
-                raise
+            # Record start time for latency measurement
+            start_time = time.time()
+            # we don't support streaming here, thus we get a ModelResponse
+            resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
+
+            # Calculate and record latency
+            latency = time.time() - start_time
+            response_id = resp.get('id', 'unknown')
+            self.metrics.add_response_latency(latency, response_id)
+
+            non_fncall_response = copy.deepcopy(resp)
+            if mock_function_calling:
+                assert len(resp.choices) == 1
+                assert mock_fncall_tools is not None
+                non_fncall_response_message = resp.choices[0].message
+                fn_call_messages_with_response = (
+                    convert_non_fncall_messages_to_fncall_messages(
+                        messages + [non_fncall_response_message], mock_fncall_tools
+                    )
+                )
+                fn_call_response_message = fn_call_messages_with_response[-1]
+                if not isinstance(fn_call_response_message, LiteLLMMessage):
+                    fn_call_response_message = LiteLLMMessage(
+                        **fn_call_response_message
+                    )
+                resp.choices[0].message = fn_call_response_message
+
+            message_back: str = resp['choices'][0]['message']['content'] or ''
+            tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][
+                'message'
+            ].get('tool_calls', [])
+            if tool_calls:
+                for tool_call in tool_calls:
+                    fn_name = tool_call.function.name
+                    fn_args = tool_call.function.arguments
+                    message_back += f'\nFunction call: {fn_name}({fn_args})'
+
+            # log the LLM response
+            self.log_response(message_back)
+
+            # post-process the response first to calculate cost
+            cost = self._post_completion(resp)
+
+            # log for evals or other scripts that need the raw completion
+            if self.config.log_completions:
+                assert self.config.log_completions_folder is not None
+                log_file = os.path.join(
+                    self.config.log_completions_folder,
+                    # use the metric model name (for draft editor)
+                    f'{self.metrics.model_name.replace("/", "__")}-{time.time()}.json',
+                )
+
+                # set up the dict to be logged
+                _d = {
+                    'messages': messages,
+                    'response': resp,
+                    'args': args,
+                    'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'},
+                    'timestamp': time.time(),
+                    'cost': cost,
+                }
+
+                # if non-native function calling, save messages/response separately
+                if mock_function_calling:
+                    # Overwrite response as non-fncall to be consistent with messages
+                    _d['response'] = non_fncall_response
+
+                    # Save fncall_messages/response separately
+                    _d['fncall_messages'] = original_fncall_messages
+                    _d['fncall_response'] = resp
+                with open(log_file, 'w') as f:
+                    f.write(json.dumps(_d))
+
+            return resp

         self._completion = wrapper
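
One behavioral consequence of the hunk above: with the `try`/`except APIError` block and `CloudFlareBlockageError` removed, a blocked request now reaches the caller (after any retries) as the original litellm `APIError`. A hedged sketch of handling that at the call site (`config` stands in for an `LLMConfig` built elsewhere):

```python
# Hedged sketch, not from this diff: the error is no longer wrapped, so catch APIError directly.
from litellm.exceptions import APIError

llm = LLM(config)  # `config` assumed to be an LLMConfig
try:
    resp = llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])
except APIError as e:
    # previously surfaced as CloudFlareBlockageError; now the caller sees the APIError itself
    print(f'completion failed: {e}')
```
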
@@ -414,6 +400,25 @@ class LLM(RetryMixin, DebugMixin):
         ):
             self.config.max_output_tokens = self.model_info['max_tokens']

+        # Initialize function calling capability
+        # Check if model name is in our supported list
+        model_name_supported = (
+            self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
+            or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
+            or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
+        )
+
+        # Handle native_tool_calling user-defined configuration
+        if self.config.native_tool_calling is None:
+            self._function_calling_active = model_name_supported
+        elif self.config.native_tool_calling is False:
+            self._function_calling_active = False
+        else:
+            # try to enable native tool calling if supported by the model
+            self._function_calling_active = litellm.supports_function_calling(
+                model=self.config.model
+            )
+
     def vision_is_active(self) -> bool:
         with warnings.catch_warnings():
             warnings.simplefilter('ignore')
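
The block added above runs the support check once in `__init__` and stores it in `self._function_calling_active`. A standalone restatement of that three-way decision, for illustration only (the helper name and its boolean inputs are hypothetical stand-ins for the config value, the `FUNCTION_CALLING_SUPPORTED_MODELS` match, and the litellm lookup):

```python
# Illustrative sketch of the decision table implemented in the hunk above.
def resolve_function_calling(
    native_tool_calling: bool | None,
    model_name_supported: bool,
    litellm_says_supported: bool,
) -> bool:
    if native_tool_calling is None:  # unset: trust the supported-models list
        return model_name_supported
    if native_tool_calling is False:  # explicitly disabled by the user
        return False
    return litellm_says_supported  # explicitly enabled: defer to litellm's model metadata


# The three configurations side by side:
assert resolve_function_calling(None, True, False) is True
assert resolve_function_calling(False, True, True) is False
assert resolve_function_calling(True, False, True) is True
```
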
@@ -455,24 +460,11 @@
         )

     def is_function_calling_active(self) -> bool:
-        # Check if model name is in our supported list
-        model_name_supported = (
-            self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
-            or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
-            or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
-        )
-
-        # Handle native_tool_calling user-defined configuration
-        if self.config.native_tool_calling is None:
-            return model_name_supported
-        elif self.config.native_tool_calling is False:
-            return False
-        else:
-            # try to enable native tool calling if supported by the model
-            supports_fn_call = litellm.supports_function_calling(
-                model=self.config.model
-            )
-            return supports_fn_call
+        """Returns whether function calling is supported and enabled for this LLM instance.
+
+        The result is cached during initialization for performance.
+        """
+        return self._function_calling_active

     def _post_completion(self, response: ModelResponse) -> float:
         """Post-process the completion response.

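Because the computation now happens at construction time, `is_function_calling_active()` is a plain cached read. A hypothetical usage sketch (`config` again stands in for an `LLMConfig`):

```python
# Hypothetical usage: the supported-model scan no longer runs on every call.
llm = LLM(config)
if llm.is_function_calling_active():
    pass  # pass native tool definitions through completion()
else:
    pass  # fall back to prompt-based (mocked) function calling
```
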
@@ -24,7 +24,7 @@ class RetryMixin:
             A retry decorator with the parameters customizable in configuration.
         """
         num_retries = kwargs.get('num_retries')
-        retry_exceptions = kwargs.get('retry_exceptions')
+        retry_exceptions: tuple = kwargs.get('retry_exceptions', ())
         retry_min_wait = kwargs.get('retry_min_wait')
         retry_max_wait = kwargs.get('retry_max_wait')
         retry_multiplier = kwargs.get('retry_multiplier')
@@ -39,7 +39,9 @@ class RetryMixin:
             before_sleep=before_sleep,
             stop=stop_after_attempt(num_retries) | stop_if_should_exit(),
             reraise=True,
-            retry=(retry_if_exception_type(retry_exceptions)),
+            retry=(
+                retry_if_exception_type(retry_exceptions)
+            ), # retry only for these types
             wait=wait_exponential(
                 multiplier=retry_multiplier,
                 min=retry_min_wait,

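The typed default `retry_exceptions: tuple = kwargs.get('retry_exceptions', ())` matters because the tuple goes straight into tenacity's `retry_if_exception_type`: an empty tuple matches nothing, so the wrapped call runs once and any error is re-raised immediately thanks to `reraise=True`. A self-contained sketch under those assumptions (`TransientError`, `build_retryer`, and `flaky_call` are made up for the example):

```python
# Standalone tenacity sketch mirroring the decorator assembled in RetryMixin above.
from tenacity import (
    Retrying,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)


class TransientError(Exception):
    """Hypothetical error type used only for this example."""


def build_retryer(retry_exceptions: tuple = (), num_retries: int = 3) -> Retrying:
    return Retrying(
        reraise=True,
        retry=retry_if_exception_type(retry_exceptions),  # empty tuple => never retry
        stop=stop_after_attempt(num_retries),
        wait=wait_exponential(multiplier=1, min=1, max=8),
    )


attempts = {'n': 0}


def flaky_call() -> str:
    attempts['n'] += 1
    if attempts['n'] < 3:
        raise TransientError('try again')
    return 'ok'


# Retries TransientError until it stops failing (here, on the third attempt).
print(build_retryer((TransientError,))(flaky_call))  # -> ok
```
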
@@ -1,4 +1,4 @@
-"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
+"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""

 import os
 import time

@@ -1,4 +1,4 @@
-"""Browsing-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
+"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""

 from conftest import _close_test_runtime, _load_runtime


@@ -1,4 +1,4 @@
-"""Edit-related tests for the EventStreamRuntime."""
+"""Edit-related tests for the DockerRuntime."""

 import os


@@ -1,4 +1,4 @@
-"""Env vars related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
+"""Env vars related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""

 import os
 from unittest.mock import patch

@@ -1,4 +1,4 @@
-"""Image-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
+"""Image-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""

 import pytest
 from conftest import _close_test_runtime, _load_runtime

@@ -1,4 +1,4 @@
-"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
+"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""

 import asyncio
 import os

@@ -389,27 +389,6 @@ def test_completion_with_two_positional_args(mock_litellm_completion, default_co
     ) # No positional args should be passed to litellm_completion here


-@patch('openhands.llm.llm.litellm_completion')
-def test_llm_cloudflare_blockage(mock_litellm_completion, default_config):
-    from litellm.exceptions import APIError
-
-    from openhands.core.exceptions import CloudFlareBlockageError
-
-    llm = LLM(default_config)
-    mock_litellm_completion.side_effect = APIError(
-        message='Attention Required! | Cloudflare',
-        llm_provider='test_provider',
-        model='test_model',
-        status_code=403,
-    )
-
-    with pytest.raises(CloudFlareBlockageError, match='Request blocked by CloudFlare'):
-        llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])
-
-    # Ensure the completion was called
-    mock_litellm_completion.assert_called_once()
-
-
 @patch('openhands.llm.llm.litellm.token_counter')
 def test_get_token_count_with_dict_messages(mock_token_counter, default_config):
     mock_token_counter.return_value = 42

@@ -128,38 +128,3 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
     assert cached_user_messages[0].content[0].text.startswith('You are OpenHands agent')
     assert cached_user_messages[2].content[0].text.startswith('User message 1')
     assert cached_user_messages[3].content[0].text.startswith('User message 1')
-
-
-def test_prompt_caching_headers(codeact_agent: CodeActAgent):
-    history = list()
-    # Setup
-    msg1 = MessageAction('Hello, agent!')
-    msg1._source = 'user'
-    history.append(msg1)
-    msg2 = MessageAction('Hello, user!')
-    msg2._source = 'agent'
-    history.append(msg2)
-
-    mock_state = Mock()
-    mock_state.history = history
-    mock_state.max_iterations = 5
-    mock_state.iteration = 0
-    mock_state.extra_data = {}
-
-    codeact_agent.reset()
-
-    # Create a mock for litellm_completion
-    def check_headers(**kwargs):
-        assert 'extra_headers' in kwargs
-        assert 'anthropic-beta' in kwargs['extra_headers']
-        assert kwargs['extra_headers']['anthropic-beta'] == 'prompt-caching-2024-07-31'
-        return ModelResponse(
-            choices=[{'message': {'content': 'Hello! How can I assist you today?'}}]
-        )
-
-    codeact_agent.llm._completion_unwrapped = check_headers
-    result = codeact_agent.step(mock_state)
-
-    # Assert
-    assert isinstance(result, MessageAction)
-    assert result.content == 'Hello! How can I assist you today?'
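
The deleted test asserted the `anthropic-beta: prompt-caching-2024-07-31` header that the LLM wrapper no longer injects; the surviving `test_get_messages_prompt_caching` checks caching on the messages themselves. For reference only (an illustration of Anthropic's documented per-block form, not code from this commit), a cacheable content block looks roughly like:

```python
# Illustrative only: a content block marked cacheable via cache_control.
system_block = {
    'type': 'text',
    'text': 'You are OpenHands agent...',  # text echoed from the remaining test's assertion
    'cache_control': {'type': 'ephemeral'},
}
```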